From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Stops a duplicate-symbol linker error when building against upstream vkd3d.
---
 dlls/wined3d/shader.c          | 2 +-
 dlls/wined3d/shader_sm4.c      | 2 +-
 dlls/wined3d/wined3d_private.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
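For reference, the collision this rename avoids, in miniature. This is a hedged sketch, not wined3d code: upstream vkd3d-shader defines its own helper with the same shader_extract_from_dxbc name, so a build that links wined3d against upstream vkd3d sees two definitions of one symbol.

/* upstream.c - stand-in for the helper in upstream vkd3d-shader */
int shader_extract_from_dxbc(void) { return 1; }

/* wined3d.c - stand-in for wined3d's helper before this patch */
int shader_extract_from_dxbc(void) { return 2; }
int main(void) { return shader_extract_from_dxbc(); }

/* $ cc upstream.c wined3d.c
 *   ld: multiple definition of `shader_extract_from_dxbc'
 * Prefixing the wined3d copy, as the patch below does, gives each
 * object a unique symbol name and the link succeeds. */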
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
index b4df79508ec..2728929045f 100644
--- a/dlls/wined3d/shader.c
+++ b/dlls/wined3d/shader.c
@@ -2481,7 +2481,7 @@ static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device
     shader->byte_code_size = desc->byte_code_size;
 
     max_version = shader_max_version_from_feature_level(device->cs->c.state->feature_level);
-    if (FAILED(hr = shader_extract_from_dxbc(shader, max_version, &source_type)))
+    if (FAILED(hr = wined3d_shader_extract_from_dxbc(shader, max_version, &source_type)))
         goto fail;
 
     if (!(shader->frontend = shader_select_frontend(source_type)))
diff --git a/dlls/wined3d/shader_sm4.c b/dlls/wined3d/shader_sm4.c
index 78abdbe81ac..74391cc1be8 100644
--- a/dlls/wined3d/shader_sm4.c
+++ b/dlls/wined3d/shader_sm4.c
@@ -2077,7 +2077,7 @@ static HRESULT shader_dxbc_process_section(struct wined3d_shader *shader, unsign
     return S_OK;
 }
 
-HRESULT shader_extract_from_dxbc(struct wined3d_shader *shader,
+HRESULT wined3d_shader_extract_from_dxbc(struct wined3d_shader *shader,
         unsigned int max_shader_version, enum vkd3d_shader_source_type *source_type)
 {
     const struct vkd3d_shader_code dxbc = {.code = shader->byte_code, .size = shader->byte_code_size};
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index a586d6847bf..001534e8c38 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -1466,7 +1466,7 @@ struct wined3d_shader_frontend
 
 extern const struct wined3d_shader_frontend sm1_shader_frontend DECLSPEC_HIDDEN;
 extern const struct wined3d_shader_frontend sm4_shader_frontend DECLSPEC_HIDDEN;
-HRESULT shader_extract_from_dxbc(struct wined3d_shader *shader,
+HRESULT wined3d_shader_extract_from_dxbc(struct wined3d_shader *shader,
         unsigned int max_shader_version, enum vkd3d_shader_source_type *source_type) DECLSPEC_HIDDEN;
 BOOL shader_get_stream_output_register_info(const struct wined3d_shader *shader,
         const struct wined3d_stream_output_element *so_element, unsigned int *register_idx,
From: Alexandre Julliard <julliard@winehq.org>
---
 dlls/d3dcompiler_43/tests/blob.c              |    2 +-
 dlls/d3dcompiler_43/tests/hlsl_d3d9.c         |    8 +-
 libs/vkd3d/AUTHORS                            |    2 +
 libs/vkd3d/Makefile.in                        |    6 +-
 libs/vkd3d/config.h                           |    4 +-
 libs/vkd3d/include/private/vkd3d_common.h     |    1 +
 libs/vkd3d/include/private/vkd3d_debug.h      |    2 +-
 libs/vkd3d/include/vkd3d.h                    |    1 +
 libs/vkd3d/include/vkd3d_shader.h             |    1 +
 libs/vkd3d/libs/vkd3d-common/debug.c          |    4 +-
 .../libs/vkd3d-shader/{trace.c => d3d_asm.c}  |   25 +-
 libs/vkd3d/libs/vkd3d-shader/d3dbc.c          | 1131 +++-
 libs/vkd3d/libs/vkd3d-shader/dxbc.c           | 1773 +-----
 libs/vkd3d/libs/vkd3d-shader/hlsl.c           |  619 +-
 libs/vkd3d/libs/vkd3d-shader/hlsl.h           |  233 +-
 libs/vkd3d/libs/vkd3d-shader/hlsl.l           |    8 +
 libs/vkd3d/libs/vkd3d-shader/hlsl.y           | 2362 +++---
 libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c   | 1821 ++++--
 .../libs/vkd3d-shader/hlsl_constant_ops.c     |  358 +-
 libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c       |  980 ---
 libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c       | 2531 --------
 libs/vkd3d/libs/vkd3d-shader/ir.c             | 1072 ++++
 libs/vkd3d/libs/vkd3d-shader/preproc.l        |    6 +-
 libs/vkd3d/libs/vkd3d-shader/sm4.h            |  552 --
 libs/vkd3d/libs/vkd3d-shader/spirv.c          | 1323 ++---
 libs/vkd3d/libs/vkd3d-shader/tpf.c            | 5234 +++++++++++++++++
 .../libs/vkd3d-shader/vkd3d_shader_main.c     |  239 +-
 .../libs/vkd3d-shader/vkd3d_shader_private.h  |   89 +-
 libs/vkd3d/libs/vkd3d/command.c               |  160 +-
 libs/vkd3d/libs/vkd3d/device.c                |  205 +-
 libs/vkd3d/libs/vkd3d/resource.c              |  935 ++-
 libs/vkd3d/libs/vkd3d/state.c                 |    4 +-
 libs/vkd3d/libs/vkd3d/vkd3d_private.h         |  234 +-
 33 files changed, 12569 insertions(+), 9356 deletions(-)
 rename libs/vkd3d/libs/vkd3d-shader/{trace.c => d3d_asm.c} (98%)
 delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c
 delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c
 create mode 100644 libs/vkd3d/libs/vkd3d-shader/ir.c
 delete mode 100644 libs/vkd3d/libs/vkd3d-shader/sm4.h
 create mode 100644 libs/vkd3d/libs/vkd3d-shader/tpf.c
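As a usage note (not part of the patch itself): this import bumps the public API to 1.8, and a client opts in through vkd3d's existing vkd3d_application_info chain. A minimal sketch under that assumption; the event-signalling callback is a hypothetical stub here, and a real client would provide its own.

#include <vkd3d_windows.h>
#include <vkd3d.h>

/* Stub for the sketch only; a real client signals the event handle. */
static HRESULT signal_event(HANDLE event)
{
    return S_OK;
}

/* Request the VKD3D_API_VERSION_1_8 behaviour added by this import. */
static HRESULT create_instance_1_8(struct vkd3d_instance **instance)
{
    static const struct vkd3d_application_info app_info =
    {
        .type = VKD3D_STRUCTURE_TYPE_APPLICATION_INFO,
        .api_version = VKD3D_API_VERSION_1_8,
    };
    struct vkd3d_instance_create_info create_info =
    {
        .type = VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .next = &app_info,
        .pfn_signal_event = signal_event,
        .wchar_size = sizeof(WCHAR),
    };

    return vkd3d_create_instance(&create_info, instance);
}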
diff --git a/dlls/d3dcompiler_43/tests/blob.c b/dlls/d3dcompiler_43/tests/blob.c
index 8b2ccc667b2..56432a9a81d 100644
--- a/dlls/d3dcompiler_43/tests/blob.c
+++ b/dlls/d3dcompiler_43/tests/blob.c
@@ -764,7 +764,7 @@ static void test_get_blob_part2(void)
     ok(hr == S_OK, "Got unexpected hr %#lx.\n", hr);
 
     size = ID3D10Blob_GetBufferSize(blob);
-    todo_wine ok(size == 4735, "Got unexpected size %Iu.\n", size);
+    ok(size == 4735, "Got unexpected size %Iu.\n", size);
 
     dword = ((DWORD*)ID3D10Blob_GetBufferPointer(blob));
     ok(TAG_DXBC == *dword, "DXBC got %#lx, expected %#lx.\n", *dword, TAG_DXBC);
diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c
index f99aba87753..3eddc62a2b1 100644
--- a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c
+++ b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c
@@ -574,7 +574,7 @@ static void test_conditionals(void)
     release_readback(&rb);
     ID3D10Blob_Release(ps_code);
 
-    todo_wine ps_code = compile_shader(ps_ternary_source, "ps_2_0");
+    ps_code = compile_shader(ps_ternary_source, "ps_2_0");
     if (ps_code)
     {
        draw_quad(device, ps_code);
@@ -583,14 +583,14 @@ static void test_conditionals(void)
        for (i = 0; i < 320; i += 40)
        {
            v = get_readback_vec4(&rb, i, 0);
-            ok(compare_vec4(v, 0.5f, 0.25f, 0.5f, 0.75f, 0),
+            todo_wine ok(compare_vec4(v, 0.5f, 0.25f, 0.5f, 0.75f, 0),
                    "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w);
        }
 
        for (i = 360; i < 640; i += 40)
        {
            v = get_readback_vec4(&rb, i, 0);
-            ok(compare_vec4(v, 0.6f, 0.8f, 0.1f, 0.2f, 0),
+            todo_wine ok(compare_vec4(v, 0.6f, 0.8f, 0.1f, 0.2f, 0),
                    "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w);
        }
@@ -1109,7 +1109,7 @@ static void test_samplers(void)
        draw_quad(test_context.device, ps_code);
 
        v = get_color_vec4(test_context.device, 0, 0);
-        todo_wine ok(compare_vec4(&v, 0.25f, 0.0f, 0.25f, 0.0f, 128),
+        ok(compare_vec4(&v, 0.25f, 0.0f, 0.25f, 0.0f, 128),
                "Test %u: Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", i, v.x, v.y, v.z, v.w);
        ID3D10Blob_Release(ps_code);
diff --git a/libs/vkd3d/AUTHORS b/libs/vkd3d/AUTHORS
index 622aecbb078..6d4f0e0617e 100644
--- a/libs/vkd3d/AUTHORS
+++ b/libs/vkd3d/AUTHORS
@@ -8,6 +8,7 @@ Chip Davis
 Conor McCarthy
 David Gow
 Derek Lesho
+Ethan Lee
 Fabian Maurer
 Francisco Casas
 Francois Gouget
@@ -16,6 +17,7 @@ Hans-Kristian Arntzen
 Henri Verbeet
 Isabella Bosia
 Jactry Zeng
+Jan Sikorski
 Joshua Ashton
 Józef Kucia
 Martin Storsjö
diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in
index 0ed4e27ad83..1ba0e9f71e1 100644
--- a/libs/vkd3d/Makefile.in
+++ b/libs/vkd3d/Makefile.in
@@ -14,6 +14,7 @@ SOURCES = \
 	libs/vkd3d-common/memory.c \
 	libs/vkd3d-common/utf8.c \
 	libs/vkd3d-shader/checksum.c \
+	libs/vkd3d-shader/d3d_asm.c \
 	libs/vkd3d-shader/d3dbc.c \
 	libs/vkd3d-shader/dxbc.c \
 	libs/vkd3d-shader/glsl.c \
@@ -22,12 +23,11 @@ SOURCES = \
 	libs/vkd3d-shader/hlsl.y \
 	libs/vkd3d-shader/hlsl_codegen.c \
 	libs/vkd3d-shader/hlsl_constant_ops.c \
-	libs/vkd3d-shader/hlsl_sm1.c \
-	libs/vkd3d-shader/hlsl_sm4.c \
+	libs/vkd3d-shader/ir.c \
 	libs/vkd3d-shader/preproc.l \
 	libs/vkd3d-shader/preproc.y \
 	libs/vkd3d-shader/spirv.c \
-	libs/vkd3d-shader/trace.c \
+	libs/vkd3d-shader/tpf.c \
 	libs/vkd3d-shader/vkd3d_shader_main.c \
 	libs/vkd3d/command.c \
 	libs/vkd3d/device.c \
diff --git a/libs/vkd3d/config.h b/libs/vkd3d/config.h
index 6d2eeba235a..8c5aa958dde 100644
--- a/libs/vkd3d/config.h
+++ b/libs/vkd3d/config.h
@@ -1,5 +1,5 @@
 #define PACKAGE_NAME "vkd3d"
-#define PACKAGE_STRING "vkd3d 1.7"
-#define PACKAGE_VERSION "1.7"
+#define PACKAGE_STRING "vkd3d 1.8"
+#define PACKAGE_VERSION "1.8"
 #define PATH_MAX 1024
 #define SONAME_LIBVULKAN "vulkan-1.dll"
diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h
index 3cf0422596c..08dde1b2e7f 100644
--- a/libs/vkd3d/include/private/vkd3d_common.h
+++ b/libs/vkd3d/include/private/vkd3d_common.h
@@ -249,6 +249,7 @@ static inline LONG InterlockedDecrement(LONG volatile *x)
 # else
 # error "InterlockedDecrement() not implemented for this platform"
 # endif
+
 #endif /* _WIN32 */
 
 static inline void vkd3d_parse_version(const char *version, int *major, int *minor)
diff --git a/libs/vkd3d/include/private/vkd3d_debug.h b/libs/vkd3d/include/private/vkd3d_debug.h
index 4f6d43af12f..6708cad344f 100644
--- a/libs/vkd3d/include/private/vkd3d_debug.h
+++ b/libs/vkd3d/include/private/vkd3d_debug.h
@@ -91,7 +91,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size);
 
 #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN)
 
-#define VKD3D_DEBUG_ENV_NAME(name) const char *vkd3d_dbg_env_name = name
+#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name
 
 static inline const char *debugstr_guid(const GUID *guid)
 {
diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h
index ff2b15c51dc..72ed3ced671 100644
--- a/libs/vkd3d/include/vkd3d.h
+++ b/libs/vkd3d/include/vkd3d.h
@@ -76,6 +76,7 @@ enum vkd3d_api_version
     VKD3D_API_VERSION_1_5,
     VKD3D_API_VERSION_1_6,
     VKD3D_API_VERSION_1_7,
+    VKD3D_API_VERSION_1_8,
 
     VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION),
 };
diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h
index 859b8c79792..274241546ea 100644
--- a/libs/vkd3d/include/vkd3d_shader.h
+++ b/libs/vkd3d/include/vkd3d_shader.h
@@ -49,6 +49,7 @@ enum vkd3d_shader_api_version
     VKD3D_SHADER_API_VERSION_1_5,
     VKD3D_SHADER_API_VERSION_1_6,
     VKD3D_SHADER_API_VERSION_1_7,
+    VKD3D_SHADER_API_VERSION_1_8,
 
     VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION),
 };
diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c
index 499334a35f1..b363efbd360 100644
--- a/libs/vkd3d/libs/vkd3d-common/debug.c
+++ b/libs/vkd3d/libs/vkd3d-common/debug.c
@@ -40,9 +40,9 @@
 #define VKD3D_DEBUG_BUFFER_COUNT 64
 #define VKD3D_DEBUG_BUFFER_SIZE 512
 
-extern const char *vkd3d_dbg_env_name;
+extern const char *const vkd3d_dbg_env_name;
 
-static const char *debug_level_names[] =
+static const char *const debug_level_names[] =
 {
     /* VKD3D_DBG_LEVEL_NONE */ "none",
     /* VKD3D_DBG_LEVEL_ERR */ "err",
diff --git a/libs/vkd3d/libs/vkd3d-shader/trace.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
similarity index 98%
rename from libs/vkd3d/libs/vkd3d-shader/trace.c
rename to libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
index 6cd2dcb270c..0a821b5c878 100644
--- a/libs/vkd3d/libs/vkd3d-shader/trace.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
@@ -109,6 +109,7 @@ static const char * const shader_opcode_names[] =
     [VKD3DSIH_DEQ                             ] = "deq",
     [VKD3DSIH_DFMA                            ] = "dfma",
     [VKD3DSIH_DGE                             ] = "dge",
+    [VKD3DSIH_DISCARD                         ] = "discard",
     [VKD3DSIH_DIV                             ] = "div",
     [VKD3DSIH_DLT                             ] = "dlt",
     [VKD3DSIH_DMAX                            ] = "dmax",
@@ -645,7 +646,7 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler,
             break;
 
         case VKD3D_SHADER_RESOURCE_TEXTURE_3D:
-            shader_addline(buffer, "_3d");
+            shader_addline(buffer, "_volume");
             break;
 
         case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE:
@@ -660,8 +661,9 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler,
     else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV)
     {
         if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE)
-            shader_addline(buffer, "_resource_");
+            shader_addline(buffer, "_resource");
+        shader_addline(buffer, "_");
         shader_dump_resource_type(compiler, semantic->resource_type);
         if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS
                 || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY)
@@ -1505,9 +1507,9 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compile
     {
         case VKD3DSIH_BREAKP:
         case VKD3DSIH_CONTINUEP:
+        case VKD3DSIH_DISCARD:
         case VKD3DSIH_IF:
         case VKD3DSIH_RETP:
-        case VKD3DSIH_TEXKILL:
             switch (ins->flags)
             {
                 case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break;
@@ -1857,11 +1859,11 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
     shader_addline(buffer, "\n");
 }
 
-enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser,
-        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out)
+enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions,
+        const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info,
+        struct vkd3d_shader_code *out)
 {
     enum vkd3d_shader_compile_option_formatting_flags formatting;
-    struct vkd3d_shader_version *shader_version;
     struct vkd3d_d3d_asm_compiler compiler;
     enum vkd3d_result result = VKD3D_OK;
     struct vkd3d_string_buffer *buffer;
@@ -1919,16 +1921,16 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser,
     buffer = &compiler.buffer;
     vkd3d_string_buffer_init(buffer);
 
+    compiler.shader_version = *shader_version;
     shader_version = &compiler.shader_version;
-    *shader_version = parser->shader_version;
     vkd3d_string_buffer_printf(buffer, "%s%s_%u_%u%s\n", compiler.colours.version,
             shader_get_type_prefix(shader_version->type), shader_version->major,
             shader_version->minor, compiler.colours.reset);
 
     indent = 0;
-    for (i = 0; i < parser->instructions.count; ++i)
+    for (i = 0; i < instructions->count; ++i)
     {
-        struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i];
+        struct vkd3d_shader_instruction *ins = &instructions->elements[i];
 
         switch (ins->handler_idx)
         {
@@ -1981,12 +1983,13 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser,
     return result;
 }
 
-void vkd3d_shader_trace(struct vkd3d_shader_parser *parser)
+void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions,
+        const struct vkd3d_shader_version *shader_version)
 {
     const char *p, *q, *end;
     struct vkd3d_shader_code code;
 
-    if (vkd3d_dxbc_binary_to_text(parser, NULL, &code) != VKD3D_OK)
+    if (vkd3d_dxbc_binary_to_text(instructions, shader_version, NULL, &code) != VKD3D_OK)
         return;
end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index ed81137d225..712613ac13b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1,4 +1,6 @@ /* + * d3dbc (Direct3D shader models 1-3 bytecode) support + * * Copyright 2002-2003 Jason Edmeades * Copyright 2002-2003 Raphael Junqueira * Copyright 2004 Christian Costa @@ -6,6 +8,7 @@ * Copyright 2006 Ivan Gyurdiev * Copyright 2007-2008 Stefan Dösinger for CodeWeavers * Copyright 2009, 2021 Henri Verbeet for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,7 +25,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
-#include "vkd3d_shader_private.h" +#include "hlsl.h"
#define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu @@ -207,7 +210,7 @@ struct vkd3d_sm1_opcode_info struct vkd3d_shader_sm1_parser { const struct vkd3d_sm1_opcode_info *opcode_table; - const uint32_t *start, *end; + const uint32_t *start, *end, *ptr; bool abort;
struct vkd3d_shader_parser p; @@ -462,6 +465,7 @@ static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader src->reg.idx[1].rel_addr = NULL; src->reg.idx[2].offset = ~0u; src->reg.idx[2].rel_addr = NULL; + src->reg.idx_count = 1; src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; } @@ -480,6 +484,7 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader dst->reg.idx[1].rel_addr = NULL; dst->reg.idx[2].offset = ~0u; dst->reg.idx[2].rel_addr = NULL; + dst->reg.idx_count = 1; dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT; dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; @@ -661,6 +666,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const src_param->reg.idx[1].rel_addr = NULL; src_param->reg.idx[2].offset = ~0u; src_param->reg.idx[2].rel_addr = NULL; + src_param->reg.idx_count = 0; src_param->reg.immconst_type = type; memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t)); src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; @@ -671,7 +677,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const
static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) { - const uint32_t **ptr = &sm1->p.ptr; + const uint32_t **ptr = &sm1->ptr; const char *comment; unsigned int size; size_t remaining; @@ -738,13 +744,12 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, } }
-static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) +static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); struct vkd3d_shader_src_param *src_params, *predicate; const struct vkd3d_sm1_opcode_info *opcode_info; struct vkd3d_shader_dst_param *dst_param; - const uint32_t **ptr = &parser->ptr; + const uint32_t **ptr = &sm1->ptr; uint32_t opcode_token; const uint32_t *p; bool predicated; @@ -758,11 +763,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru goto fail; }
- ++parser->location.line; + ++sm1->p.location.line; opcode_token = read_u32(ptr); if (!(opcode_info = shader_sm1_get_opcode_info(sm1, opcode_token & VKD3D_SM1_OPCODE_MASK))) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, "Invalid opcode %#x (token 0x%08x, shader version %u.%u).", opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token, sm1->p.shader_version.major, sm1->p.shader_version.minor); @@ -775,14 +780,14 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru ins->raw = false; ins->structured = false; predicated = !!(opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED); - ins->predicate = predicate = predicated ? shader_parser_get_src_params(parser, 1) : NULL; + ins->predicate = predicate = predicated ? shader_parser_get_src_params(&sm1->p, 1) : NULL; ins->dst_count = opcode_info->dst_count; - ins->dst = dst_param = shader_parser_get_dst_params(parser, ins->dst_count); + ins->dst = dst_param = shader_parser_get_dst_params(&sm1->p, ins->dst_count); ins->src_count = opcode_info->src_count; - ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); + ins->src = src_params = shader_parser_get_src_params(&sm1->p, ins->src_count); if ((!predicate && predicated) || (!src_params && ins->src_count) || (!dst_param && ins->dst_count)) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); goto fail; }
@@ -852,10 +857,9 @@ fail: *ptr = sm1->end; }
-static bool shader_sm1_is_end(struct vkd3d_shader_parser *parser) +static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) { - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); - const uint32_t **ptr = &parser->ptr; + const uint32_t **ptr = &sm1->ptr;
shader_sm1_read_comment(sm1);
@@ -938,7 +942,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, shader_desc = &sm1->p.shader_desc; shader_desc->byte_code = code; shader_desc->byte_code_size = code_size; - sm1->p.ptr = sm1->start; + sm1->ptr = sm1->start;
return VKD3D_OK; } @@ -965,7 +969,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi }
instructions = &sm1->p.instructions; - while (!shader_sm1_is_end(&sm1->p)) + while (!shader_sm1_is_end(sm1)) { if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) { @@ -975,7 +979,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi return VKD3D_ERROR_OUT_OF_MEMORY; } ins = &instructions->elements[instructions->count]; - shader_sm1_read_instruction(&sm1->p, ins); + shader_sm1_read_instruction(sm1, ins);
if (ins->handler_idx == VKD3DSIH_INVALID) { @@ -988,5 +992,1094 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi
*parser = &sm1->p;
- return VKD3D_OK; + return sm1->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + +bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + unsigned int major_version; + D3DSHADER_PARAM_REGISTER_TYPE type; + DWORD offset; + } + register_table[] = + { + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type + && ctx->profile->major_version == register_table[i].major_version) + { + *type = register_table[i].type; + if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) + *reg = register_table[i].offset; + else + *reg = semantic->index; + return true; + } + } + + return false; +} + +bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +{ + static const struct + { + const char *name; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"binormal", D3DDECLUSAGE_BINORMAL}, + {"blendindices", D3DDECLUSAGE_BLENDINDICES}, + {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, + {"color", D3DDECLUSAGE_COLOR}, + {"depth", D3DDECLUSAGE_DEPTH}, + {"fog", D3DDECLUSAGE_FOG}, + {"normal", D3DDECLUSAGE_NORMAL}, + {"position", D3DDECLUSAGE_POSITION}, + {"positiont", D3DDECLUSAGE_POSITIONT}, + 
{"psize", D3DDECLUSAGE_PSIZE}, + {"sample", D3DDECLUSAGE_SAMPLE}, + {"sv_depth", D3DDECLUSAGE_DEPTH}, + {"sv_position", D3DDECLUSAGE_POSITION}, + {"sv_target", D3DDECLUSAGE_COLOR}, + {"tangent", D3DDECLUSAGE_TANGENT}, + {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, + {"texcoord", D3DDECLUSAGE_TEXCOORD}, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if (!ascii_strcasecmp(semantic->name, semantics[i].name)) + { + *usage = semantics[i].usage; + *usage_idx = semantic->index; + return true; + } + } + + return false; +} + +static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) +{ + if (type == VKD3D_SHADER_TYPE_VERTEX) + return D3DVS_VERSION(major, minor); + else + return D3DPS_VERSION(major, minor); +} + +static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3DXPC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->class); + vkd3d_unreachable(); + } +} + +static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3DXPT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_STRING: + return D3DXPT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_VERTEXSHADER: + return D3DXPT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3DXPT_VOID; + default: + vkd3d_unreachable(); + } +} + +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + unsigned int array_size = hlsl_get_multiarray_size(type); + unsigned int field_count = 0; + size_t fields_offset = 0; + size_t i; + + if (type->bytecode_offset) + return; + + if (array_type->class == HLSL_CLASS_STRUCT) + { + field_count = array_type->e.record.field_count; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm1_type(buffer, field->type, 
ctab_start); + } + + fields_offset = bytecode_align(buffer) - ctab_start; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) + sm1_sort_extern(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + struct hlsl_ir_function_decl *entry_func) +{ + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated) + continue; + + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + struct vkd3d_string_buffer *name; + + if (!(name = hlsl_get_string_buffer(ctx))) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); + var->name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + } + } + } + + sm1_sort_externs(ctx); + + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + + ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); + creator_offset = put_u32(buffer, 0); + put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */ + + vars_start = bytecode_align(buffer); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated) + continue; + + put_u32(buffer, 0); /* name */ + if (r == HLSL_REGSET_NUMERIC) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); + put_u32(buffer, var->data_type->reg_size[r] / 4); + } + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); + put_u32(buffer, var->regs[r].bind_count); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, 
extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + size_t var_offset, name_offset; + + if (var->semantic.name || !var->regs[r].allocated) + continue; + + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); + + write_sm1_type(buffer, var->data_type, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + ++uniform_count; + } + } + + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); + + ctab_end = bytecode_align(buffer); + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); +} + +static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +{ + return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +} + +struct sm1_instruction +{ + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + + struct sm1_dst_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; + } dst; + + struct sm1_src_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; + } srcs[3]; + unsigned int src_count; + + unsigned int has_dst; +}; + +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +{ + assert(reg->writemask); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); +} + +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + const struct sm1_src_register *reg) +{ + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); +} + +static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct sm1_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int i; + + if (ctx->profile->major_version > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + if (instr->has_dst) + write_sm1_dst_register(buffer, &instr->dst); + + for (i = 0; i < instr->src_count; ++i) + write_sm1_src_register(buffer, &instr->srcs[i]); +}; + +static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) +{ + src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); +} + +static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, + const struct hlsl_reg *src3) +{ + struct sm1_instruction instr = + { + .opcode = D3DSIO_DP2ADD, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .srcs[2].type = D3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer 
*buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.mod = dst_mod, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, + .src_count = 1, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) + { + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { + .type = D3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = i, + }; + + if (ctx->profile->major_version > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + write_sm1_dst_register(buffer, ®); + for (x = 0; x < 4; ++x) + put_f32(buffer, ctx->constant_defs.values[i].f[x]); + } +} + +static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool output) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { + ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); + assert(ret); + reg.type = output ? 
D3DSPR_OUTPUT : D3DSPR_INPUT; + reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; + } + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + token = (1u << 31); + token |= usage << D3DSP_DCL_USAGE_SHIFT; + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + + reg.writemask = (1 << var->data_type->dimx) - 1; + write_sm1_dst_register(buffer, ®); +} + +static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + bool write_in = false, write_out = false; + struct hlsl_ir_var *var; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) + write_in = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) + write_in = write_out = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) + write_in = true; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (write_in && var->is_input_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, false); + if (write_out && var->is_output_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, true); + } +} + +static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; + break; + + case HLSL_SAMPLER_DIM_2D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; + break; + + case HLSL_SAMPLER_DIM_CUBE: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; + break; + + case HLSL_SAMPLER_DIM_3D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; + break; + + default: + vkd3d_unreachable(); + break; + } + + token = (1u << 31); + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + + reg.type = D3DSPR_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, ®); +} + +static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + + if (ctx->profile->major_version < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + + count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; + assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); + } + } + } +} + +static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = 
hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(constant->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) +{ + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + unsigned int i; + + for (i = 0; i < instr->data_type->dimx; ++i) + { + struct hlsl_reg src = arg1->reg, dst = instr->reg; + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); + write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); + } +} + +static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; + + assert(instr->reg.allocated); + + if (instr->data_type->base_type != HLSL_TYPE_FLOAT) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + return; + } + + switch (expr->op) + { + case HLSL_OP1_ABS: + write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSX: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_EXP2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); + break; + + case HLSL_OP1_LOG2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); + break; + + case HLSL_OP1_NEG: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + break; + + case HLSL_OP1_SAT: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + break; + + case HLSL_OP1_RCP: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); + break; + + case HLSL_OP1_RSQ: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); + break; + + case HLSL_OP2_ADD: + write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: + write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: + write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MUL: + write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP1_FRACT: + write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + break; + + case HLSL_OP2_DOT: + switch (arg1->data_type->dimx) + { + case 4: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_OP3_DP2ADD: + write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 "%s" expression.", 
debug_hlsl_expr_op(expr->op)); + break; + } +} + +static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_load *load = hlsl_ir_load(instr); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + + if (load->src.var->is_uniform) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { + if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, + false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_INPUT; + sm1_instr.srcs[0].reg = reg.id; + } + else + sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); + } + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; + unsigned int sampler_offset, reg_id; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { + .opcode = D3DSIO_TEX, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .src_count = 2, + }; + + assert(instr->reg.allocated); + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_store *store = hlsl_ir_store(instr); + const struct hlsl_ir_node *rhs = store->rhs.node; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, + }; + + if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) + { + FIXME("Matrix writemasks need to be lowered.\n"); + return; + } + + if (store->lhs.var->is_output_semantic) + { + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) + { + sm1_instr.dst.type = D3DSPR_TEMP; + sm1_instr.dst.reg = 0; + } + else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, + true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { + assert(reg.allocated); + sm1_instr.dst.type = 
D3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else + sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + } + else + assert(reg.allocated); + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = swizzle->val.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(val->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_function_decl *entry_func) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); + continue; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy."); + break; + } + + assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + write_sm1_constant(ctx, buffer, instr); + break; + + case HLSL_IR_EXPR: + write_sm1_expr(ctx, buffer, instr); + break; + + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; + + case HLSL_IR_RESOURCE_LOAD: + write_sm1_resource_load(ctx, buffer, instr); + break; + + case HLSL_IR_STORE: + write_sm1_store(ctx, buffer, instr); + break; + + case HLSL_IR_SWIZZLE: + write_sm1_swizzle(ctx, buffer, instr); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} + +int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + int ret; + + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + + write_sm1_uniforms(ctx, &buffer, entry_func); + + write_sm1_constant_defs(ctx, &buffer); + write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_sampler_dcls(ctx, &buffer); + write_sm1_instructions(ctx, &buffer, entry_func); + + put_u32(&buffer, D3DSIO_END); + + if (!(ret = buffer.status)) + { + out->code = buffer.data; + out->size = buffer.size; + } + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index d99ea2e36b6..3e3f06faeb5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -19,1680 +19,74 @@ */
#include "vkd3d_shader_private.h" -#include "sm4.h"
-#define SM4_MAX_SRC_COUNT 6 -#define SM4_MAX_DST_COUNT 2 - -STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); - -void dxbc_writer_init(struct dxbc_writer *dxbc) -{ - memset(dxbc, 0, sizeof(*dxbc)); -} - -void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) -{ - struct vkd3d_shader_dxbc_section_desc *section; - - assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - - section = &dxbc->sections[dxbc->section_count++]; - section->tag = tag; - section->data.code = data; - section->data.size = size; -} - -int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, - struct vkd3d_shader_code *dxbc, char **messages) -{ - size_t size_position, offsets_position, checksum_position, i; - struct vkd3d_bytecode_buffer buffer = {0}; - uint32_t checksum[4]; - - TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); - - if (messages) - *messages = NULL; - - put_u32(&buffer, TAG_DXBC); - - checksum_position = bytecode_get_size(&buffer); - for (i = 0; i < 4; ++i) - put_u32(&buffer, 0); - - put_u32(&buffer, 1); /* version */ - size_position = put_u32(&buffer, 0); - put_u32(&buffer, section_count); - - offsets_position = bytecode_get_size(&buffer); - for (i = 0; i < section_count; ++i) - put_u32(&buffer, 0); - - for (i = 0; i < section_count; ++i) - { - set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_get_size(&buffer)); - put_u32(&buffer, sections[i].tag); - put_u32(&buffer, sections[i].data.size); - bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); - } - set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - - vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); - for (i = 0; i < 4; ++i) - set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); - - if (!buffer.status) - { - dxbc->code = buffer.data; - dxbc->size = buffer.size; - } - return buffer.status; -} - -int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) -{ - return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); -} - -struct vkd3d_shader_src_param_entry -{ - struct list entry; - struct vkd3d_shader_src_param param; -}; - -struct vkd3d_shader_sm4_parser -{ - const uint32_t *start, *end; - - unsigned int output_map[MAX_REG_OUTPUT]; - - struct vkd3d_shader_parser p; -}; - -struct vkd3d_sm4_opcode_info -{ - enum vkd3d_sm4_opcode opcode; - enum vkd3d_shader_opcode handler_idx; - char dst_info[SM4_MAX_DST_COUNT]; - char src_info[SM4_MAX_SRC_COUNT]; - void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); -}; - -static const enum vkd3d_primitive_type output_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, -}; - -static const enum vkd3d_primitive_type input_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, - /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, - /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* 
VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, - /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, -}; - -static const enum vkd3d_shader_resource_type resource_type_table[] = -{ - /* 0 */ VKD3D_SHADER_RESOURCE_NONE, - /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, - /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, - /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, - /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, - /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -}; - -static const enum vkd3d_data_type data_type_table[] = -{ - /* 0 */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, - /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, - /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, - /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, - /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, - /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, - /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, - /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, -}; - -static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) -{ - return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); -} - -static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) -{ - const struct vkd3d_shader_version *version = &sm4->p.shader_version; - - return version->major >= 5 && version->minor >= 1; -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); -static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); - -static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, - const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) -{ - *register_space = 0; - - if (!shader_is_sm_5_1(priv)) - return true; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - - *register_space = *(*ptr)++; - return true; -} - -static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, - (struct vkd3d_shader_src_param *)&ins->src[0]); - ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? 
- VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; -} - -static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_immediate_constant_buffer *icb; - enum vkd3d_sm4_shader_data_type type; - unsigned int icb_size; - - type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; - if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) - { - FIXME("Ignoring shader data type %#x.\n", type); - ins->handler_idx = VKD3DSIH_NOP; - return; - } - - ++tokens; - icb_size = token_count - 1; - if (icb_size % 4) - { - FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - - if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) - { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); - vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - icb->vec4_count = icb_size / 4; - memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); - shader_instruction_array_add_icb(&priv->p.instructions, icb); - ins->declaration.icb = icb; -} - -static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) -{ - range->first = reg->idx[1].offset; - range->last = reg->idx[shader_is_sm_5_1(sm4) ? 2 : 1].offset; - if (range->last < range->first) - { - FIXME("Invalid register range [%u:%u].\n", range->first, range->last); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, - "Last register %u must not be less than first register %u in range.\n", range->last, range->first); - } -} - -static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; - enum vkd3d_sm4_resource_type resource_type; - const uint32_t *end = &tokens[token_count]; - enum vkd3d_sm4_data_type data_type; - enum vkd3d_data_type reg_data_type; - DWORD components; - unsigned int i; - - resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; - if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) - { - FIXME("Unhandled resource type %#x.\n", resource_type); - semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - else - { - semantic->resource_type = resource_type_table[resource_type]; - } - - if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS - || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) - { - semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - - reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; - shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); - - components = *tokens++; - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - semantic->resource_data_type[i] = data_type_table[data_type]; - } - } - - if (reg_data_type == VKD3D_DATA_UAV) - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - - shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); -} - -static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); - if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) - ins->flags |= VKD3DSI_INDEXED_DYNAMIC; - - ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; - ins->declaration.cb.range.space = 0; - - if (shader_is_sm_5_1(priv)) - { - if (tokens >= end) - { - FIXME("Invalid ptr %p >= end %p.\n", tokens, end); - return; - } - - ins->declaration.cb.size = *tokens++; - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); - } -} - -static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - const uint32_t *end = &tokens[token_count]; - - ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; - if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) - FIXME("Unhandled sampler mode %#x.\n", ins->flags); - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); -} - -static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, - &ins->declaration.index_range.dst); - ins->declaration.index_range.register_count = *tokens; -} - -static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_output_primitive_type primitive_type; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - else - ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; - - 
if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled output primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_input_primitive_type primitive_type; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) - { - ins->declaration.primitive_type.type = VKD3D_PT_PATCH; - ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; - } - else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) - { - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - } - else - { - ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; - } - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled input primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = *tokens; -} - -static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.indexable_temp.register_idx = *tokens++; - ins->declaration.indexable_temp.register_size = *tokens++; - ins->declaration.indexable_temp.component_count = *tokens; -} - -static void 
shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; -} - -static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; - src_params[0].reg.u.fp_body_idx = *tokens++; - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); -} - -static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens; -} - -static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens++; - FIXME("Ignoring set of function bodies (count %u).\n", *tokens); -} - -static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.fp.index = *tokens++; - ins->declaration.fp.body_count = *tokens++; - ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; - ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; - FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); -} - -static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) - >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - 
ins->declaration.max_tessellation_factor = *(float *)tokens; -} - -static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.thread_group_size.x = *tokens++; - ins->declaration.thread_group_size.y = *tokens++; - ins->declaration.thread_group_size.z = *tokens++; -} - -static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); - ins->declaration.tgsm_raw.byte_count = *tokens; - if (ins->declaration.tgsm_raw.byte_count % 4) - FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); -} - -static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.tgsm_structured.reg); - ins->declaration.tgsm_structured.byte_stride = *tokens++; - ins->declaration.tgsm_structured.structure_count = *tokens; - if (ins->declaration.tgsm_structured.byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); -} - -static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - 
shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; -} - -/* - * d -> VKD3D_DATA_DOUBLE - * f -> VKD3D_DATA_FLOAT - * i -> VKD3D_DATA_INT - * u -> VKD3D_DATA_UINT - * O -> VKD3D_DATA_OPAQUE - * R -> VKD3D_DATA_RESOURCE - * S -> VKD3D_DATA_SAMPLER - * U -> VKD3D_DATA_UAV - */ -static const struct vkd3d_sm4_opcode_info opcode_table[] = -{ - {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, - {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, - {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, - {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, - {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, - {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, - {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, - {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, - {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, - {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, - {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, - {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, - {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, - {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, - {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, - {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, - {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, - {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, - {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, - {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, - {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, - {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, - {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, - {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, - {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, - {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, - {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, - {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, - {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, - {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, - {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, - {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, - 
{VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, - {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, - {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, - {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, - {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, - {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, - {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, - {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, - {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, - {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, - {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, - {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, - {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, - {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, - {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", - shader_sm4_read_shader_data}, - {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, - {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, - {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, - {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, - {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, - {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, - {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, - {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, - {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, - {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, - {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, - {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, - {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, - {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, - {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, - {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, - {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, - {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, - {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, - {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, - {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, - {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, - {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, - {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, - {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, - {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, - {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", - shader_sm4_read_dcl_constant_buffer}, - {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", - shader_sm4_read_dcl_sampler}, - {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", - shader_sm4_read_dcl_index_range}, - {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", - shader_sm4_read_dcl_output_topology}, - {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", - shader_sm4_read_dcl_input_primitive}, - {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - 
{VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", - shader_sm4_read_dcl_input_ps}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", - shader_sm4_read_dcl_input_ps_siv}, - {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", - shader_sm4_read_dcl_indexable_temp}, - {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", - shader_sm4_read_dcl_global_flags}, - {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, - {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, - {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, - {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, - {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, - {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, - {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, - {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", - shader_sm5_read_fcall}, - {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, - {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, - {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, - {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, - {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, - {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, - {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, - {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, - {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, - {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, - {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, - {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, - {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, - {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", - shader_sm5_read_dcl_function_body}, - {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", - shader_sm5_read_dcl_function_table}, - {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", - shader_sm5_read_dcl_interface}, - {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", - shader_sm5_read_dcl_tessellator_domain}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", - 
shader_sm5_read_dcl_tessellator_partitioning}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", - shader_sm5_read_dcl_tessellator_output_primitive}, - {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", - shader_sm5_read_dcl_hs_max_tessfactor}, - {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", - shader_sm5_read_dcl_thread_group}, - {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", - shader_sm5_read_dcl_uav_raw}, - {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", - shader_sm5_read_dcl_uav_structured}, - {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", - shader_sm5_read_dcl_tgsm_raw}, - {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", - shader_sm5_read_dcl_tgsm_structured}, - {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", - shader_sm5_read_dcl_resource_raw}, - {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", - shader_sm5_read_dcl_resource_structured}, - {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, - {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, - {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, - {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, - {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, - {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, - {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, - {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, - {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", - shader_sm5_read_sync}, - {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, - {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, - {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, - {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, - 
{VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, - {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, - {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, - {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, - {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, - {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, - {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, - {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, - {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, - {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, - {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, - {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, - {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, - {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, - {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, - {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, - {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, - {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, - {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, - {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, - {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, - {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, - {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, - {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, - {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, - {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, - {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, -}; - -static const enum vkd3d_shader_register_type register_type_table[] = -{ - /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, - /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, - /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, - /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, - /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, - /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, - /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, - /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, - /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, - /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, - /* UNKNOWN */ ~0u, - /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, - /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, - /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, - /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, - /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, - /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, - /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, - /* UNKNOWN */ ~0u, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, - /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, - /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, - /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, - /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, 
- /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, - /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, - /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, - /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, - /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, - /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, - /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, - /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, - /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, - /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, - /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, -}; - -static const enum vkd3d_shader_register_precision register_precision_table[] = -{ - /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, - /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, -}; - -static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) -{ - unsigned int i; - - for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) - { - if (opcode == opcode_table[i].opcode) return &opcode_table[i]; - } - - return NULL; -} - -static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) -{ - switch (sm4->p.shader_version.type) - { - case VKD3D_SHADER_TYPE_PIXEL: - if (reg->type == VKD3DSPR_OUTPUT) - { - unsigned int reg_idx = reg->idx[0].offset; - - if (reg_idx >= ARRAY_SIZE(sm4->output_map)) - { - ERR("Invalid output index %u.\n", reg_idx); - break; - } - - reg->type = VKD3DSPR_COLOROUT; - reg->idx[0].offset = sm4->output_map[reg_idx]; - } - break; - - default: - break; - } -} - -static enum vkd3d_data_type map_data_type(char t) -{ - switch (t) - { - case 'd': - return VKD3D_DATA_DOUBLE; - case 'f': - return VKD3D_DATA_FLOAT; - case 'i': - return VKD3D_DATA_INT; - case 'u': - return VKD3D_DATA_UINT; - case 'O': - return VKD3D_DATA_OPAQUE; - case 'R': - return VKD3D_DATA_RESOURCE; - case 'S': - return VKD3D_DATA_SAMPLER; - case 'U': - return VKD3D_DATA_UAV; - default: - ERR("Invalid data type '%c'.\n", t); - return VKD3D_DATA_FLOAT; - } -} - -static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - shader_instruction_array_destroy(&parser->instructions); - free_shader_desc(&parser->shader_desc); - vkd3d_free(sm4); -} - -static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) -{ - if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) - { - struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); - - if (!(reg_idx->rel_addr = rel_addr)) - { - ERR("Failed to get src param for relative addressing.\n"); - return false; - } - - if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) - reg_idx->offset = *(*ptr)++; - else - reg_idx->offset = 0; - shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); - } - else - { - 
reg_idx->rel_addr = NULL; - reg_idx->offset = *(*ptr)++; - } - - return true; -} - -static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) -{ - switch (register_type) - { - case VKD3D_SM4_RT_SAMPLER: - case VKD3D_SM4_RT_RESOURCE: - case VKD3D_SM4_RT_CONSTBUFFER: - case VKD3D_SM5_RT_UAV: - return true; - - default: - return false; - } -} - -static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, - enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) -{ - enum vkd3d_sm4_register_precision precision; - enum vkd3d_sm4_register_type register_type; - enum vkd3d_sm4_extended_operand_type type; - enum vkd3d_sm4_register_modifier m; - uint32_t token, order, extended; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = *(*ptr)++; - - register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; - if (register_type >= ARRAY_SIZE(register_type_table) - || register_type_table[register_type] == VKD3DSPR_INVALID) - { - FIXME("Unhandled register type %#x.\n", register_type); - param->type = VKD3DSPR_TEMP; - } - else - { - param->type = register_type_table[register_type]; - } - param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - param->non_uniform = false; - param->data_type = data_type; - - *modifier = VKD3DSPSM_NONE; - if (token & VKD3D_SM4_EXTENDED_OPERAND) - { - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - extended = *(*ptr)++; - - if (extended & VKD3D_SM4_EXTENDED_OPERAND) - { - FIXME("Skipping second-order extended operand.\n"); - *ptr += *ptr < end; - } - - type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; - if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) - { - m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; - switch (m) - { - case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: - *modifier = VKD3DSPSM_NEG; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS: - *modifier = VKD3DSPSM_ABS; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: - *modifier = VKD3DSPSM_ABSNEG; - break; - - default: - FIXME("Unhandled register modifier %#x.\n", m); - /* fall-through */ - case VKD3D_SM4_REGISTER_MODIFIER_NONE: - break; - } - - precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; - if (precision >= ARRAY_SIZE(register_precision_table) - || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) - { - FIXME("Unhandled register precision %#x.\n", precision); - param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; - } - else - { - param->precision = register_precision_table[precision]; - } - - if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) - param->non_uniform = true; - - extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK - | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK - | VKD3D_SM4_EXTENDED_OPERAND); - if (extended) - FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); - } - else if (type) - { - FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); - } - } - - order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; - - if (order < 1) - { - param->idx[0].offset = ~0u; - param->idx[0].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> 
VKD3D_SM4_ADDRESSING_SHIFT0; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[0]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 2) - { - param->idx[1].offset = ~0u; - param->idx[1].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[1]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 3) - { - param->idx[2].offset = ~0u; - param->idx[2].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[2]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order > 3) - { - WARN("Unhandled order %u.\n", order); - return false; - } - - if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) - { - enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; - unsigned int dword_count; - - switch (dimension) - { - case VKD3D_SM4_DIMENSION_SCALAR: - param->immconst_type = VKD3D_IMMCONST_SCALAR; - dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); - if (end - *ptr < dword_count) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); - *ptr += dword_count; - break; - - case VKD3D_SM4_DIMENSION_VEC4: - param->immconst_type = VKD3D_IMMCONST_VEC4; - if (end - *ptr < VKD3D_VEC4_SIZE) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); - *ptr += 4; - break; - - default: - FIXME("Unhandled dimension %#x.\n", dimension); - break; - } - } - else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) - { - /* SM5.1 places a symbol identifier in idx[0] and moves - * other values up one slot. Normalize to SM5.1. 
*/ - param->idx[2] = param->idx[1]; - param->idx[1] = param->idx[0]; - } - - map_register(priv, param); - - return true; -} - -static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) -{ - switch (reg->type) - { - case VKD3DSPR_COVERAGE: - case VKD3DSPR_DEPTHOUT: - case VKD3DSPR_DEPTHOUTGE: - case VKD3DSPR_DEPTHOUTLE: - case VKD3DSPR_GSINSTID: - case VKD3DSPR_LOCALTHREADINDEX: - case VKD3DSPR_OUTPOINTID: - case VKD3DSPR_PRIMID: - case VKD3DSPR_SAMPLEMASK: - case VKD3DSPR_OUTSTENCILREF: - return true; - default: - return false; - } -} - -static uint32_t swizzle_from_sm4(uint32_t s) -{ - return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +void dxbc_writer_init(struct dxbc_writer *dxbc) { - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) - { - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - } - else - { - enum vkd3d_sm4_swizzle_type swizzle_type = - (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - - switch (swizzle_type) - { - case VKD3D_SM4_SWIZZLE_NONE: - if (shader_sm4_is_scalar_register(&src_param->reg)) - src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - else - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - break; - - case VKD3D_SM4_SWIZZLE_SCALAR: - src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; - src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; - break; - - case VKD3D_SM4_SWIZZLE_VEC4: - src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); - break; - - default: - FIXME("Unhandled swizzle type %#x.\n", swizzle_type); - break; - } - } - - return true; + memset(dxbc, 0, sizeof(*dxbc)); }
-static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) { - enum vkd3d_shader_src_modifier modifier; - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (modifier != VKD3DSPSM_NONE) - { - ERR("Invalid source modifier %#x on destination register.\n", modifier); - return false; - } + struct vkd3d_shader_dxbc_section_desc *section;
- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; - if (data_type == VKD3D_DATA_DOUBLE) - dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); - /* Scalar registers are declared with no write mask in shader bytecode. */ - if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) - dst_param->write_mask = VKD3DSP_WRITEMASK_0; - dst_param->modifiers = 0; - dst_param->shift = 0; + assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections));
- return true; + section = &dxbc->sections[dxbc->section_count++]; + section->tag = tag; + section->data.code = data; + section->data.size = size; }
-static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, + struct vkd3d_shader_code *dxbc, char **messages) { - enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; - - switch (modifier_type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - { - static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER - | VKD3D_SM4_MODIFIER_MASK - | VKD3D_SM4_AOFFIMMI_U_MASK - | VKD3D_SM4_AOFFIMMI_V_MASK - | VKD3D_SM4_AOFFIMMI_W_MASK; - - /* Bit fields are used for sign extension. */ - struct - { - int u : 4; - int v : 4; - int w : 4; - } aoffimmi; - - if (modifier & ~recognized_bits) - FIXME("Unhandled instruction modifier %#x.\n", modifier); - - aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; - aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; - aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; - ins->texel_offset.u = aoffimmi.u; - ins->texel_offset.v = aoffimmi.v; - ins->texel_offset.w = aoffimmi.w; - break; - } - - case VKD3D_SM5_MODIFIER_DATA_TYPE: - { - DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; - unsigned int i; - - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - ins->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - ins->resource_data_type[i] = data_type_table[data_type]; - } - } - break; - } - - case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: - { - enum vkd3d_sm4_resource_type resource_type - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; - - if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) - ins->raw = true; - else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) - ins->structured = true; - - if (resource_type < ARRAY_SIZE(resource_type_table)) - ins->resource_type = resource_type_table[resource_type]; - else - { - FIXME("Unhandled resource type %#x.\n", resource_type); - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - - ins->resource_stride - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; - break; - } + size_t size_position, offsets_position, checksum_position, i; + struct vkd3d_bytecode_buffer buffer = {0}; + uint32_t checksum[4];
- default: - FIXME("Unhandled instruction modifier %#x.\n", modifier); - } -} + TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages);
-static void shader_sm4_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - const struct vkd3d_sm4_opcode_info *opcode_info; - uint32_t opcode_token, opcode, previous_token; - struct vkd3d_shader_dst_param *dst_params; - struct vkd3d_shader_src_param *src_params; - const uint32_t **ptr = &parser->ptr; - unsigned int i, len; - size_t remaining; - const uint32_t *p; - DWORD precise; + if (messages) + *messages = NULL;
- if (*ptr >= sm4->end) - { - WARN("End of byte-code, failed to read opcode.\n"); - goto fail; - } - remaining = sm4->end - *ptr; + put_u32(&buffer, TAG_DXBC);
- ++parser->location.line; + checksum_position = bytecode_get_size(&buffer); + for (i = 0; i < 4; ++i) + put_u32(&buffer, 0);
- opcode_token = *(*ptr)++; - opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + put_u32(&buffer, 1); /* version */ + size_position = put_u32(&buffer, 0); + put_u32(&buffer, section_count);
- len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - if (!len) - { - if (remaining < 2) - { - WARN("End of byte-code, failed to read length token.\n"); - goto fail; - } - len = **ptr; - } - if (!len || remaining < len) - { - WARN("Read invalid length %u (remaining %zu).\n", len, remaining); - goto fail; - } - --len; + offsets_position = bytecode_get_size(&buffer); + for (i = 0; i < section_count; ++i) + put_u32(&buffer, 0);
- if (!(opcode_info = get_opcode_info(opcode))) + for (i = 0; i < section_count; ++i) { - FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); - ins->handler_idx = VKD3DSIH_INVALID; - *ptr += len; - return; - } - - ins->handler_idx = opcode_info->handler_idx; - ins->flags = 0; - ins->coissue = false; - ins->raw = false; - ins->structured = false; - ins->predicate = NULL; - ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); - ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); - ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); - if (!src_params && ins->src_count) - { - ERR("Failed to allocate src parameters.\n"); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; + set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_get_size(&buffer)); + put_u32(&buffer, sections[i].tag); + put_u32(&buffer, sections[i].data.size); + bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); } - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - ins->resource_stride = 0; - ins->resource_data_type[0] = VKD3D_DATA_FLOAT; - ins->resource_data_type[1] = VKD3D_DATA_FLOAT; - ins->resource_data_type[2] = VKD3D_DATA_FLOAT; - ins->resource_data_type[3] = VKD3D_DATA_FLOAT; - memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + set_u32(&buffer, size_position, bytecode_get_size(&buffer));
- p = *ptr; - *ptr += len; + vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]);
- if (opcode_info->read_opcode_func) - { - ins->dst = NULL; - ins->dst_count = 0; - opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); - } - else + if (!buffer.status) { - enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; - - previous_token = opcode_token; - while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) - shader_sm4_read_instruction_modifier(previous_token = *p++, ins); - - ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) - { - ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; - instruction_dst_modifier = VKD3DSPDM_SATURATE; - } - precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; - ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; - - ins->dst = dst_params = shader_parser_get_dst_params(parser, ins->dst_count); - if (!dst_params && ins->dst_count) - { - ERR("Failed to allocate dst parameters.\n"); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - for (i = 0; i < ins->dst_count; ++i) - { - if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), - &dst_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - dst_params[i].modifiers |= instruction_dst_modifier; - } - - for (i = 0; i < ins->src_count; ++i) - { - if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), - &src_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - } + dxbc->code = buffer.data; + dxbc->size = buffer.size; } - - return; - -fail: - *ptr = sm4->end; - ins->handler_idx = VKD3DSIH_INVALID; - return; -} - -static bool shader_sm4_is_end(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - return parser->ptr == sm4->end; + return buffer.status; }
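For readers following the re-added serializer: the container it emits can be pictured as below. This is a sketch reconstructed from the put_u32()/set_u32() calls in vkd3d_shader_serialize_dxbc() above; the struct and its field names are illustrative only, since the function writes individual little-endian words rather than filling in a header struct.

/* Illustrative layout of the DXBC container written above; not a type
 * used by the code itself. */
struct dxbc_container_sketch
{
    uint32_t tag;               /* TAG_DXBC */
    uint32_t checksum[4];       /* computed over the finished buffer and
                                 * patched in last via set_u32() */
    uint32_t version;           /* always 1 */
    uint32_t total_size;        /* patched once all sections are written */
    uint32_t section_count;
    uint32_t section_offsets[]; /* one offset per section, each patched to
                                 * point at the {tag, size, data} record
                                 * appended after the offset table */
};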
-static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = -{ - .parser_destroy = shader_sm4_destroy, -}; - -static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, - size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, - struct vkd3d_shader_message_context *message_context) +int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) { - struct vkd3d_shader_version version; - uint32_t version_token, token_count; - unsigned int i; - - if (byte_code_size / sizeof(*byte_code) < 2) - { - WARN("Invalid byte code size %lu.\n", (long)byte_code_size); - return false; - } - - version_token = byte_code[0]; - TRACE("Version: 0x%08x.\n", version_token); - token_count = byte_code[1]; - TRACE("Token count: %u.\n", token_count); - - if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) - { - WARN("Invalid token count %u.\n", token_count); - return false; - } - - sm4->start = &byte_code[2]; - sm4->end = &byte_code[token_count]; - - switch (version_token >> 16) - { - case VKD3D_SM4_PS: - version.type = VKD3D_SHADER_TYPE_PIXEL; - break; - - case VKD3D_SM4_VS: - version.type = VKD3D_SHADER_TYPE_VERTEX; - break; - - case VKD3D_SM4_GS: - version.type = VKD3D_SHADER_TYPE_GEOMETRY; - break; - - case VKD3D_SM5_HS: - version.type = VKD3D_SHADER_TYPE_HULL; - break; - - case VKD3D_SM5_DS: - version.type = VKD3D_SHADER_TYPE_DOMAIN; - break; - - case VKD3D_SM5_CS: - version.type = VKD3D_SHADER_TYPE_COMPUTE; - break; - - default: - FIXME("Unrecognised shader type %#x.\n", version_token >> 16); - } - version.major = VKD3D_SM4_VERSION_MAJOR(version_token); - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, - token_count / 7u + 20)) - return false; - sm4->p.ptr = sm4->start; - - memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); - for (i = 0; i < output_signature->element_count; ++i) - { - struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; - - if (version.type == VKD3D_SHADER_TYPE_PIXEL - && ascii_strcasecmp(e->semantic_name, "SV_Target")) - continue; - if (e->register_index >= ARRAY_SIZE(sm4->output_map)) - { - WARN("Invalid output index %u.\n", e->register_index); - continue; - } - - sm4->output_map[e->register_index] = e->semantic_index; - } - - return true; + return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); }
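As a usage sketch, a caller that already holds one serialized chunk could emit a complete container through the writer API retained in dxbc.c; the helper name is hypothetical and TAG_SHDR is used purely as an example section tag:

/* Hypothetical helper, for illustration only. */
static int write_single_section_dxbc(const void *code, size_t size,
        struct vkd3d_shader_code *out)
{
    struct dxbc_writer writer;

    dxbc_writer_init(&writer);
    /* Any section tag works here; TAG_SHDR is just an example. */
    dxbc_writer_add_section(&writer, TAG_SHDR, code, size);
    /* Section offsets, the total size and the checksum are all filled in
     * by the serializer. */
    return dxbc_writer_write(&writer, out);
}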
static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) @@ -1928,12 +322,12 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, }
static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *s) + struct vkd3d_shader_message_context *message_context, struct shader_signature *s) { bool has_stream_index, has_min_precision; - struct vkd3d_shader_signature_element *e; const char *data = section->data.code; uint32_t count, header_size; + struct signature_element *e; const char *ptr = data; unsigned int i;
@@ -1979,6 +373,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s { uint32_t name_offset, mask;
+ e[i].sort_index = i; + if (has_stream_index) read_dword(&ptr, &e[i].stream_index); else @@ -1995,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s read_dword(&ptr, &e[i].sysval_semantic); read_dword(&ptr, &e[i].component_type); read_dword(&ptr, &e[i].register_index); + e[i].register_count = 1; read_dword(&ptr, &mask); e[i].mask = mask & 0xff; e[i].used_mask = (mask >> 8) & 0xff; @@ -2029,7 +426,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *ctx) { - struct vkd3d_shader_signature *is = ctx; + struct shader_signature *is = ctx;
if (section->tag != TAG_ISGN) return VKD3D_OK; @@ -2037,13 +434,13 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, if (is->elements) { FIXME("Multiple input signatures.\n"); - vkd3d_shader_free_shader_signature(is); + shader_signature_cleanup(is); } return shader_parse_signature(section, message_context, is); }
int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature) + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature) { int ret;
@@ -2122,12 +519,12 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section,
void free_shader_desc(struct vkd3d_shader_desc *desc) { - vkd3d_shader_free_shader_signature(&desc->input_signature); - vkd3d_shader_free_shader_signature(&desc->output_signature); - vkd3d_shader_free_shader_signature(&desc->patch_constant_signature); + shader_signature_cleanup(&desc->input_signature); + shader_signature_cleanup(&desc->output_signature); + shader_signature_cleanup(&desc->patch_constant_signature); }
-static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) { int ret; @@ -2151,66 +548,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, return ret; }
-int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -{ - struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm4_parser *sm4; - int ret; - - if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) - { - ERR("Failed to allocate parser.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - shader_desc = &sm4->p.shader_desc; - if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm4); - return ret; - } - - if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, &shader_desc->output_signature, message_context)) - { - WARN("Failed to initialise shader parser.\n"); - free_shader_desc(shader_desc); - vkd3d_free(sm4); - return VKD3D_ERROR_INVALID_ARGUMENT; - } - - instructions = &sm4->p.instructions; - while (!shader_sm4_is_end(&sm4->p)) - { - if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { - ERR("Failed to allocate instructions.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(&sm4->p, ins); - - if (ins->handler_idx == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ++instructions->count; - } - - *parser = &sm4->p; - - return VKD3D_OK; -} - /* root signatures */ #define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE
@@ -2862,7 +1199,7 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co size_t parameters_position; unsigned int i;
- parameters_position = bytecode_get_size(buffer); + parameters_position = bytecode_align(buffer); for (i = 0; i < parameter_count; ++i) { put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 64d6e87065b..ba5bcfbfaf0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name)
void hlsl_free_var(struct hlsl_ir_var *decl) { + unsigned int k; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); vkd3d_free(decl); }
@@ -126,7 +130,7 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type)
unsigned int hlsl_type_minor_size(const struct hlsl_type *type) { - if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) return type->dimx; else return type->dimy; @@ -134,7 +138,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type)
unsigned int hlsl_type_major_size(const struct hlsl_type *type) { - if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) return type->dimy; else return type->dimx; @@ -142,7 +146,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type)
unsigned int hlsl_type_element_count(const struct hlsl_type *type) { - switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: return type->dimx; @@ -157,16 +161,26 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) } }
-static unsigned int get_array_size(const struct hlsl_type *type) +const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_get_multiarray_element_type(type->e.array.type); + return type; +} + +unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) { - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_get_multiarray_size(type->e.array.type) * type->e.array.elements_count; return 1; }
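To make the contract of these two helpers concrete: nested array dimensions multiply, and the element type is the innermost non-array type, so for "float2x2 m[3][4]" the multiarray size is 12 and the element type is float2x2. A self-contained toy model of the size computation (not vkd3d code; the struct is a stand-in for hlsl_type):

#include <assert.h>

struct toy_type
{
    int is_array;
    unsigned int elements_count;
    const struct toy_type *element_type;
};

static unsigned int multiarray_size(const struct toy_type *type)
{
    /* Nested array dimensions multiply; non-arrays count as one element. */
    if (type->is_array)
        return multiarray_size(type->element_type) * type->elements_count;
    return 1;
}

int main(void)
{
    /* float2x2 m[3][4]; */
    const struct toy_type mat = {0};
    const struct toy_type inner = {1, 4, &mat};
    const struct toy_type outer = {1, 3, &inner};

    assert(multiarray_size(&outer) == 12);
    return 0;
}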
bool hlsl_type_is_resource(const struct hlsl_type *type) { - if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_is_resource(type->e.array.type); + + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) { @@ -183,10 +197,13 @@ bool hlsl_type_is_resource(const struct hlsl_type *type)
enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) { - if (type->type <= HLSL_CLASS_LAST_NUMERIC) + if (type->class <= HLSL_CLASS_LAST_NUMERIC) return HLSL_REGSET_NUMERIC;
- if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_get_regset(type->e.array.type); + + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) { @@ -203,8 +220,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) vkd3d_unreachable(); } } - else if (type->type == HLSL_CLASS_ARRAY) - return hlsl_type_get_regset(type->e.array.type);
vkd3d_unreachable(); } @@ -216,7 +231,8 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int * (b) the type would cross a vec4 boundary; i.e. a vec3 and a * vec1 can be packed together, but not a vec3 and a vec2. */ - if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) + if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY + || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) return align(offset, 4); return offset; } @@ -229,7 +245,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type for (k = 0; k <= HLSL_REGSET_LAST; ++k) type->reg_size[k] = 0;
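The constant-buffer packing rule that hlsl_type_get_sm4_offset() implements above can be checked in isolation. A standalone sketch under the same assumed semantics (sizes measured in register components; structs and arrays always start a fresh vec4):

#include <assert.h>

static unsigned int sm4_offset(unsigned int offset, unsigned int reg_size, int is_aggregate)
{
    /* Bump to the next vec4 slot for aggregates, or when the value would
     * straddle a vec4 boundary. */
    if (is_aggregate || (offset & 3) + reg_size > 4)
        return (offset + 3) & ~3u;
    return offset;
}

int main(void)
{
    assert(sm4_offset(3, 1, 0) == 3); /* A vec1 packs after a vec3 in the same vec4. */
    assert(sm4_offset(3, 2, 0) == 4); /* A vec2 does not; it starts a new vec4. */
    return 0;
}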
- switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -278,7 +294,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type type->reg_size[k] += field->type->reg_size[k]; }
- type->dimx += field->type->dimx * field->type->dimy * get_array_size(field->type); + type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); } break; } @@ -317,7 +333,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e vkd3d_free(type); return NULL; } - type->type = type_class; + type->class = type_class; type->base_type = base_type; type->dimx = dimx; type->dimy = dimy; @@ -330,7 +346,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e
static bool type_is_single_component(const struct hlsl_type *type) { - return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_OBJECT; + return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; }
/* Given a type and a component index, this function moves one step through the path required to @@ -349,7 +365,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, assert(!type_is_single_component(type)); assert(index < hlsl_type_component_count(type));
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: assert(index < type->dimx); @@ -427,7 +443,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; }
- if (!(deref->path = hlsl_alloc(ctx, sizeof(*deref->path) * deref->path_len))) + if (!(deref->path = hlsl_calloc(ctx, deref->path_len, sizeof(*deref->path)))) { deref->var = NULL; deref->path_len = 0; @@ -437,6 +453,71 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; }
+bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain) +{ + struct hlsl_ir_index *index; + struct hlsl_ir_load *load; + unsigned int chain_len, i; + struct hlsl_ir_node *ptr; + + deref->path = NULL; + deref->path_len = 0; + deref->offset.node = NULL; + + assert(chain); + if (chain->type == HLSL_IR_INDEX) + assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); + + /* Find the length of the index chain */ + chain_len = 0; + ptr = chain; + while (ptr->type == HLSL_IR_INDEX) + { + index = hlsl_ir_index(ptr); + + chain_len++; + ptr = index->val.node; + } + + if (ptr->type != HLSL_IR_LOAD) + { + hlsl_error(ctx, &chain->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid l-value."); + return false; + } + load = hlsl_ir_load(ptr); + + if (!init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) + return false; + + for (i = 0; i < load->src.path_len; ++i) + hlsl_src_from_node(&deref->path[i], load->src.path[i].node); + + chain_len = 0; + ptr = chain; + while (ptr->type == HLSL_IR_INDEX) + { + unsigned int p = deref->path_len - 1 - chain_len; + + index = hlsl_ir_index(ptr); + if (hlsl_index_is_noncontiguous(index)) + { + hlsl_src_from_node(&deref->path[p], deref->path[p + 1].node); + hlsl_src_remove(&deref->path[p + 1]); + hlsl_src_from_node(&deref->path[p + 1], index->idx.node); + } + else + { + hlsl_src_from_node(&deref->path[p], index->idx.node); + } + + chain_len++; + ptr = index->val.node; + } + assert(deref->path_len == load->src.path_len + chain_len); + + return true; +} + struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { struct hlsl_type *type; @@ -459,9 +540,9 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl { unsigned int path_len, path_index, deref_path_len, i; struct hlsl_type *path_type; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
- list_init(&block->instrs); + hlsl_block_init(block);
path_len = 0; path_type = hlsl_deref_get_type(ctx, prefix); @@ -487,12 +568,12 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl
if (!(c = hlsl_new_uint_constant(ctx, next_index, loc))) { - hlsl_free_instr_list(&block->instrs); + hlsl_block_cleanup(block); return false; } - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- hlsl_src_from_node(&deref->path[deref_path_len++], &c->node); + hlsl_src_from_node(&deref->path[deref_path_len++], c); }
assert(deref_path_len == deref->path_len); @@ -505,7 +586,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { assert(idx);
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: return hlsl_get_scalar_type(ctx, type->base_type); @@ -523,8 +604,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { struct hlsl_ir_constant *c = hlsl_ir_constant(idx);
- assert(c->value[0].u < type->e.record.field_count); - return type->e.record.fields[c->value[0].u].type; + assert(c->value.u[0].u < type->e.record.field_count); + return type->e.record.fields[c->value.u[0].u].type; }
default: @@ -539,7 +620,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL;
- type->type = HLSL_CLASS_ARRAY; + type->class = HLSL_CLASS_ARRAY; type->modifiers = basic_type->modifiers; type->e.array.elements_count = array_size; type->e.array.type = basic_type; @@ -559,7 +640,7 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_STRUCT; + type->class = HLSL_CLASS_STRUCT; type->base_type = HLSL_TYPE_VOID; type->name = name; type->dimy = 1; @@ -579,7 +660,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_
if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_OBJECT; + type->class = HLSL_CLASS_OBJECT; type->base_type = HLSL_TYPE_TEXTURE; type->dimx = 4; type->dimy = 1; @@ -597,7 +678,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim
if (!(type = vkd3d_calloc(1, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_OBJECT; + type->class = HLSL_CLASS_OBJECT; type->base_type = HLSL_TYPE_UAV; type->dimx = format->dimx; type->dimy = 1; @@ -614,6 +695,8 @@ static const char * get_case_insensitive_typename(const char *name) { "dword", "float", + "matrix", + "vector", }; unsigned int i;
@@ -677,7 +760,7 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha
unsigned int hlsl_type_component_count(const struct hlsl_type *type) { - switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -709,7 +792,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 if (t1 == t2) return true;
- if (t1->type != t2->type) + if (t1->class != t2->class) return false; if (t1->base_type != t2->base_type) return false; @@ -729,7 +812,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; if (t1->dimy != t2->dimy) return false; - if (t1->type == HLSL_CLASS_STRUCT) + if (t1->class == HLSL_CLASS_STRUCT) { size_t i;
@@ -748,7 +831,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; } } - if (t1->type == HLSL_CLASS_ARRAY) + if (t1->class == HLSL_CLASS_ARRAY) return t1->e.array.elements_count == t2->e.array.elements_count && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type);
@@ -772,7 +855,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, return NULL; } } - type->type = old->type; + type->class = old->class; type->base_type = old->base_type; type->dimx = old->dimx; type->dimy = old->dimy; @@ -781,7 +864,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, type->modifiers |= default_majority; type->sampler_dim = old->sampler_dim; type->is_minimum_precision = old->is_minimum_precision; - switch (old->type) + switch (old->class) { case HLSL_CLASS_ARRAY: if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) @@ -799,7 +882,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old,
type->e.record.field_count = field_count;
- if (!(type->e.record.fields = hlsl_alloc(ctx, field_count * sizeof(*type->e.record.fields)))) + if (!(type->e.record.fields = hlsl_calloc(ctx, field_count, sizeof(*type->e.record.fields)))) { vkd3d_free((void *)type->name); vkd3d_free(type); @@ -848,40 +931,58 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) return true; }
-struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, +struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *cast;
- cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, *loc); + cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, loc); if (cast) cast->data_type = type; - return hlsl_ir_expr(cast); + return cast; }
-struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) { /* Use a cast to the same type as a makeshift identity expression. */ return hlsl_new_cast(ctx, node, node->data_type, &node->loc); }
struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, - const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, const struct hlsl_reg_reservation *reg_reservation) { struct hlsl_ir_var *var; + unsigned int k;
if (!(var = hlsl_alloc(ctx, sizeof(*var)))) return NULL;
var->name = name; var->data_type = type; - var->loc = loc; + var->loc = *loc; if (semantic) var->semantic = *semantic; var->storage_modifiers = modifiers; if (reg_reservation) var->reg_reservation = *reg_reservation; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + unsigned int i, obj_count = type->reg_size[k]; + + if (obj_count == 0) + continue; + + if (!(var->objects_usage[k] = hlsl_calloc(ctx, obj_count, sizeof(*var->objects_usage[0])))) + { + for (i = 0; i < k; ++i) + vkd3d_free(var->objects_usage[i]); + vkd3d_free(var); + return NULL; + } + } + return var; }
@@ -901,7 +1002,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem hlsl_release_string_buffer(ctx, string); return NULL; } - var = hlsl_new_var(ctx, name, type, *loc, NULL, 0, NULL); + var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); hlsl_release_string_buffer(ctx, string); if (var) list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); @@ -910,7 +1011,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem
static bool type_is_single_reg(const struct hlsl_type *type) { - return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; + return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_VECTOR; }
bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other) @@ -964,7 +1065,7 @@ static void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type, list_init(&node->uses); }
-struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) +struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) { struct hlsl_deref lhs_deref;
@@ -972,7 +1073,7 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir return hlsl_new_store_index(ctx, &lhs_deref, NULL, rhs, 0, &rhs->loc); }
-struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, +struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc) { struct hlsl_ir_store *store; @@ -1001,35 +1102,35 @@ struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hl writemask = (1 << rhs->data_type->dimx) - 1; store->writemask = writemask;
- return store; + return &store->node; }
-struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) { struct hlsl_block comp_path_block; struct hlsl_ir_store *store;
- list_init(&block->instrs); + hlsl_block_init(block);
if (!(store = hlsl_alloc(ctx, sizeof(*store)))) - return NULL; + return false; init_node(&store->node, HLSL_IR_STORE, NULL, &rhs->loc);
if (!init_deref_from_component_index(ctx, &comp_path_block, &store->lhs, lhs, comp, &rhs->loc)) { vkd3d_free(store); - return NULL; + return false; } - list_move_tail(&block->instrs, &comp_path_block.instrs); + hlsl_block_add_block(block, &comp_path_block); hlsl_src_from_node(&store->rhs, rhs);
if (type_is_single_reg(rhs->data_type)) store->writemask = (1 << rhs->data_type->dimx) - 1;
- list_add_tail(&block->instrs, &store->node.entry); + hlsl_block_add_instr(block, &store->node);
- return store; + return true; }
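A sketch of the new calling convention: hlsl_new_store_component() now reports success and emits into a caller-provided block, so callers append the whole block instead of a single node. The wrapper name below is hypothetical:

static bool append_store_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs,
        const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs)
{
    struct hlsl_block block;

    if (!hlsl_new_store_component(ctx, &block, lhs, comp, rhs))
        return false;
    hlsl_block_add_block(instrs, &block);
    return true;
}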
struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, @@ -1045,66 +1146,54 @@ struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function return &call->node; }
-struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, - const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_constant *c;
- assert(type->type <= HLSL_CLASS_VECTOR); + assert(type->class <= HLSL_CLASS_VECTOR);
if (!(c = hlsl_alloc(ctx, sizeof(*c)))) return NULL;
init_node(&c->node, HLSL_IR_CONSTANT, type, loc); + c->value = *value;
- return c; + return &c->node; }
-struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) - c->value[0].u = b ? ~0u : 0; + struct hlsl_constant_value value;
- return c; + value.u[0].u = b ? ~0u : 0; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &value, loc); }
-struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, +struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; + struct hlsl_constant_value value;
- if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - c->value[0].f = f; - - return c; + value.u[0].f = f; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), &value, loc); }
-struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, - const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); - - if (c) - c->value[0].i = n; + struct hlsl_constant_value value;
- return c; + value.u[0].i = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &value, loc); }
-struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, +struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + struct hlsl_constant_value value;
- if (c) - c->value[0].u = n; - - return c; + value.u[0].u = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); }
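The constant constructors above now share a single pattern: fill a struct hlsl_constant_value on the stack and pass it to hlsl_new_constant() by pointer, rather than poking the node's fields after creation. A hypothetical vector variant for illustration; hlsl_get_vector_type() is the existing helper used elsewhere in this patch:

static struct hlsl_ir_node *new_float4_constant(struct hlsl_ctx *ctx,
        float x, float y, float z, float w, const struct vkd3d_shader_location *loc)
{
    struct hlsl_constant_value value = {0};

    value.u[0].f = x;
    value.u[1].f = y;
    value.u[2].f = z;
    value.u[3].f = w;
    return hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), &value, loc);
}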
struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -1124,11 +1213,11 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op }
struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *arg, struct vkd3d_shader_location loc) + struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg};
- return hlsl_new_expr(ctx, op, operands, arg->data_type, &loc); + return hlsl_new_expr(ctx, op, operands, arg->data_type, loc); }
struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -1140,17 +1229,21 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); }
-struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_if *iff;
if (!(iff = hlsl_alloc(ctx, sizeof(*iff)))) return NULL; - init_node(&iff->node, HLSL_IR_IF, NULL, &loc); + init_node(&iff->node, HLSL_IR_IF, NULL, loc); hlsl_src_from_node(&iff->condition, condition); - list_init(&iff->then_instrs.instrs); - list_init(&iff->else_instrs.instrs); - return iff; + hlsl_block_init(&iff->then_block); + hlsl_block_add_block(&iff->then_block, then_block); + hlsl_block_init(&iff->else_block); + if (else_block) + hlsl_block_add_block(&iff->else_block, else_block); + return &iff->node; }
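The new hlsl_new_if() contract is the same one clone_if() below relies on: the branch bodies are built up front as hlsl_blocks and donated to the node (hlsl_block_add_block() moves their instructions), instead of being filled in afterwards. A minimal sketch, with a hypothetical wrapper name:

static struct hlsl_ir_node *new_if_skeleton(struct hlsl_ctx *ctx,
        struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc)
{
    struct hlsl_block then_block, else_block;

    hlsl_block_init(&then_block);
    hlsl_block_init(&else_block);
    /* ... fill then_block / else_block with instructions here ... */
    return hlsl_new_if(ctx, condition, &then_block, &else_block, loc);
}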
struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, @@ -1183,23 +1276,36 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; }
+struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc) +{ + /* This deref can only exist temporarily because it is not the real owner of its members. */ + struct hlsl_deref tmp_deref; + + assert(deref->path_len >= 1); + + tmp_deref = *deref; + tmp_deref.path_len = deref->path_len - 1; + return hlsl_new_load_index(ctx, &tmp_deref, NULL, loc); +} + struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct vkd3d_shader_location loc) + const struct vkd3d_shader_location *loc) { struct hlsl_deref var_deref;
hlsl_init_simple_deref_from_var(&var_deref, var); - return hlsl_new_load_index(ctx, &var_deref, NULL, &loc); + return hlsl_new_load_index(ctx, &var_deref, NULL, loc); }
-struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc) { struct hlsl_type *type, *comp_type; struct hlsl_block comp_path_block; struct hlsl_ir_load *load;
- list_init(&block->instrs); + hlsl_block_init(block);
if (!(load = hlsl_alloc(ctx, sizeof(*load)))) return NULL; @@ -1213,14 +1319,14 @@ struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b vkd3d_free(load); return NULL; } - list_move_tail(&block->instrs, &comp_path_block.instrs); + hlsl_block_add_block(block, &comp_path_block);
- list_add_tail(&block->instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node);
- return load; + return &load->node; }
-struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, +struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_load *load; @@ -1229,24 +1335,37 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, return NULL; init_node(&load->node, HLSL_IR_RESOURCE_LOAD, params->format, loc); load->load_type = params->type; - if (!hlsl_copy_deref(ctx, &load->resource, ¶ms->resource)) + + if (!hlsl_init_deref_from_index_chain(ctx, &load->resource, params->resource)) { vkd3d_free(load); return NULL; } - if (!hlsl_copy_deref(ctx, &load->sampler, ¶ms->sampler)) + + if (params->sampler) { - hlsl_cleanup_deref(&load->resource); - vkd3d_free(load); - return NULL; + if (!hlsl_init_deref_from_index_chain(ctx, &load->sampler, params->sampler)) + { + hlsl_cleanup_deref(&load->resource); + vkd3d_free(load); + return NULL; + } } + hlsl_src_from_node(&load->coords, params->coords); + hlsl_src_from_node(&load->sample_index, params->sample_index); hlsl_src_from_node(&load->texel_offset, params->texel_offset); hlsl_src_from_node(&load->lod, params->lod); - return load; + hlsl_src_from_node(&load->ddx, params->ddx); + hlsl_src_from_node(&load->ddy, params->ddy); + hlsl_src_from_node(&load->cmp, params->cmp); + load->sampling_dim = params->sampling_dim; + if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) + load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; + return &load->node; }
-struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_store *store; @@ -1257,10 +1376,10 @@ struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, con hlsl_copy_deref(ctx, &store->resource, resource); hlsl_src_from_node(&store->coords, coords); hlsl_src_from_node(&store->value, value); - return store; + return &store->node; }
-struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) { struct hlsl_ir_swizzle *swizzle; @@ -1275,29 +1394,66 @@ struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); hlsl_src_from_node(&swizzle->val, val); swizzle->swizzle = s; - return swizzle; + return &swizzle->node; +} + +bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) +{ + struct hlsl_type *type = index->val.node->data_type; + + return type->class == HLSL_CLASS_MATRIX && !hlsl_type_is_row_major(type); +} + +bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) +{ + return index->val.node->data_type->class == HLSL_CLASS_OBJECT; +} + +struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type = val->data_type; + struct hlsl_ir_index *index; + + if (!(index = hlsl_alloc(ctx, sizeof(*index)))) + return NULL; + + if (type->class == HLSL_CLASS_OBJECT) + type = type->e.resource_format; + else if (type->class == HLSL_CLASS_MATRIX) + type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); + else + type = hlsl_get_element_type_from_path_index(ctx, type, idx); + + init_node(&index->node, HLSL_IR_INDEX, type, loc); + hlsl_src_from_node(&index->val, val); + hlsl_src_from_node(&index->idx, idx); + return &index->node; }
-struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, + const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump;
if (!(jump = hlsl_alloc(ctx, sizeof(*jump)))) return NULL; - init_node(&jump->node, HLSL_IR_JUMP, NULL, &loc); + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = type; - return jump; + return &jump->node; }
-struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + struct hlsl_block *block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop;
if (!(loop = hlsl_alloc(ctx, sizeof(*loop)))) return NULL; - init_node(&loop->node, HLSL_IR_LOOP, NULL, &loc); - list_init(&loop->body.instrs); - return loop; + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); + hlsl_block_init(&loop->body); + hlsl_block_add_block(&loop->body, block); + return &loop->node; }
struct clone_instr_map @@ -1319,11 +1475,13 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_ir_node *src; struct hlsl_ir_node *dst;
+ hlsl_block_init(dst_block); + LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry) { if (!(dst = clone_instr(ctx, map, src))) { - hlsl_free_instr_list(&dst_block->instrs); + hlsl_block_cleanup(dst_block); return false; } list_add_tail(&dst_block->instrs, &dst->entry); @@ -1332,7 +1490,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, { if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) { - hlsl_free_instr_list(&dst_block->instrs); + hlsl_block_cleanup(dst_block); return false; }
@@ -1390,12 +1548,7 @@ static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call
static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) { - struct hlsl_ir_constant *dst; - - if (!(dst = hlsl_new_constant(ctx, src->node.data_type, &src->node.loc))) - return NULL; - memcpy(dst->value, src->value, sizeof(src->value)); - return &dst->node; + return hlsl_new_constant(ctx, src->node.data_type, &src->value, &src->node.loc); }
static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src) @@ -1411,27 +1564,30 @@ static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_
static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) { - struct hlsl_ir_if *dst; + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *dst;
- if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) + if (!clone_block(ctx, &then_block, &src->then_block, map)) + return NULL; + if (!clone_block(ctx, &else_block, &src->else_block, map)) + { + hlsl_block_cleanup(&then_block); return NULL; + }
- if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) - || !clone_block(ctx, &dst->else_instrs, &src->else_instrs, map)) + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc))) { - hlsl_free_instr(&dst->node); + hlsl_block_cleanup(&then_block); + hlsl_block_cleanup(&else_block); return NULL; } - return &dst->node; + + return dst; }
static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) { - struct hlsl_ir_jump *dst; - - if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) - return NULL; - return &dst->node; + return hlsl_new_jump(ctx, src->type, &src->node.loc); }
static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) @@ -1452,16 +1608,18 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_
static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { - struct hlsl_ir_loop *dst; + struct hlsl_ir_node *dst; + struct hlsl_block body;
- if (!(dst = hlsl_new_loop(ctx, src->node.loc))) + if (!clone_block(ctx, &body, &src->body, map)) return NULL; - if (!clone_block(ctx, &dst->body, &src->body, map)) + + if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) { - hlsl_free_instr(&dst->node); + hlsl_block_cleanup(&body); return NULL; } - return &dst->node; + return dst; }
static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, @@ -1486,7 +1644,12 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, } clone_src(map, &dst->coords, &src->coords); clone_src(map, &dst->lod, &src->lod); + clone_src(map, &dst->ddx, &src->ddx); + clone_src(map, &dst->ddy, &src->ddy); + clone_src(map, &dst->sample_index, &src->sample_index); + clone_src(map, &dst->cmp, &src->cmp); clone_src(map, &dst->texel_offset, &src->texel_offset); + dst->sampling_dim = src->sampling_dim; return &dst->node; }
@@ -1529,12 +1692,19 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { - struct hlsl_ir_swizzle *dst; + return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, + map_instr(map, src->val.node), &src->node.loc); +}
- if (!(dst = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, - map_instr(map, src->val.node), &src->node.loc))) +static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, + struct hlsl_ir_index *src) +{ + struct hlsl_ir_node *dst; + + if (!(dst = hlsl_new_index(ctx, map_instr(map, src->val.node), map_instr(map, src->idx.node), + &src->node.loc))) return NULL; - return &dst->node; + return dst; }
static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, @@ -1554,6 +1724,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_IF: return clone_if(ctx, map, hlsl_ir_if(instr));
+ case HLSL_IR_INDEX: + return clone_index(ctx, map, hlsl_ir_index(instr)); + case HLSL_IR_JUMP: return clone_jump(ctx, hlsl_ir_jump(instr));
@@ -1593,13 +1766,12 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) { + struct hlsl_ir_node *constant, *store; struct hlsl_ir_function_decl *decl; - struct hlsl_ir_constant *constant; - struct hlsl_ir_store *store;
if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) return NULL; - list_init(&decl->body.instrs); + hlsl_block_init(&decl->body); decl->return_type = return_type; decl->parameters = *parameters; decl->loc = *loc; @@ -1620,17 +1792,17 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx,
if (!(constant = hlsl_new_bool_constant(ctx, false, loc))) return decl; - list_add_tail(&decl->body.instrs, &constant->node.entry); + hlsl_block_add_instr(&decl->body, constant);
- if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, &constant->node))) + if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, constant))) return decl; - list_add_tail(&decl->body.instrs, &store->node.entry); + hlsl_block_add_instr(&decl->body, store);
return decl; }
struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc) + const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) { struct hlsl_buffer *buffer;
@@ -1640,7 +1812,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type buffer->name = name; if (reservation) buffer->reservation = *reservation; - buffer->loc = loc; + buffer->loc = *loc; list_add_tail(&ctx->buffers, &buffer->entry); return buffer; } @@ -1698,10 +1870,10 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls { int r;
- if ((r = vkd3d_u32_compare(t1->type, t2->type))) + if ((r = vkd3d_u32_compare(t1->class, t2->class))) { - if (!((t1->type == HLSL_CLASS_SCALAR && t2->type == HLSL_CLASS_VECTOR) - || (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_SCALAR))) + if (!((t1->class == HLSL_CLASS_SCALAR && t2->class == HLSL_CLASS_VECTOR) + || (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_SCALAR))) return r; } if ((r = vkd3d_u32_compare(t1->base_type, t2->base_type))) @@ -1718,7 +1890,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls return r; if ((r = vkd3d_u32_compare(t1->dimy, t2->dimy))) return r; - if (t1->type == HLSL_CLASS_STRUCT) + if (t1->class == HLSL_CLASS_STRUCT) { size_t i;
@@ -1738,7 +1910,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls } return 0; } - if (t1->type == HLSL_CLASS_ARRAY) + if (t1->class == HLSL_CLASS_ARRAY) { if ((r = vkd3d_u32_compare(t1->e.array.elements_count, t2->e.array.elements_count))) return r; @@ -1768,7 +1940,7 @@ static int compare_function_decl_rb(const void *key, const struct rb_entry *entr
struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) { - struct vkd3d_string_buffer *string; + struct vkd3d_string_buffer *string, *inner_string;
static const char *const base_types[] = { @@ -1789,7 +1961,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru return string; }
- switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: assert(type->base_type < ARRAY_SIZE(base_types)); @@ -1808,10 +1980,9 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
case HLSL_CLASS_ARRAY: { - struct vkd3d_string_buffer *inner_string; const struct hlsl_type *t;
- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) ;
if ((inner_string = hlsl_type_to_string(ctx, t))) @@ -1820,7 +1991,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru hlsl_release_string_buffer(ctx, inner_string); }
- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) { if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) vkd3d_string_buffer_printf(string, "[]"); @@ -1860,13 +2031,26 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
assert(type->sampler_dim < ARRAY_SIZE(dimensions)); assert(type->e.resource_format->base_type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "Texture%s<%s%u>", dimensions[type->sampler_dim], - base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); + vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } return string;
case HLSL_TYPE_UAV: - vkd3d_string_buffer_printf(string, "RWTexture%s<%s%u>", dimensions[type->sampler_dim], - base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + vkd3d_string_buffer_printf(string, "RWBuffer"); + else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); + else + vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } return string;
default: @@ -1943,6 +2127,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) "HLSL_IR_CONSTANT", "HLSL_IR_EXPR", "HLSL_IR_IF", + "HLSL_IR_INDEX", "HLSL_IR_LOAD", "HLSL_IR_LOOP", "HLSL_IR_JUMP", @@ -2107,7 +2292,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl vkd3d_string_buffer_printf(buffer, "{"); for (x = 0; x < type->dimx; ++x) { - const union hlsl_constant_value *value = &constant->value[x]; + const union hlsl_constant_value_component *value = &constant->value.u[x];
switch (type->base_type) { @@ -2168,6 +2353,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_SIN] = "sin", [HLSL_OP1_SIN_REDUCED] = "sin_reduced", [HLSL_OP1_SQRT] = "sqrt", + [HLSL_OP1_TRUNC] = "trunc",
[HLSL_OP2_ADD] = "+", [HLSL_OP2_BIT_AND] = "&", @@ -2214,9 +2400,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "if ("); dump_src(buffer, &if_node->condition); vkd3d_string_buffer_printf(buffer, ") {\n"); - dump_instr_list(ctx, buffer, &if_node->then_instrs.instrs); + dump_instr_list(ctx, buffer, &if_node->then_block.instrs); vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); - dump_instr_list(ctx, buffer, &if_node->else_instrs.instrs); + dump_instr_list(ctx, buffer, &if_node->else_block.instrs); vkd3d_string_buffer_printf(buffer, " %10s }", ""); }
@@ -2255,7 +2441,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru { [HLSL_RESOURCE_LOAD] = "load_resource", [HLSL_RESOURCE_SAMPLE] = "sample", + [HLSL_RESOURCE_SAMPLE_CMP] = "sample_cmp", + [HLSL_RESOURCE_SAMPLE_CMP_LZ] = "sample_cmp_lz", [HLSL_RESOURCE_SAMPLE_LOD] = "sample_lod", + [HLSL_RESOURCE_SAMPLE_LOD_BIAS] = "sample_biased", + [HLSL_RESOURCE_SAMPLE_GRAD] = "sample_grad", [HLSL_RESOURCE_GATHER_RED] = "gather_red", [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", @@ -2269,6 +2459,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru dump_deref(buffer, &load->sampler); vkd3d_string_buffer_printf(buffer, ", coords = "); dump_src(buffer, &load->coords); + if (load->sample_index.node) + { + vkd3d_string_buffer_printf(buffer, ", sample index = "); + dump_src(buffer, &load->sample_index); + } if (load->texel_offset.node) { vkd3d_string_buffer_printf(buffer, ", offset = "); @@ -2279,6 +2474,21 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru vkd3d_string_buffer_printf(buffer, ", lod = "); dump_src(buffer, &load->lod); } + if (load->ddx.node) + { + vkd3d_string_buffer_printf(buffer, ", ddx = "); + dump_src(buffer, &load->ddx); + } + if (load->ddy.node) + { + vkd3d_string_buffer_printf(buffer, ", ddy = "); + dump_src(buffer, &load->ddy); + } + if (load->cmp.node) + { + vkd3d_string_buffer_printf(buffer, ", cmp = "); + dump_src(buffer, &load->cmp); + } vkd3d_string_buffer_printf(buffer, ")"); }
@@ -2321,6 +2531,14 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls } }
+static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_index *index) +{ + dump_src(buffer, &index->val); + vkd3d_string_buffer_printf(buffer, "[idx:"); + dump_src(buffer, &index->idx); + vkd3d_string_buffer_printf(buffer, "]"); +} + static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr) { if (instr->index) @@ -2348,6 +2566,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_if(ctx, buffer, hlsl_ir_if(instr)); break;
+ case HLSL_IR_INDEX: + dump_ir_index(buffer, hlsl_ir_index(instr)); + break; + case HLSL_IR_JUMP: dump_ir_jump(buffer, hlsl_ir_jump(instr)); break; @@ -2421,7 +2643,7 @@ void hlsl_free_type(struct hlsl_type *type) size_t i;
vkd3d_free((void *)type->name); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { for (i = 0; i < type->e.record.field_count; ++i) { @@ -2447,6 +2669,11 @@ void hlsl_free_instr_list(struct list *list) hlsl_free_instr(node); }
+void hlsl_block_cleanup(struct hlsl_block *block) +{ + hlsl_free_instr_list(&block->instrs); +} + static void free_ir_call(struct hlsl_ir_call *call) { vkd3d_free(call); @@ -2468,8 +2695,8 @@ static void free_ir_expr(struct hlsl_ir_expr *expr)
static void free_ir_if(struct hlsl_ir_if *if_node) { - hlsl_free_instr_list(&if_node->then_instrs.instrs); - hlsl_free_instr_list(&if_node->else_instrs.instrs); + hlsl_block_cleanup(&if_node->then_block); + hlsl_block_cleanup(&if_node->else_block); hlsl_src_remove(&if_node->condition); vkd3d_free(if_node); } @@ -2487,7 +2714,7 @@ static void free_ir_load(struct hlsl_ir_load *load)
static void free_ir_loop(struct hlsl_ir_loop *loop) { - hlsl_free_instr_list(&loop->body.instrs); + hlsl_block_cleanup(&loop->body); vkd3d_free(loop); }
@@ -2497,7 +2724,11 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) hlsl_cleanup_deref(&load->resource); hlsl_src_remove(&load->coords); hlsl_src_remove(&load->lod); + hlsl_src_remove(&load->ddx); + hlsl_src_remove(&load->ddy); + hlsl_src_remove(&load->cmp); hlsl_src_remove(&load->texel_offset); + hlsl_src_remove(&load->sample_index); vkd3d_free(load); }
@@ -2522,6 +2753,13 @@ static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) vkd3d_free(swizzle); }
+static void free_ir_index(struct hlsl_ir_index *index) +{ + hlsl_src_remove(&index->val); + hlsl_src_remove(&index->idx); + vkd3d_free(index); +} + void hlsl_free_instr(struct hlsl_ir_node *node) { assert(list_empty(&node->uses)); @@ -2544,6 +2782,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_if(hlsl_ir_if(node)); break;
+ case HLSL_IR_INDEX: + free_ir_index(hlsl_ir_index(node)); + break; + case HLSL_IR_JUMP: free_ir_jump(hlsl_ir_jump(node)); break; @@ -2600,7 +2842,7 @@ static void free_function_decl(struct hlsl_ir_function_decl *decl) vkd3d_free((void *)decl->attrs);
vkd3d_free(decl->parameters.vars); - hlsl_free_instr_list(&decl->body.instrs); + hlsl_block_cleanup(&decl->body); vkd3d_free(decl); }
@@ -2826,11 +3068,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx)
static const char *const sampler_names[] = { - [HLSL_SAMPLER_DIM_GENERIC] = "sampler", - [HLSL_SAMPLER_DIM_1D] = "sampler1D", - [HLSL_SAMPLER_DIM_2D] = "sampler2D", - [HLSL_SAMPLER_DIM_3D] = "sampler3D", - [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", + [HLSL_SAMPLER_DIM_GENERIC] = "sampler", + [HLSL_SAMPLER_DIM_COMPARISON] = "SamplerComparisonState", + [HLSL_SAMPLER_DIM_1D] = "sampler1D", + [HLSL_SAMPLER_DIM_2D] = "sampler2D", + [HLSL_SAMPLER_DIM_3D] = "sampler3D", + [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", };
static const struct @@ -2844,8 +3087,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) { {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, {"float", HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1}, - {"VECTOR", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, - {"MATRIX", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, + {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, + {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, @@ -2993,16 +3236,16 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name,
rb_init(&ctx->functions, compare_function_rb);
- list_init(&ctx->static_initializers); + hlsl_block_init(&ctx->static_initializers); list_init(&ctx->extern_vars);
list_init(&ctx->buffers);
if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) + hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) return false; if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) + hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) return false; ctx->cur_buffer = ctx->globals_buffer;
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index ccbf22a5801..cd1ad37a542 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -24,7 +24,9 @@ #include "wine/rbtree.h" #include "d3dcommon.h" #include "d3dx9shader.h" -#include "sm4.h" + +enum vkd3d_sm4_register_type; +enum vkd3d_sm4_swizzle_type;
/* The general IR structure is inspired by Mesa GLSL hir, even though the code * ends up being quite different in practice. Anyway, here comes the relevant @@ -102,18 +104,22 @@ enum hlsl_base_type
enum hlsl_sampler_dim { - HLSL_SAMPLER_DIM_GENERIC, - HLSL_SAMPLER_DIM_1D, - HLSL_SAMPLER_DIM_2D, - HLSL_SAMPLER_DIM_3D, - HLSL_SAMPLER_DIM_CUBE, - HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, - HLSL_SAMPLER_DIM_1DARRAY, - HLSL_SAMPLER_DIM_2DARRAY, - HLSL_SAMPLER_DIM_2DMS, - HLSL_SAMPLER_DIM_2DMSARRAY, - HLSL_SAMPLER_DIM_CUBEARRAY, - HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_GENERIC, + HLSL_SAMPLER_DIM_COMPARISON, + HLSL_SAMPLER_DIM_1D, + HLSL_SAMPLER_DIM_2D, + HLSL_SAMPLER_DIM_3D, + HLSL_SAMPLER_DIM_CUBE, + HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, + HLSL_SAMPLER_DIM_1DARRAY, + HLSL_SAMPLER_DIM_2DARRAY, + HLSL_SAMPLER_DIM_2DMS, + HLSL_SAMPLER_DIM_2DMSARRAY, + HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_LAST_TEXTURE = HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_BUFFER, + HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, + HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, };
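The two new dimensions correspond to the RWBuffer and RWStructuredBuffer object types handled in hlsl_type_to_string() above. A d3dcompiler-test style snippet (a hypothetical shader, not part of this patch) that would exercise them:

static const char uav_source[] =
    "RWBuffer<float4> u0 : register(u0);\n"
    "RWStructuredBuffer<float4> u1 : register(u1);\n"
    "\n"
    "[numthreads(1, 1, 1)]\n"
    "void main(uint id : SV_DispatchThreadID)\n"
    "{\n"
    "    u1[id] = u0[id];\n"
    "}\n";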
enum hlsl_regset @@ -134,16 +140,17 @@ struct hlsl_type /* Item entry in hlsl_scope->types. hlsl_type->name is used as key (if not NULL). */ struct rb_entry scope_entry;
- enum hlsl_type_class type; + enum hlsl_type_class class; /* If type is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. * If type is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. * Otherwise, base_type is not used. */ enum hlsl_base_type base_type;
/* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. - * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can have any value of the enum. - * If base_type is HLSL_TYPE_UAV, them sampler_dim must be one of HLSL_SAMPLER_DIM_1D, - * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, or HLSL_SAMPLER_DIM_2DARRAY. + * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_TEXTURE. + * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, + * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, + * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. * Otherwise, sampler_dim is not used */ enum hlsl_sampler_dim sampler_dim; /* Name, in case the type is a named struct or a typedef. */ @@ -207,6 +214,16 @@ struct hlsl_semantic { const char *name; uint32_t index; + + /* Whether the variable or field that stores this hlsl_semantic has already been reported as missing. */ + bool reported_missing; + /* In case the variable or field that stores this semantic has already been reported to use a + * duplicated output semantic, this value stores the last reported index + 1. Otherwise it is 0. */ + uint32_t reported_duplicated_output_next_index; + /* In case the variable or field that stores this semantic has already been reported to use a + * duplicated input semantic with incompatible values, this value stores the last reported + * index + 1. Otherwise it is 0. */ + uint32_t reported_duplicated_input_incompatible_next_index; };
/* A field within a struct type declaration, used in hlsl_type.e.fields. */ @@ -228,16 +245,21 @@ struct hlsl_struct_field size_t name_bytecode_offset; };
-/* Information of the register allocated for an instruction node or variable. +/* Information about the register(s) allocated for an instruction node or variable. * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes, * just before writing the bytecode. - * For numeric registers, a writemask can be provided to indicate the reservation of only some of the - * 4 components. * The type of register (register class) is implied from its use, so it is not stored in this * struct. */ struct hlsl_reg { + /* Index of the first register allocated. */ uint32_t id; + /* Number of registers to be allocated. + * Unlike the variable's type's reg_size, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ + uint32_t bind_count; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ unsigned int writemask; /* Whether the register has been allocated. */ bool allocated; @@ -254,6 +276,7 @@ enum hlsl_ir_node_type HLSL_IR_CONSTANT, HLSL_IR_EXPR, HLSL_IR_IF, + HLSL_IR_INDEX, HLSL_IR_LOAD, HLSL_IR_LOOP, HLSL_IR_JUMP, @@ -342,12 +365,17 @@ struct hlsl_attribute
#define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0
-/* Reservation of a specific register to a variable, field, or buffer, written in the HLSL source - * using the register(·) syntax */ +/* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a + * starting point of their allocation. They are available through the register(·) and the + * packoffset(·) syntaxes, respectively. + * The constant buffer offset is measured in register components. */ struct hlsl_reg_reservation { - char type; - unsigned int index; + char reg_type; + unsigned int reg_index; + + char offset_type; + unsigned int offset_index; };
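As a minimal sketch (not part of the patch) of how the two reservations are encoded: a constant-buffer member declared as "float4 f : packoffset(c1.y);" would be parsed into roughly the following, with the offset stored in register components as described above.

static const struct hlsl_reg_reservation example_reservation =
{
    .offset_type = 'c',
    .offset_index = 1 * 4 + 1,  /* register c1, component y */
};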
struct hlsl_ir_var @@ -360,8 +388,7 @@ struct hlsl_ir_var struct hlsl_buffer *buffer; /* Bitfield for storage modifiers (type modifiers are stored in data_type->modifiers). */ unsigned int storage_modifiers; - /* Optional register to be used as a starting point for the variable allocation, specified - * by the user via the register(·) syntax. */ + /* Optional reservations of registers and/or offsets for variables within constant buffers. */ struct hlsl_reg_reservation reg_reservation;
/* Item entry in hlsl_scope.vars. Specifically hlsl_ctx.globals.vars if the variable is global. */ @@ -384,6 +411,13 @@ struct hlsl_ir_var * and the buffer_offset instead. */ struct hlsl_reg regs[HLSL_REGSET_LAST + 1];
+ struct + { + bool used; + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -446,8 +480,8 @@ struct hlsl_ir_if { struct hlsl_ir_node node; struct hlsl_src condition; - struct hlsl_block then_instrs; - struct hlsl_block else_instrs; + struct hlsl_block then_block; + struct hlsl_block else_block; };
struct hlsl_ir_loop @@ -485,6 +519,7 @@ enum hlsl_ir_expr_op HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_SQRT, + HLSL_OP1_TRUNC,
HLSL_OP2_ADD, HLSL_OP2_BIT_AND, @@ -540,6 +575,12 @@ struct hlsl_ir_swizzle DWORD swizzle; };
+struct hlsl_ir_index +{ + struct hlsl_ir_node node; + struct hlsl_src val, idx; +}; + /* Reference to a variable, or a part of it (e.g. a vector within a matrix within a struct). */ struct hlsl_deref { @@ -574,7 +615,11 @@ enum hlsl_resource_load_type { HLSL_RESOURCE_LOAD, HLSL_RESOURCE_SAMPLE, + HLSL_RESOURCE_SAMPLE_CMP, + HLSL_RESOURCE_SAMPLE_CMP_LZ, HLSL_RESOURCE_SAMPLE_LOD, + HLSL_RESOURCE_SAMPLE_LOD_BIAS, + HLSL_RESOURCE_SAMPLE_GRAD, HLSL_RESOURCE_GATHER_RED, HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, @@ -586,7 +631,8 @@ struct hlsl_ir_resource_load struct hlsl_ir_node node; enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; - struct hlsl_src coords, lod, texel_offset; + struct hlsl_src coords, lod, ddx, ddy, cmp, sample_index, texel_offset; + enum hlsl_sampler_dim sampling_dim; };
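A hedged sketch, assuming the helpers declared later in this header: with the new HLSL_IR_INDEX node, a two-level access such as "m[i][j]" can be built as a chain of index nodes whose val fields point at the previous link. build_index_chain() is purely illustrative; as in add_array_access() below, the caller is expected to append the new nodes to an instruction list.

static struct hlsl_ir_node *build_index_chain(struct hlsl_ctx *ctx,
        struct hlsl_ir_node *val, struct hlsl_ir_node *i, struct hlsl_ir_node *j,
        const struct vkd3d_shader_location *loc)
{
    struct hlsl_ir_node *row;

    if (!(row = hlsl_new_index(ctx, val, i, loc)))
        return NULL;
    /* hlsl_init_deref_from_index_chain() can later fold such a chain back
     * into a variable dereference. */
    return hlsl_new_index(ctx, row, j, loc);
}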
struct hlsl_ir_resource_store @@ -607,13 +653,16 @@ struct hlsl_ir_store struct hlsl_ir_constant { struct hlsl_ir_node node; - union hlsl_constant_value + struct hlsl_constant_value { - uint32_t u; - int32_t i; - float f; - double d; - } value[4]; + union hlsl_constant_value_component + { + uint32_t u; + int32_t i; + float f; + double d; + } u[4]; + } value; /* Constant register of type 'c' where the constant value is stored for SM1. */ struct hlsl_reg reg; }; @@ -674,6 +723,9 @@ struct hlsl_buffer unsigned size, used_size; /* Register of type 'b' on which the buffer is allocated. */ struct hlsl_reg reg; + + bool manually_packed_elements; + bool automatically_packed_elements; };
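A small sketch (not part of the patch) of how call sites adapt to the wrapped constant value introduced in the hlsl_ir_constant change above: the first scalar component that used to be read as "constant->value[0].u" is now reached through the named struct.

static uint32_t example_first_uint(const struct hlsl_ir_constant *constant)
{
    return constant->value.u[0].u;
}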
struct hlsl_ctx @@ -744,9 +796,8 @@ struct hlsl_ctx struct hlsl_type *Void; } builtin_types;
- /* List of the instruction nodes for initializing static variables; linked by the - * hlsl_ir_node.entry fields. */ - struct list static_initializers; + /* List of the instruction nodes for initializing static variables. */ + struct hlsl_block static_initializers;
/* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. * Only used for SM1 profiles. */ @@ -780,8 +831,9 @@ struct hlsl_resource_load_params { struct hlsl_type *format; enum hlsl_resource_load_type type; - struct hlsl_deref resource, sampler; - struct hlsl_ir_node *coords, *lod, *texel_offset; + struct hlsl_ir_node *resource, *sampler; + struct hlsl_ir_node *coords, *lod, *ddx, *ddy, *cmp, *sample_index, *texel_offset; + enum hlsl_sampler_dim sampling_dim; };
static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) @@ -850,6 +902,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); }
+static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) +{ + assert(node->type == HLSL_IR_INDEX); + return CONTAINING_RECORD(node, struct hlsl_ir_index, node); +} + +static inline void hlsl_block_init(struct hlsl_block *block) +{ + list_init(&block->instrs); +} + +static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr) +{ + list_add_tail(&block->instrs, &instr->entry); +} + +static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add) +{ + list_move_tail(&block->instrs, &add->instrs); +} + static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) { src->node = node; @@ -873,6 +946,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) return ptr; }
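The three block helpers introduced above replace open-coded list manipulation at call sites such as append_conditional_break() in hlsl.y. A minimal usage sketch; example_splice() is illustrative, not part of the patch.

static void example_splice(struct hlsl_block *dst, struct hlsl_block *src,
        struct hlsl_ir_node *instr)
{
    hlsl_block_init(dst);             /* start from an empty instruction list */
    hlsl_block_add_instr(dst, instr); /* append a single node */
    hlsl_block_add_block(dst, src);   /* move all of src's instructions, leaving src empty */
}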
+static inline void *hlsl_calloc(struct hlsl_ctx *ctx, size_t count, size_t size) +{ + void *ptr = vkd3d_calloc(count, size); + + if (!ptr) + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return ptr; +} + static inline void *hlsl_realloc(struct hlsl_ctx *ctx, void *ptr, size_t size) { void *ret = vkd3d_realloc(ptr, size); @@ -948,6 +1030,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) switch (dim) { case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: return 1; case HLSL_SAMPLER_DIM_1DARRAY: case HLSL_SAMPLER_DIM_2D: @@ -974,11 +1058,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
-struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
+void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); @@ -986,6 +1071,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
+bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other);
void hlsl_cleanup_deref(struct hlsl_deref *deref); @@ -1012,64 +1098,73 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2); -struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc); + const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, - const struct vkd3d_shader_location *loc); -struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, +struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); +struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *data_type, const struct vkd3d_shader_location *loc); -struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, +struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, const struct vkd3d_shader_location *loc); struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); -struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc); -struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, - const struct vkd3d_shader_location *loc); -struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc); +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct vkd3d_shader_location loc); + const struct vkd3d_shader_location *loc); struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc);
-struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); -struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, +struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); +struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc); -struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs);
-struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc); -struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, +bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); +bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); + +struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + struct hlsl_block *block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); -struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); -struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); -struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, +struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, - struct vkd3d_shader_location loc); + const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, - const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, const struct hlsl_reg_reservation *reg_reservation);
void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, @@ -1101,6 +1196,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
+const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); +unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); + unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim); unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); @@ -1109,12 +1207,17 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); +bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index); bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
+bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context);
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); @@ -1124,7 +1227,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); + bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);
int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index adff1da04d8..e9ae3ccf3d3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -37,6 +37,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); %option bison-locations %option extra-type="struct hlsl_ctx *" %option never-interactive +%option nodefault %option noinput %option nounput %option noyywrap @@ -95,6 +96,7 @@ matrix {return KW_MATRIX; } namespace {return KW_NAMESPACE; } nointerpolation {return KW_NOINTERPOLATION; } out {return KW_OUT; } +packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } precise {return KW_PRECISE; } @@ -102,6 +104,8 @@ RasterizerState {return KW_RASTERIZERSTATE; } RenderTargetView {return KW_RENDERTARGETVIEW; } return {return KW_RETURN; } register {return KW_REGISTER; } +RWBuffer {return KW_RWBUFFER; } +RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } RWTexture1D {return KW_RWTEXTURE1D; } RWTexture2D {return KW_RWTEXTURE2D; } RWTexture3D {return KW_RWTEXTURE3D; } @@ -265,6 +269,10 @@ row_major {return KW_ROW_MAJOR; } return STRING; } <pp_line>{WS}+ {} +<pp_line>{ANY} { + FIXME("Malformed preprocessor line directive?\n"); + BEGIN(INITIAL); + } <pp_line>{NEWLINE} { FIXME("Malformed preprocessor line directive?\n"); BEGIN(INITIAL); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index fd1eaf6ec95..0e07fe578e1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -85,8 +85,8 @@ struct parse_function
struct parse_if_body { - struct list *then_instrs; - struct list *else_instrs; + struct list *then_block; + struct list *else_block; };
enum parse_assign_op @@ -164,7 +164,7 @@ static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct src_comp_type = hlsl_type_get_component_type(ctx, src, k); dst_comp_type = hlsl_type_get_component_type(ctx, dst, k);
- if ((src_comp_type->type != HLSL_CLASS_SCALAR || dst_comp_type->type != HLSL_CLASS_SCALAR) + if ((src_comp_type->class != HLSL_CLASS_SCALAR || dst_comp_type->class != HLSL_CLASS_SCALAR) && !hlsl_types_are_equal(src_comp_type, dst_comp_type)) return false; } @@ -196,9 +196,9 @@ static bool type_contains_only_numerics(struct hlsl_type *type) { unsigned int i;
- if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_contains_only_numerics(type->e.array.type); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { for (i = 0; i < type->e.record.field_count; ++i) { @@ -207,23 +207,23 @@ static bool type_contains_only_numerics(struct hlsl_type *type) } return true; } - return type->type <= HLSL_CLASS_LAST_NUMERIC; + return type->class <= HLSL_CLASS_LAST_NUMERIC; }
static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { - if (src->type <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) + if (src->class <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) return true;
- if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX && src->dimx >= dst->dimx && src->dimy >= dst->dimy) return true;
- if ((src->type == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) + if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false;
- if ((dst->type == HLSL_CLASS_MATRIX && dst->dimy > 1) + if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false;
@@ -232,10 +232,10 @@ static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ
static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { - if ((src->type <= HLSL_CLASS_LAST_NUMERIC) != (dst->type <= HLSL_CLASS_LAST_NUMERIC)) + if ((src->class <= HLSL_CLASS_LAST_NUMERIC) != (dst->class <= HLSL_CLASS_LAST_NUMERIC)) return false;
- if (src->type <= HLSL_CLASS_LAST_NUMERIC) + if (src->class <= HLSL_CLASS_LAST_NUMERIC) { /* Scalar vars can be converted to any other numeric data type */ if (src->dimx == 1 && src->dimy == 1) @@ -244,21 +244,21 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ if (dst->dimx == 1 && dst->dimy == 1) return true;
- if (src->type == HLSL_CLASS_MATRIX || dst->type == HLSL_CLASS_MATRIX) + if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) { - if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX) + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) return src->dimx >= dst->dimx && src->dimy >= dst->dimy;
/* Matrix-vector conversion is apparently allowed if they have * the same components count, or if the matrix is 1xN or Nx1 * and we are reducing the component count */ - if (src->type == HLSL_CLASS_VECTOR || dst->type == HLSL_CLASS_VECTOR) + if (src->class == HLSL_CLASS_VECTOR || dst->class == HLSL_CLASS_VECTOR) { if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) return true;
- if ((src->type == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && - (dst->type == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) + if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && + (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); }
@@ -273,19 +273,19 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); }
-static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc);
static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; - struct hlsl_ir_expr *cast; + struct hlsl_ir_node *cast;
if (hlsl_types_are_equal(src_type, dst_type)) return node;
- if (src_type->type > HLSL_CLASS_VECTOR || dst_type->type > HLSL_CLASS_VECTOR) + if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) { unsigned int src_comp_count = hlsl_type_component_count(src_type); unsigned int dst_comp_count = hlsl_type_component_count(dst_type); @@ -295,9 +295,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var; unsigned int dst_idx;
- broadcast = src_type->type <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; + broadcast = src_type->class <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; matrix_cast = !broadcast && dst_comp_count != src_comp_count - && src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX; + && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; assert(src_comp_count >= dst_comp_count || broadcast); if (matrix_cast) { @@ -311,8 +311,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs,
for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) { + struct hlsl_ir_node *component_load; struct hlsl_type *dst_comp_type; - struct hlsl_ir_store *store; struct hlsl_block block; unsigned int src_idx;
@@ -333,19 +333,19 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs,
dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
- if (!(load = add_load_component(ctx, instrs, node, src_idx, loc))) + if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) return NULL;
- if (!(cast = hlsl_new_cast(ctx, &load->node, dst_comp_type, loc))) + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); + list_add_tail(instrs, &cast->entry);
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, &cast->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) return NULL; list_move_tail(instrs, &block.instrs); }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; list_add_tail(instrs, &load->node.entry);
@@ -355,8 +355,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); - return &cast->node; + list_add_tail(instrs, &cast->entry); + return cast; } }
@@ -384,19 +384,20 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct
if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->type == HLSL_CLASS_VECTOR ? "vector" : "matrix"); + src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix");
return add_cast(ctx, instrs, node, dst_type, loc); }
-static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, const struct vkd3d_shader_location loc) +static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, + const struct vkd3d_shader_location *loc) { if (modifiers & mod) { struct vkd3d_string_buffer *string;
if ((string = hlsl_modifiers_to_string(ctx, mod))) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Modifier '%s' was already specified.", string->buffer); hlsl_release_string_buffer(ctx, string); return modifiers; @@ -406,26 +407,27 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, con
static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) { - struct hlsl_ir_node *condition, *not; - struct hlsl_ir_jump *jump; - struct hlsl_ir_if *iff; + struct hlsl_ir_node *condition, *not, *iff, *jump; + struct hlsl_block then_block;
/* E.g. "for (i = 0; ; ++i)". */ if (list_empty(cond_list)) return true;
condition = node_from_list(cond_list); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, condition->loc))) + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) return false; list_add_tail(cond_list, &not->entry);
- if (!(iff = hlsl_new_if(ctx, not, condition->loc))) + hlsl_block_init(&then_block); + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) return false; - list_add_tail(cond_list, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, condition->loc))) + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; - list_add_head(&iff->then_instrs.instrs, &jump->node.entry); + list_add_tail(cond_list, &iff->entry); return true; }
@@ -436,46 +438,87 @@ enum loop_type LOOP_DO_WHILE };
-static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, struct list *init, struct list *cond, - struct list *iter, struct list *body, struct vkd3d_shader_location loc) +static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) { - struct list *list = NULL; - struct hlsl_ir_loop *loop = NULL; - struct hlsl_ir_if *cond_jump = NULL; + unsigned int i, j;
- if (!(list = make_empty_list(ctx))) - goto oom; + for (i = 0; i < attrs->count; ++i) + { + for (j = i + 1; j < attrs->count; ++j) + { + if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) + return true; + } + } + + return false; +} + +static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, + struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) +{ + struct hlsl_block body_block; + struct hlsl_ir_node *loop; + unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute.");
- if (init) - list_move_head(list, init); + /* Ignore unroll(0) attribute, and any invalid attribute. */ + for (i = 0; i < attributes->count; ++i) + { + const struct hlsl_attribute *attr = attributes->attrs[i]; + if (!strcmp(attr->name, "unroll")) + { + if (attr->args_count) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); + } + else + { + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); + } + } + else if (!strcmp(attr->name, "loop") + || !strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) + { + hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); + } + else + { + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); + } + }
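For reference, a hedged sketch of HLSL inputs that exercise the paths above (diagnostics paraphrased; not part of the patch):

/* [unroll] for (...) { }      -> warning: loop unrolling is not implemented
 * [unroll(4)] for (...) { }   -> error: unroll attribute with iteration count
 * [fastopt] for (...) { }     -> hlsl_fixme: unhandled attribute
 * [loop][loop] for (...) { }  -> error: found duplicate attribute */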
- if (!(loop = hlsl_new_loop(ctx, loc))) + if (!init && !(init = make_empty_list(ctx))) goto oom; - list_add_tail(list, &loop->node.entry);
if (!append_conditional_break(ctx, cond)) goto oom;
+ hlsl_block_init(&body_block); + if (type != LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); + list_move_tail(&body_block.instrs, cond);
- list_move_tail(&loop->body.instrs, body); + list_move_tail(&body_block.instrs, body);
if (iter) - list_move_tail(&loop->body.instrs, iter); + list_move_tail(&body_block.instrs, iter);
if (type == LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); + list_move_tail(&body_block.instrs, cond); + + if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) + goto oom; + list_add_tail(init, &loop->entry);
- vkd3d_free(init); vkd3d_free(cond); vkd3d_free(body); - return list; + return init;
oom: - vkd3d_free(loop); - vkd3d_free(cond_jump); - vkd3d_free(list); destroy_instr_list(init); destroy_instr_list(cond); destroy_instr_list(iter); @@ -500,14 +543,14 @@ static void free_parse_initializer(struct parse_initializer *initializer) vkd3d_free(initializer->args); }
-static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, +static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, struct vkd3d_shader_location *loc) { unsigned int len = strlen(swizzle), component = 0; unsigned int i, set, swiz = 0; bool valid;
- if (value->data_type->type == HLSL_CLASS_MATRIX) + if (value->data_type->class == HLSL_CLASS_MATRIX) { /* Matrix swizzle */ bool m_swizzle; @@ -582,224 +625,102 @@ static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_ return NULL; }
-static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *return_value, struct vkd3d_shader_location loc) +static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; - struct hlsl_ir_jump *jump; + struct hlsl_ir_node *jump;
if (ctx->cur_function->return_var) { if (return_value) { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store;
- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) - return NULL; + if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) + return false;
if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) - return NULL; - list_add_after(&return_value->entry, &store->node.entry); + return false; + list_add_after(&return_value->entry, &store->entry); } else { - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); - return NULL; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); + return false; } } else { if (return_value) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); }
if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) - return NULL; - list_add_tail(instrs, &jump->node.entry); - - return jump; -} - -static struct hlsl_ir_load *add_load_index(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) -{ - const struct hlsl_deref *src; - struct hlsl_ir_load *load; - - if (var_instr->type == HLSL_IR_LOAD) - { - src = &hlsl_ir_load(var_instr)->src; - } - else - { - struct hlsl_ir_store *store; - struct hlsl_ir_var *var; - - if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) - return NULL; - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - src = &store->lhs; - } - - if (!(load = hlsl_new_load_index(ctx, src, idx, loc))) - return NULL; - list_add_tail(instrs, &load->node.entry); + return false; + list_add_tail(instrs, &jump->entry);
- return load; + return true; }
-static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) { - const struct hlsl_deref *src; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load, *store; struct hlsl_block block; + struct hlsl_ir_var *var; + struct hlsl_deref src;
- if (var_instr->type == HLSL_IR_LOAD) - { - src = &hlsl_ir_load(var_instr)->src; - } - else - { - struct hlsl_ir_store *store; - struct hlsl_ir_var *var; - - if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) - return NULL; - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; - list_add_tail(instrs, &store->node.entry); + if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) + return NULL;
- src = &store->lhs; - } + if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) + return NULL; + list_add_tail(instrs, &store->entry);
- if (!(load = hlsl_new_load_component(ctx, &block, src, comp, loc))) + hlsl_init_simple_deref_from_var(&src, var); + if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) return NULL; list_move_tail(instrs, &block.instrs);
return load; }
-static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, - unsigned int idx, const struct vkd3d_shader_location loc) +static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, + unsigned int idx, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *index, *c;
assert(idx < record->data_type->e.record.field_count);
- if (!(c = hlsl_new_uint_constant(ctx, idx, &loc))) - return false; - list_add_tail(instrs, &c->node.entry); - - return !!add_load_index(ctx, instrs, record, &c->node, &loc); -} - -static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc); - -static bool add_matrix_index(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *matrix, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) -{ - struct hlsl_type *mat_type = matrix->data_type, *ret_type; - struct hlsl_deref var_deref; - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - unsigned int i; - - if (hlsl_type_is_row_major(mat_type)) - return add_load_index(ctx, instrs, matrix, index, loc); - - ret_type = hlsl_get_vector_type(ctx, mat_type->base_type, mat_type->dimx); - - if (!(var = hlsl_new_synthetic_var(ctx, "index", ret_type, loc))) + if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) return false; - hlsl_init_simple_deref_from_var(&var_deref, var); - - for (i = 0; i < mat_type->dimx; ++i) - { - struct hlsl_ir_load *column, *value; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; - struct hlsl_block block; - - if (!(c = hlsl_new_uint_constant(ctx, i, loc))) - return false; - list_add_tail(instrs, &c->node.entry); - - if (!(column = add_load_index(ctx, instrs, matrix, &c->node, loc))) - return false; - - if (!(value = add_load_index(ctx, instrs, &column->node, index, loc))) - return false; + list_add_tail(instrs, &c->entry);
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i, &value->node))) - return false; - list_move_tail(instrs, &block.instrs); - } - - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(index = hlsl_new_index(ctx, record, c, loc))) return false; - list_add_tail(instrs, &load->node.entry); + list_add_tail(instrs, &index->entry);
return true; }
-static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *index, unsigned int dim_count, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_load *coords_load; - struct hlsl_deref coords_deref; - struct hlsl_ir_constant *zero; - struct hlsl_ir_store *store; - struct hlsl_ir_var *coords; - - if (!(coords = hlsl_new_synthetic_var(ctx, "coords", - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) - return NULL; - - hlsl_init_simple_deref_from_var(&coords_deref, coords); - if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) - return NULL; - list_add_tail(instrs, &zero->node.entry); - - if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, &zero->node, 1u << dim_count, loc))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - if (!(coords_load = hlsl_new_var_load(ctx, coords, *loc))) - return NULL; - list_add_tail(instrs, &coords_load->node.entry); - - return &coords_load->node; -} +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc);
-static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, +static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; - struct hlsl_ir_expr *cast; + struct hlsl_ir_node *return_index, *cast;
- if (expr_type->type == HLSL_CLASS_OBJECT + if (expr_type->class == HLSL_CLASS_OBJECT && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); - /* Only HLSL_IR_LOAD can return an object. */ - struct hlsl_ir_load *object_load = hlsl_ir_load(array); - struct hlsl_ir_resource_load *resource_load;
- if (index_type->type > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) + if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) { struct vkd3d_string_buffer *string;
@@ -814,20 +735,14 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) return false;
- if (!(index = add_zero_mipmap_level(ctx, instrs, index, dim_count, loc))) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; + list_add_tail(instrs, &return_index->entry);
- load_params.format = expr_type->e.resource_format; - load_params.resource = object_load->src; - load_params.coords = index; - - if (!(resource_load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &resource_load->node.entry); return true; }
- if (index_type->type != HLSL_CLASS_SCALAR) + if (index_type->class != HLSL_CLASS_SCALAR) { hlsl_error(ctx, &index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Array index is not scalar."); return false; @@ -835,23 +750,21 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls
if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) return false; - list_add_tail(instrs, &cast->node.entry); - index = &cast->node; + list_add_tail(instrs, &cast->entry); + index = cast;
- if (expr_type->type == HLSL_CLASS_MATRIX) - return add_matrix_index(ctx, instrs, array, index, loc); - - if (expr_type->type != HLSL_CLASS_ARRAY && expr_type->type != HLSL_CLASS_VECTOR) + if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) { - if (expr_type->type == HLSL_CLASS_SCALAR) + if (expr_type->class == HLSL_CLASS_SCALAR) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Scalar expressions cannot be array-indexed."); else hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Expression cannot be array-indexed."); return false; }
- if (!add_load_index(ctx, instrs, array, index, loc)) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; + list_add_tail(instrs, &return_index->entry);
return true; } @@ -877,12 +790,12 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_
if (!(*modifiers & HLSL_MODIFIERS_MAJORITY_MASK) && !(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK) - && type->type == HLSL_CLASS_MATRIX) + && type->class == HLSL_CLASS_MATRIX) { if (!(default_majority = ctx->matrix_majority) && force_majority) default_majority = HLSL_MODIFIER_COLUMN_MAJOR; } - else if (type->type != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) + else if (type->class != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "'row_major' and 'column_major' modifiers are only allowed for matrices."); @@ -923,7 +836,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct parse_variable_def *v, *v_next; size_t i = 0;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
memset(fields, 0, sizeof(*fields)); @@ -939,7 +852,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields,
field->type = type;
- if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { for (k = 0; k < v->arrays.count; ++k) unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -983,6 +896,9 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); free_parse_initializer(&v->initializer); } + if (v->reg_reservation.offset_type) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed inside struct definitions."); vkd3d_free(v); } vkd3d_free(defs); @@ -1052,18 +968,23 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, }
static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, - struct parse_parameter *param, const struct vkd3d_shader_location loc) + struct parse_parameter *param, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var;
- if (param->type->type == HLSL_CLASS_MATRIX) + if (param->type->class == HLSL_CLASS_MATRIX) assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name);
- if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, param->modifiers, &param->reg_reservation))) + if (param->reg_reservation.offset_type) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on function parameters."); + + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, param->modifiers, + &param->reg_reservation))) return false; var->is_param = 1;
@@ -1084,12 +1005,61 @@ static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) { struct hlsl_reg_reservation reservation = {0};
- if (!sscanf(reg_string + 1, "%u", &reservation.index)) + if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) { FIXME("Unsupported register reservation syntax.\n"); return reservation; } - reservation.type = reg_string[0]; + reservation.reg_type = ascii_tolower(reg_string[0]); + return reservation; +} + +static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, + const char *swizzle, const struct vkd3d_shader_location *loc) +{ + struct hlsl_reg_reservation reservation = {0}; + char *endptr; + + if (ctx->profile->major_version < 4) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); + if (*endptr) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() syntax."); + return reservation; + } + + reservation.offset_type = ascii_tolower(reg_string[0]); + if (reservation.offset_type != 'c') + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Only 'c' registers are allowed in packoffset()."); + return reservation; + } + + reservation.offset_index *= 4; + + if (swizzle) + { + if (strlen(swizzle) != 1) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() component \"%s\".", swizzle); + + if (swizzle[0] == 'x' || swizzle[0] == 'r') + reservation.offset_index += 0; + else if (swizzle[0] == 'y' || swizzle[0] == 'g') + reservation.offset_index += 1; + else if (swizzle[0] == 'z' || swizzle[0] == 'b') + reservation.offset_index += 2; + else if (swizzle[0] == 'w' || swizzle[0] == 'a') + reservation.offset_index += 3; + else + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() component \"%s\".", swizzle); + } + return reservation; }
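A worked example of the arithmetic above (illustrative only): "packoffset(c3.z)" stores offset_index = 3 * 4 + 2 = 14, i.e. the whole 'c' register index is scaled to four components and the x/y/z/w (or r/g/b/a) component index is added. The helper below is a sketch of that mapping and assumes the component letter is one the parser accepts.

static unsigned int example_packoffset_components(unsigned int reg_index, char comp)
{
    switch (comp)
    {
        case 'x': case 'r': return reg_index * 4;
        case 'y': case 'g': return reg_index * 4 + 1;
        case 'z': case 'b': return reg_index * 4 + 2;
        case 'w': case 'a': return reg_index * 4 + 3;
        default: return reg_index * 4; /* invalid components are diagnosed above */
    }
}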
@@ -1122,53 +1092,37 @@ static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) return list; }
-static unsigned int evaluate_static_expression(struct hlsl_ir_node *node) +static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) { - if (node->data_type->type != HLSL_CLASS_SCALAR) + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; + unsigned int ret = 0; + bool progress; + + if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) return 0;
- switch (node->type) + do { - case HLSL_IR_CONSTANT: - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(node); - const union hlsl_constant_value *value = &constant->value[0]; - - switch (constant->node.data_type->base_type) - { - case HLSL_TYPE_UINT: - return value->u; - case HLSL_TYPE_INT: - return value->i; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return value->f; - case HLSL_TYPE_DOUBLE: - return value->d; - case HLSL_TYPE_BOOL: - return !!value->u; - default: - vkd3d_unreachable(); - } - } - - case HLSL_IR_EXPR: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_SWIZZLE: - FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); - return 0; + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); + progress |= hlsl_copy_propagation_execute(ctx, block); + } while (progress);
- case HLSL_IR_CALL: - case HLSL_IR_IF: - case HLSL_IR_JUMP: - case HLSL_IR_LOOP: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - vkd3d_unreachable(); + node = node_from_list(&block->instrs); + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); + ret = constant->value.u[0].u; + } + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Failed to evaluate constant expression %d.", node->type); }
- vkd3d_unreachable(); + return ret; }
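Illustratively (not part of the patch), an array dimension such as "float arr[4 * 2 + 1];" is expected to reach evaluate_static_expression_as_uint() with a block that the constant-folding/copy-propagation fixpoint reduces to a single HLSL_IR_CONSTANT, yielding 9; a block that does not fold down to a constant is reported as a failed constant-expression evaluation.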
static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) @@ -1180,20 +1134,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) return true;
- if (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_VECTOR) + if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) return true;
- if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) + if (t1->class == HLSL_CLASS_MATRIX || t2->class == HLSL_CLASS_MATRIX) { /* Matrix-vector conversion is apparently allowed if either they have the same components count or the matrix is nx1 or 1xn */ - if (t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) + if (t1->class == HLSL_CLASS_VECTOR || t2->class == HLSL_CLASS_VECTOR) { if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) return true;
- return (t1->type == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) - || (t2->type == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); + return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) + || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); }
/* Both matrices */ @@ -1226,7 +1180,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) { - if (t1->type > HLSL_CLASS_LAST_NUMERIC) + if (t1->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -1237,7 +1191,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return false; }
- if (t2->type > HLSL_CLASS_LAST_NUMERIC) + if (t2->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -1264,17 +1218,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct
if (t1->dimx == 1 && t1->dimy == 1) { - *type = t2->type; + *type = t2->class; *dimx = t2->dimx; *dimy = t2->dimy; } else if (t2->dimx == 1 && t2->dimy == 1) { - *type = t1->type; + *type = t1->class; *dimx = t1->dimx; *dimy = t1->dimy; } - else if (t1->type == HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX) + else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) { *type = HLSL_CLASS_MATRIX; *dimx = min(t1->dimx, t2->dimx); @@ -1284,13 +1238,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct { if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) { - *type = t1->type; + *type = t1->class; *dimx = t1->dimx; *dimy = t1->dimy; } else { - *type = t2->type; + *type = t2->class; *dimx = t2->dimx; *dimy = t2->dimy; } @@ -1306,55 +1260,50 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *expr; unsigned int i;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { - struct hlsl_type *vector_type; + struct hlsl_type *scalar_type; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load; struct hlsl_ir_var *var;
- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + scalar_type = hlsl_get_scalar_type(ctx, type->base_type);
if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) return NULL; hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < hlsl_type_major_size(type); ++i) + for (i = 0; i < type->dimy * type->dimx; ++i) { - struct hlsl_ir_node *value, *vector_operands[HLSL_MAX_OPERANDS] = { NULL }; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; + struct hlsl_block block; unsigned int j;
- if (!(c = hlsl_new_uint_constant(ctx, i, loc))) - return NULL; - list_add_tail(instrs, &c->node.entry); - for (j = 0; j < HLSL_MAX_OPERANDS; j++) { if (operands[j]) { - struct hlsl_ir_load *load; - - if (!(load = add_load_index(ctx, instrs, operands[j], &c->node, loc))) + if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) return NULL; - vector_operands[j] = &load->node; + + cell_operands[j] = load; } }
- if (!(value = add_expr(ctx, instrs, op, vector_operands, vector_type, loc))) + if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) return NULL;
- if (!(store = hlsl_new_store_index(ctx, &var_deref, &c->node, value, 0, loc))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) return NULL; - list_add_tail(instrs, &store->node.entry); + list_move_tail(instrs, &block.instrs); }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &load->node.entry); + list_add_tail(instrs, &var_load->node.entry);
- return &load->node; + return &var_load->node; }
if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) @@ -1407,7 +1356,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type;
- bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy);
if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) @@ -1416,20 +1365,27 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct return add_expr(ctx, instrs, op, args, bool_type, loc); }
-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) +static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, + const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { - struct hlsl_type *common_type; enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); enum hlsl_type_class type; unsigned int dimx, dimy; - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) return NULL;
- common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); +} + +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *common_type; + + common_type = get_common_numeric_type(ctx, arg1, arg2, loc);
if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) return NULL; @@ -1441,13 +1397,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str }
static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2);
list_move_tail(list1, list2); vkd3d_free(list2); - add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, &loc); + add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); return list1; }
@@ -1499,13 +1455,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str }
static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2);
list_move_tail(list1, list2); vkd3d_free(list2); - add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); + add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); return list1; }
@@ -1596,7 +1552,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis enum hlsl_ir_expr_op op; unsigned dim;
- if (arg1->data_type->type == HLSL_CLASS_MATRIX) + if (arg1->data_type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -1607,7 +1563,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; }
- if (arg2->data_type->type == HLSL_CLASS_MATRIX) + if (arg2->data_type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -1618,9 +1574,9 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; }
- if (arg1->data_type->type == HLSL_CLASS_SCALAR) + if (arg1->data_type->class == HLSL_CLASS_SCALAR) dim = arg2->data_type->dimx; - else if (arg2->data_type->type == HLSL_CLASS_SCALAR) + else if (arg2->data_type->class == HLSL_CLASS_SCALAR) dim = arg1->data_type->dimx; else dim = min(arg1->data_type->dimx, arg2->data_type->dimx); @@ -1702,7 +1658,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_expr *copy; + struct hlsl_ir_node *copy; unsigned int writemask = 0;
if (assign_op == ASSIGN_OP_SUB) @@ -1720,13 +1676,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in return NULL; }
- if (lhs_type->type <= HLSL_CLASS_LAST_NUMERIC) + if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) writemask = (1 << lhs_type->dimx) - 1;
if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) return NULL;
- while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_RESOURCE_LOAD) + while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { @@ -1735,10 +1691,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } else if (lhs->type == HLSL_IR_SWIZZLE) { - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs), *new_swizzle; + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); unsigned int width, s = swizzle->swizzle; + struct hlsl_ir_node *new_swizzle;
- if (lhs->data_type->type == HLSL_CLASS_MATRIX) + if (lhs->data_type->class == HLSL_CLASS_MATRIX) hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask.");
if (!invert_swizzle(&s, &writemask, &width)) @@ -1751,10 +1708,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { return NULL; } - list_add_tail(instrs, &new_swizzle->node.entry); + list_add_tail(instrs, &new_swizzle->entry);
lhs = swizzle->val.node; - rhs = &new_swizzle->node; + rhs = new_swizzle; } else { @@ -1763,18 +1720,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } }
- if (lhs->type == HLSL_IR_RESOURCE_LOAD) + if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_resource_access(hlsl_ir_index(lhs))) { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(lhs); - struct hlsl_ir_resource_store *store; + struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; + struct hlsl_deref resource_deref; struct hlsl_type *resource_type; - struct hlsl_ir_swizzle *coords; + struct hlsl_ir_node *store; unsigned int dim_count;
- /* Such an lvalue was produced by an index expression. */ - assert(load->load_type == HLSL_RESOURCE_LOAD); - resource_type = hlsl_deref_get_type(ctx, &load->resource); - assert(resource_type->type == HLSL_CLASS_OBJECT); + if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node)) + return NULL; + + resource_type = hlsl_deref_get_type(ctx, &resource_deref); + assert(resource_type->class == HLSL_CLASS_OBJECT); assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV);
if (resource_type->base_type != HLSL_TYPE_UAV) @@ -1787,25 +1745,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components.");
- /* Remove the (implicit) mipmap level from the load expression. */ - assert(load->coords.node->data_type->type == HLSL_CLASS_VECTOR); - assert(load->coords.node->data_type->base_type == HLSL_TYPE_UINT); - assert(load->coords.node->data_type->dimx == dim_count + 1); - if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dim_count, load->coords.node, &lhs->loc))) - return NULL; - list_add_tail(instrs, &coords->node.entry); + assert(coords->data_type->class == HLSL_CLASS_VECTOR); + assert(coords->data_type->base_type == HLSL_TYPE_UINT); + assert(coords->data_type->dimx == dim_count);
- if (!(store = hlsl_new_resource_store(ctx, &load->resource, &coords->node, rhs, &lhs->loc))) + if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) + { + hlsl_cleanup_deref(&resource_deref); return NULL; - list_add_tail(instrs, &store->node.entry); + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&resource_deref); + } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) + { + struct hlsl_ir_index *row = hlsl_ir_index(lhs); + struct hlsl_ir_node *mat = row->val.node; + unsigned int i, k = 0; + + for (i = 0; i < mat->data_type->dimx; ++i) + { + struct hlsl_ir_node *cell, *load, *store, *c; + struct hlsl_deref deref; + + if (!(writemask & (1 << i))) + continue; + + if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) + return NULL; + list_add_tail(instrs, &c->entry); + + if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) + return NULL; + list_add_tail(instrs, &cell->entry); + + if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + return NULL; + + if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) + return NULL; + + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&deref); + } } else { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; + struct hlsl_deref deref;
- if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) + if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) return NULL; - list_add_tail(instrs, &store->node.entry); + + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&deref); }
/* Don't use the instruction itself as a source, as this makes structure @@ -1813,37 +1816,37 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in * the last instruction in the list, we do need to copy. */ if (!(copy = hlsl_new_copy(ctx, rhs))) return NULL; - list_add_tail(instrs, &copy->node.entry); - return &copy->node; + list_add_tail(instrs, &copy->entry); + return copy; }
static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, - struct vkd3d_shader_location loc) + const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *lhs = node_from_list(instrs); - struct hlsl_ir_constant *one; + struct hlsl_ir_node *one;
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in");
- if (!(one = hlsl_new_int_constant(ctx, 1, &loc))) + if (!(one = hlsl_new_int_constant(ctx, 1, loc))) return false; - list_add_tail(instrs, &one->node.entry); + list_add_tail(instrs, &one->entry);
- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, &one->node)) + if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) return false;
if (post) { - struct hlsl_ir_expr *copy; + struct hlsl_ir_node *copy;
if (!(copy = hlsl_new_copy(ctx, lhs))) return false; - list_add_tail(instrs, &copy->node.entry); + list_add_tail(instrs, &copy->entry);
/* Post increment/decrement expressions are considered const. */ - if (!(copy->node.data_type = hlsl_type_clone(ctx, copy->node.data_type, 0, HLSL_MODIFIER_CONST))) + if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) return false; }
@@ -1861,10 +1864,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
for (k = 0; k < src_comp_count; ++k) { + struct hlsl_ir_node *conv, *load; struct hlsl_type *dst_comp_type; - struct hlsl_ir_store *store; - struct hlsl_ir_load *load; - struct hlsl_ir_node *conv; struct hlsl_block block;
if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) @@ -1872,10 +1873,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index);
- if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc))) + if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) return;
- if (!(store = hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))) + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) return; list_move_tail(instrs, &block.instrs);
@@ -1885,12 +1886,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) { - if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_OBJECT) return !must_be_in_struct; - if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_has_object_components(type->e.array.type, must_be_in_struct);
- if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { unsigned int i;
@@ -1905,12 +1906,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s
static bool type_has_numeric_components(struct hlsl_type *type) { - if (type->type <= HLSL_CLASS_LAST_NUMERIC) + if (type->class <= HLSL_CLASS_LAST_NUMERIC) return true; - if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_has_numeric_components(type->e.array.type);
- if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { unsigned int i;
@@ -1934,7 +1935,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_type *type; bool local = true;
- if (basic_type->type == HLSL_CLASS_MATRIX) + if (basic_type->class == HLSL_CLASS_MATRIX) assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
if (!(statements_list = make_empty_list(ctx))) @@ -1966,7 +1967,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t
type = basic_type;
- if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { for (i = 0; i < v->arrays.count; ++i) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -2035,7 +2036,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } vkd3d_free(v->arrays.sizes);
- if (!(var = hlsl_new_var(ctx, v->name, type, v->loc, &v->semantic, modifiers, &v->reg_reservation))) + if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) { free_parse_variable_def(v); continue; @@ -2043,6 +2044,13 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t
var->buffer = ctx->cur_buffer;
+ if (var->buffer == ctx->globals_buffer) + { + if (var->reg_reservation.offset_type) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is only allowed inside constant buffer declarations."); + } + if (ctx->cur_scope == ctx->globals) { local = false; @@ -2148,7 +2156,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else { - struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, var->loc); + struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc);
assert(v->initializer.args_count == 1); list_add_tail(v->initializer.instrs, &load->node.entry); @@ -2156,7 +2164,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t }
if (modifiers & HLSL_STORAGE_STATIC) - list_move_tail(&ctx->static_initializers, v->initializer.instrs); + list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); else list_move_tail(statements_list, v->initializer.instrs); vkd3d_free(v->initializer.args); @@ -2164,9 +2172,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { - struct hlsl_ir_constant *zero; - struct hlsl_ir_store *store; - struct hlsl_ir_node *cast; + struct hlsl_ir_node *cast, *store, *zero;
/* Initialize statics to zero by default. */
@@ -2181,9 +2187,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &zero->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, zero);
- if (!(cast = add_cast(ctx, &ctx->static_initializers, &zero->node, var->data_type, &var->loc))) + if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) { vkd3d_free(v); continue; @@ -2194,7 +2200,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &store->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, store); } vkd3d_free(v); } @@ -2279,7 +2285,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) return arg;
- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return add_implicit_conversion(ctx, params->instrs, arg, type, loc); }
@@ -2315,12 +2321,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *
base = expr_common_base_type(base, arg_type->base_type);
- if (arg_type->type == HLSL_CLASS_VECTOR) + if (arg_type->class == HLSL_CLASS_VECTOR) { vectors = true; dimx = min(dimx, arg_type->dimx); } - else if (arg_type->type == HLSL_CLASS_MATRIX) + else if (arg_type->class == HLSL_CLASS_MATRIX) { matrices = true; dimx = min(dimx, arg_type->dimx); @@ -2369,7 +2375,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false;
- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
return convert_args(ctx, params, type, loc); } @@ -2383,20 +2389,18 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg = params->args[0], *mul; - struct hlsl_ir_constant *one, *zero; - struct hlsl_ir_load *load; + struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; unsigned int i, count;
if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; - list_add_tail(params->instrs, &one->node.entry); + list_add_tail(params->instrs, &one->entry);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->node.entry); + list_add_tail(params->instrs, &zero->entry);
- mul = &one->node; + mul = one;
count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) @@ -2404,52 +2408,123 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) return false;
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &load->node, mul, loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) return false; }
- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, &zero->node, loc); -} - -/* Find the type corresponding to the given source type, with the same - * dimensions but a different base type. */ -static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) -{ - return hlsl_get_numeric_type(ctx, type->type, base_type, type->dimx, type->dimy); + return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); }
-static bool intrinsic_asuint(struct hlsl_ctx *ctx, +static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *data_type; + struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; + unsigned int i, count;
- if (params->args_count != 1 && params->args_count != 3) + if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); + hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); return false; }
- if (params->args_count == 3) + if (arg->data_type->base_type == HLSL_TYPE_FLOAT) { - hlsl_fixme(ctx, loc, "Double-to-integer conversion."); - return false; - } + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; + list_add_tail(params->instrs, &zero->entry);
- data_type = params->args[0]->data_type; - if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) - { - struct vkd3d_string_buffer *string; + if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) + return false;
- if ((string = hlsl_type_to_string(ctx, data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", - string->buffer); - hlsl_release_string_buffer(ctx, string); + return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); } - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); + else if (arg->data_type->base_type == HLSL_TYPE_BOOL) + { + if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) + return false; + list_add_tail(params->instrs, &bfalse->entry); + + or = bfalse; + + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) + { + if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) + return false; + } + + return true; + } + + hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); + return false; +} + +/* Find the type corresponding to the given source type, with the same + * dimensions but a different base type. */ +static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) +{ + return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +} + +static bool intrinsic_asfloat(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *data_type; + + data_type = params->args[0]->data_type; + if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong argument type of asfloat(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); +} + +static bool intrinsic_asuint(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *data_type; + + if (params->args_count != 1 && params->args_count != 3) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); + return false; + } + + if (params->args_count == 3) + { + hlsl_fixme(ctx, loc, "Double-to-integer conversion."); + return false; + } + + data_type = params->args[0]->data_type; + if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT);
operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); @@ -2483,7 +2558,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_swizzle *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; + struct hlsl_ir_node *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg, *mul1, *mul2; struct hlsl_type *cast_type; @@ -2504,35 +2579,55 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl1->node.entry); + list_add_tail(params->instrs, &arg1_swzl1->entry);
if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl1->node.entry); + list_add_tail(params->instrs, &arg2_swzl1->entry);
- if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl1->node, &arg2_swzl1->node, loc))) + if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) return false;
- if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, *loc))) + if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) return false; list_add_tail(params->instrs, &mul1_neg->entry);
if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl2->node.entry); + list_add_tail(params->instrs, &arg1_swzl2->entry);
if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl2->node.entry); + list_add_tail(params->instrs, &arg2_swzl2->entry);
- if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl2->node, &arg2_swzl2->node, loc))) + if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false;
return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, mul2, mul1_neg, loc); }
+static bool intrinsic_ddx(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); +} + +static bool intrinsic_ddy(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2565,8 +2660,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, static bool intrinsic_exp(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *coeff; - struct hlsl_ir_node *arg, *mul; + struct hlsl_ir_node *arg, *mul, *coeff;
if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) return false; @@ -2574,9 +2668,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; - list_add_tail(params->instrs, &coeff->node.entry); + list_add_tail(params->instrs, &coeff->entry);
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &coeff->node, params->args[0], loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false;
return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); @@ -2604,6 +2698,43 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); }
+static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select, *zero; + static const struct hlsl_constant_value zero_value; + + if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) + return false; + + if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) + return false; + + if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) + return false; + list_add_tail(params->instrs, &zero->entry); + + if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) + return false; + + if (!(frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, abs, loc))) + return false; + + if (!(neg_frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, frac, loc))) + return false; + + if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) + return false; + + if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); +} + static bool intrinsic_frac(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2635,7 +2766,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *arg, *dot;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -2692,20 +2823,18 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, static bool intrinsic_lit(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow; - struct hlsl_ir_constant *init, *zero; - struct hlsl_ir_node *n_l, *n_h, *m; - struct hlsl_ir_node *diffuse; - struct hlsl_ir_store *store; + struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow, *load; + struct hlsl_ir_node *n_l, *n_h, *m, *diffuse, *zero, *store, *init; + struct hlsl_constant_value init_value; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; struct hlsl_type *ret_type; - struct hlsl_ir_load *load; struct hlsl_ir_var *var; struct hlsl_block block;
- if (params->args[0]->data_type->type != HLSL_CLASS_SCALAR - || params->args[1]->data_type->type != HLSL_CLASS_SCALAR - || params->args[2]->data_type->type != HLSL_CLASS_SCALAR) + if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR + || params->args[1]->data_type->class != HLSL_CLASS_SCALAR + || params->args[2]->data_type->class != HLSL_CLASS_SCALAR) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); return false; @@ -2726,37 +2855,35 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, return false; hlsl_init_simple_deref_from_var(&var_deref, var);
- if (!(init = hlsl_new_constant(ctx, ret_type, loc))) + init_value.u[0].f = 1.0f; + init_value.u[1].f = 0.0f; + init_value.u[2].f = 0.0f; + init_value.u[3].f = 1.0f; + if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) return false; - init->value[0].f = 1.0f; - init->value[1].f = 0.0f; - init->value[2].f = 0.0f; - init->value[3].f = 1.0f; - list_add_tail(params->instrs, &init->node.entry); + list_add_tail(params->instrs, &init->entry);
- if (!(store = hlsl_new_simple_store(ctx, var, &init->node))) + if (!(store = hlsl_new_simple_store(ctx, var, init))) return false; - list_add_tail(params->instrs, &store->node.entry); + list_add_tail(params->instrs, &store->entry);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->node.entry); + list_add_tail(params->instrs, &zero->entry);
/* Diffuse component. */ - if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, &zero->node, loc))) + if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) return false;
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) return false; list_move_tail(params->instrs, &block.instrs);
/* Specular component. */ - if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, - n_h, &zero->node, loc))) + if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) return false;
- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, - n_l, &zero->node, loc))) + if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) return false;
if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) @@ -2765,20 +2892,67 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) return false;
- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, &zero->node, specular_pow))) + if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) return false;
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 2, &load->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) return false; list_move_tail(params->instrs, &block.instrs);
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &var_load->node.entry);
return true; }
+static bool intrinsic_log(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) + return false; + + /* ln(2) */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.69314718055f, loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} + +static bool intrinsic_log10(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) + return false; + + /* 1 / log2(10) */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.301029996f, loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} + +static bool intrinsic_log2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); +} + static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2808,15 +2982,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, struct hlsl_ir_load *load; struct hlsl_ir_var *var;
- if (arg1->data_type->type == HLSL_CLASS_SCALAR || arg2->data_type->type == HLSL_CLASS_SCALAR) + if (arg1->data_type->class == HLSL_CLASS_SCALAR || arg2->data_type->class == HLSL_CLASS_SCALAR) return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1, arg2, loc);
- if (arg1->data_type->type == HLSL_CLASS_VECTOR) + if (arg1->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); } - if (arg2->data_type->type == HLSL_CLASS_VECTOR) + if (arg2->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); @@ -2854,13 +3028,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, for (j = 0; j < matrix_type->dimy; ++j) { struct hlsl_ir_node *instr = NULL; - struct hlsl_ir_store *store; struct hlsl_block block;
for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) { - struct hlsl_ir_load *value1, *value2; - struct hlsl_ir_node *mul; + struct hlsl_ir_node *value1, *value2, *mul;
if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) return false; @@ -2868,7 +3040,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) return false;
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &value1->node, &value2->node, loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) return false;
if (instr) @@ -2882,13 +3054,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } }
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; list_move_tail(params->instrs, &block.instrs); } }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; list_add_tail(params->instrs, &load->node.entry);
@@ -2901,7 +3073,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *dot, *rsq, *arg;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -2986,6 +3158,42 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); }
+static bool intrinsic_sign(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *lt, *neg, *op1, *op2, *zero, *arg = params->args[0]; + static const struct hlsl_constant_value zero_value; + + struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, + arg->data_type->dimx, arg->data_type->dimy); + + if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) + return false; + list_add_tail(params->instrs, &zero->entry); + + /* Check if 0 < arg, cast bool to int */ + + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) + return false; + + if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + return false; + + /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ + + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) + return false; + + if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + return false; + + if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) + return false; + + /* Adding these two together will make 1 when > 0, -1 when < 0, and 0 when neither */ + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, neg, op1, loc); +} + static bool intrinsic_sin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3001,8 +3209,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res; - struct hlsl_ir_constant *one, *minus_two, *three; + struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res, *one, *minus_two, *three;
if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; @@ -3022,9 +3229,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; - list_add_tail(params->instrs, &one->node.entry); + list_add_tail(params->instrs, &one->entry);
- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc))) + if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) return false;
if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) @@ -3035,16 +3242,16 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; - list_add_tail(params->instrs, &minus_two->node.entry); + list_add_tail(params->instrs, &minus_two->entry);
if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) return false; - list_add_tail(params->instrs, &three->node.entry); + list_add_tail(params->instrs, &three->entry);
- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &minus_two->node, p, loc))) + if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) return false;
- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &three->node, res, loc))) + if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, three, res, loc))) return false;
if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) @@ -3081,7 +3288,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, return false;
type = ge->data_type; - type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); }
@@ -3090,9 +3297,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * { struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - struct hlsl_ir_node *coords; + struct hlsl_ir_node *coords, *load;
if (params->args_count != 2 && params->args_count != 4) { @@ -3107,7 +3312,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * }
sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) { struct vkd3d_string_buffer *string; @@ -3118,24 +3323,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * name, ctx->builtin_types.sampler[dim]->name, string->buffer); hlsl_release_string_buffer(ctx, string); } - else - { - /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); - - load_params.resource = sampler_load->src; - }
if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) coords = params->args[1];
load_params.coords = coords; + load_params.resource = params->args[0]; load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + load_params.sampling_dim = dim;
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &load->entry); return true; }
@@ -3156,13 +3356,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_ir_node *arg = params->args[0]; struct hlsl_type *arg_type = arg->data_type; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; struct hlsl_type *mat_type; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load; struct hlsl_ir_var *var; unsigned int i, j;
- if (arg_type->type != HLSL_CLASS_SCALAR && arg_type->type != HLSL_CLASS_MATRIX) + if (arg_type->class != HLSL_CLASS_SCALAR && arg_type->class != HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string;
@@ -3174,7 +3375,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, return false; }
- if (arg_type->type == HLSL_CLASS_SCALAR) + if (arg_type->class == HLSL_CLASS_SCALAR) { list_add_tail(params->instrs, &arg->entry); return true; @@ -3190,21 +3391,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { for (j = 0; j < arg_type->dimy; ++j) { - struct hlsl_ir_store *store; struct hlsl_block block;
if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) return false;
- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, &load->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) return false; list_move_tail(params->instrs, &block.instrs); } }
- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &var_load->node.entry); + + return true; +} + +static bool intrinsic_trunc(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, arg, loc); +} + +static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; + struct hlsl_type *arg_type = arg->data_type; + + if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, arg_type))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Wrong argument type '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + + return false; + } + + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + + if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) + return false; + list_add_tail(params->instrs, &c->entry); + + if (arg_type->class == HLSL_CLASS_VECTOR) + { + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) + return false; + list_add_tail(params->instrs, &swizzle->entry); + + arg = swizzle; + } + + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; + + if (ctx->profile->major_version >= 4) + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc);
return true; } @@ -3220,22 +3475,31 @@ static const struct intrinsic_function intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. */ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, {"abs", 1, true, intrinsic_abs}, {"all", 1, true, intrinsic_all}, + {"any", 1, true, intrinsic_any}, + {"asfloat", 1, true, intrinsic_asfloat}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, + {"ddy", 1, true, intrinsic_ddy}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, {"ldexp", 2, true, intrinsic_ldexp}, {"length", 1, true, intrinsic_length}, {"lerp", 3, true, intrinsic_lerp}, {"lit", 3, true, intrinsic_lit}, + {"log", 1, true, intrinsic_log}, + {"log10", 1, true, intrinsic_log10}, + {"log2", 1, true, intrinsic_log2}, {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, {"mul", 2, true, intrinsic_mul}, @@ -3245,6 +3509,7 @@ intrinsic_functions[] = {"round", 1, true, intrinsic_round}, {"rsqrt", 1, true, intrinsic_rsqrt}, {"saturate", 1, true, intrinsic_saturate}, + {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, @@ -3252,6 +3517,7 @@ intrinsic_functions[] = {"tex2D", -1, false, intrinsic_tex2D}, {"tex3D", -1, false, intrinsic_tex3D}, {"transpose", 1, true, intrinsic_transpose}, + {"trunc", 1, true, intrinsic_trunc}, };
static int intrinsic_function_name_compare(const void *a, const void *b) @@ -3291,11 +3557,11 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (param->storage_modifiers & HLSL_STORAGE_IN) { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store;
if (!(store = hlsl_new_simple_store(ctx, param, arg))) goto fail; - list_add_tail(args->instrs, &store->node.entry); + list_add_tail(args->instrs, &store->entry); } }
@@ -3316,7 +3582,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Output argument to \"%s\" is const.", decl->func->name);
- if (!(load = hlsl_new_var_load(ctx, param, arg->loc))) + if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) goto fail; list_add_tail(args->instrs, &load->node.entry);
@@ -3329,7 +3595,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, { struct hlsl_ir_load *load;
- if (!(load = hlsl_new_var_load(ctx, decl->return_var, *loc))) + if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) goto fail; list_add_tail(args->instrs, &load->node.entry); } @@ -3360,7 +3626,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
for (i = 0; i < args->args_count; ++i) { - if (args->args[i]->data_type->type > HLSL_CLASS_LAST_NUMERIC) + if (args->args[i]->data_type->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -3397,20 +3663,20 @@ fail: }
static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, - struct parse_initializer *params, struct vkd3d_shader_location loc) + struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_load *load; struct hlsl_ir_var *var; unsigned int i, idx = 0;
- if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, &loc))) + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL;
for (i = 0; i < params->args_count; ++i) { struct hlsl_ir_node *arg = params->args[i];
- if (arg->data_type->type == HLSL_CLASS_OBJECT) + if (arg->data_type->class == HLSL_CLASS_OBJECT) { struct vkd3d_string_buffer *string;
@@ -3455,320 +3721,526 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) } }
-static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -{ - const struct hlsl_type *object_type = object->data_type; - struct hlsl_ir_load *object_load; +static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct hlsl_type *object_type, + const char *method, const struct vkd3d_shader_location *loc) +{ + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, object_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Method '%s' is not defined on type '%s'.", method, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; +} + +static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; + struct hlsl_ir_node *load; + bool multisampled; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + + if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", + 1 + multisampled, 3 + multisampled, params->args_count); + return false; + } + if (multisampled) + { + if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) + return false; + } + + assert(offset_dim); + if (params->args_count > 1 + multisampled) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + if (params->args_count > 2 + multisampled) + { + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + } + + /* +1 for the mipmap level for non-multisampled textures */ + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) + return false; + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +} + +static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + const struct 
hlsl_type *sampler_type; + struct hlsl_ir_node *load; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", + 4 + !!offset_dim, params->args_count); + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (offset_dim && params->args_count > 2) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + if (params->args_count > 2 + !!offset_dim) + hlsl_fixme(ctx, loc, "Sample() clamp parameter."); + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + + return true; +} + +static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = { 0 }; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (!strcmp(name, "SampleCmpLevelZero")) + load_params.type = HLSL_RESOURCE_SAMPLE_CMP_LZ; + else + load_params.type = HLSL_RESOURCE_SAMPLE_CMP; + + if (params->args_count < 3 || params->args_count > 5 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", + name, 5 + !!offset_dim, params->args_count); + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): 
expected 'SamplerComparisonState', but got '%s'.", + name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.cmp = params->args[2]; + + if (offset_dim && params->args_count > 3) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "%s() clamp parameter.", name); + if (params->args_count > 4 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + + return true; +} + +static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {0}; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + unsigned int read_channel; + + if (object_type->sampler_dim != HLSL_SAMPLER_DIM_2D + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DARRAY + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (!strcmp(name, "GatherGreen")) + { + load_params.type = HLSL_RESOURCE_GATHER_GREEN; + read_channel = 1; + } + else if (!strcmp(name, "GatherBlue")) + { + load_params.type = HLSL_RESOURCE_GATHER_BLUE; + read_channel = 2; + } + else if (!strcmp(name, "GatherAlpha")) + { + load_params.type = HLSL_RESOURCE_GATHER_ALPHA; + read_channel = 3; + } + else + { + load_params.type = HLSL_RESOURCE_GATHER_RED; + read_channel = 0; + } + + if (!strcmp(name, "Gather") || !offset_dim) + { + if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", + name, 3 + !!offset_dim, params->args_count); + return false; + } + } + else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", + name, params->args_count); + return false; + } + + if (params->args_count == 3 + !!offset_dim || params->args_count == 7) + hlsl_fixme(ctx, loc, "Tiled resource status argument.");
- if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + if (params->args_count == 6 || params->args_count == 7) + { + hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); + } + else if (offset_dim && params->args_count > 2) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string;
- if ((string = hlsl_type_to_string(ctx, object_type))) + if ((string = hlsl_type_to_string(ctx, sampler_type))) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Type '%s' does not have methods.", string->buffer); + "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); hlsl_release_string_buffer(ctx, string); return false; }
- /* Only HLSL_IR_LOAD can return an object. */ - object_load = hlsl_ir_load(object); - - if (!strcmp(name, "Load") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + if (read_channel >= object_type->e.resource_format->dimx) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; - struct hlsl_ir_resource_load *load; - bool multisampled; - - multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Method %s() requires at least %u channels.", name, read_channel + 1); + return false; + }
- if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", - 1 + multisampled, 3 + multisampled, params->args_count); - return false; - } - if (multisampled) - { - hlsl_fixme(ctx, loc, "Load() sampling index parameter."); - } + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false;
- assert(offset_dim); - if (params->args_count > 1 + multisampled) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } - if (params->args_count > 2 + multisampled) - { - hlsl_fixme(ctx, loc, "Tiled resource status argument."); - } + load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); + load_params.resource = object; + load_params.sampler = params->args[0];
- /* +1 for the mipmap level */ - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) - return false; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +}
- load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; +static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = { 0 }; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load;
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); } - else if (!strcmp(name, "Sample") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + + if (!strcmp(name, "SampleLevel")) + load_params.type = HLSL_RESOURCE_SAMPLE_LOD; + else + load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; + + if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", + name, 4 + !!offset_dim, params->args_count); + return false; + }
- if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); - return false; - } + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string;
- sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + }
- if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1];
- /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.lod = params->args[2];
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + if (offset_dim && params->args_count > 3) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; + }
- if (offset_dim && params->args_count > 2) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument.");
- if (params->args_count > 2 + !!offset_dim) - hlsl_fixme(ctx, loc, "Sample() clamp parameter."); - if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0];
- load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +}
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); +static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = { 0 }; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load;
- return true; - } - else if ((!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue") - || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha")) - && (object_type->sampler_dim == HLSL_SAMPLER_DIM_2D - || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DARRAY - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY)) + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {0}; - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - unsigned int read_channel; - - if (!strcmp(name, "GatherGreen")) - { - load_params.type = HLSL_RESOURCE_GATHER_GREEN; - read_channel = 1; - } - else if (!strcmp(name, "GatherBlue")) - { - load_params.type = HLSL_RESOURCE_GATHER_BLUE; - read_channel = 2; - } - else if (!strcmp(name, "GatherAlpha")) - { - load_params.type = HLSL_RESOURCE_GATHER_ALPHA; - read_channel = 3; - } - else - { - load_params.type = HLSL_RESOURCE_GATHER_RED; - read_channel = 0; - } - - if (!strcmp(name, "Gather") || !offset_dim) - { - if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", - name, 3 + !!offset_dim, params->args_count); - return false; - } - } - else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", - name, params->args_count); - return false; - } - - if (params->args_count == 3 + !!offset_dim || params->args_count == 7) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + return raise_invalid_method_object_type(ctx, object_type, name, loc); + }
- if (params->args_count == 6 || params->args_count == 7) - { - hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); - } - else if (offset_dim && params->args_count > 2) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } + load_params.type = HLSL_RESOURCE_SAMPLE_GRAD;
- sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + if (params->args_count < 4 || params->args_count > 5 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 4 to %u, but got %u.", + name, 5 + !!offset_dim, params->args_count); + return false; + }
- if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string;
- if (read_channel >= object_type->e.resource_format->dimx) - { + if ((string = hlsl_type_to_string(ctx, sampler_type))) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Method %s() requires at least %u channels.", name, read_channel + 1); - return false; - } + "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + }
- /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1];
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; + if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddx = params->args[2];
- load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; + if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddy = params->args[3];
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; - } - else if (!strcmp(name, "SampleLevel") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + if (offset_dim && params->args_count > 4) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE_LOD}; - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - - if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'SampleLevel': expected from 3 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; - } - - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + }
- if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of SampleLevel(): expected 'sampler', but got '%s'.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + if (params->args_count > 4 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument.");
- /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0];
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.coords = params->args[1]; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +}
- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - load_params.lod = params->args[2]; +static const struct method_function +{ + const char *name; + bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); +} +object_methods[] = +{ + { "Gather", add_gather_method_call }, + { "GatherAlpha", add_gather_method_call }, + { "GatherBlue", add_gather_method_call }, + { "GatherGreen", add_gather_method_call }, + { "GatherRed", add_gather_method_call }, + + { "Load", add_load_method_call }, + + { "Sample", add_sample_method_call }, + { "SampleBias", add_sample_lod_method_call }, + { "SampleCmp", add_sample_cmp_method_call }, + { "SampleCmpLevelZero", add_sample_cmp_method_call }, + { "SampleGrad", add_sample_grad_method_call }, + { "SampleLevel", add_sample_lod_method_call }, +};
- if (offset_dim && params->args_count > 3) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } +static int object_method_function_name_compare(const void *a, const void *b) +{ + const struct method_function *func = b;
- if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + return strcmp(a, func->name); +}
- load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; +static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const struct method_function *method;
- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; - } - else + if (object_type->class != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, object_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, - "Method '%s' is not defined on type '%s'.", name, string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Type '%s' does not have methods.", string->buffer); hlsl_release_string_buffer(ctx, string); return false; } + + if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), + sizeof(*method), object_method_function_name_compare))) + { + return method->handler(ctx, instrs, object, name, params, loc); + } + else + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } }
static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type *format, const struct vkd3d_shader_location *loc) { - if (format->type > HLSL_CLASS_VECTOR) + if (format->class > HLSL_CLASS_VECTOR) { struct vkd3d_string_buffer *string;
@@ -3846,6 +4318,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_NAMESPACE %token KW_NOINTERPOLATION %token KW_OUT +%token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER %token KW_PRECISE @@ -3854,6 +4327,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_RETURN %token KW_REGISTER %token KW_ROW_MAJOR +%token KW_RWBUFFER +%token KW_RWSTRUCTUREDBUFFER %token KW_RWTEXTURE1D %token KW_RWTEXTURE2D %token KW_RWTEXTURE3D @@ -3933,6 +4408,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <list> conditional_expr %type <list> declaration %type <list> declaration_statement +%type <list> discard_statement %type <list> equality_expr %type <list> expr %type <list> expr_optional @@ -3968,6 +4444,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <attr> attribute
%type <attr_list> attribute_list +%type <attr_list> attribute_list_optional
%type <boolval> boolean
@@ -3999,6 +4476,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <parameters> parameters
%type <reg_reservation> register_opt +%type <reg_reservation> packoffset_opt
%type <sampler_dim> texture_type texture_ms_type uav_type
@@ -4037,7 +4515,7 @@ buffer_declaration: if ($3.semantic.name) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers.");
- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, @2))) + if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) YYABORT; }
@@ -4261,6 +4739,14 @@ attribute_list: $$.attrs[$$.count++] = $2; }
+attribute_list_optional: + %empty + { + $$.count = 0; + $$.attrs = NULL; + } + | attribute_list + func_declaration: func_prototype compound_statement { @@ -4349,8 +4835,11 @@ func_prototype_no_attrs: "Semantics are not allowed on void functions."); }
- if ($7.reg_reservation.type) + if ($7.reg_reservation.reg_type) FIXME("Unexpected register reservation for a function.\n"); + if ($7.reg_reservation.offset_type) + hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on functions.");
if (($$.decl = get_func_decl(&ctx->functions, $3, &$5))) { @@ -4476,17 +4965,24 @@ var_identifier: colon_attribute: %empty { - $$.semantic.name = NULL; - $$.reg_reservation.type = 0; + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; } | semantic { $$.semantic = $1; - $$.reg_reservation.type = 0; + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; } | register_opt { - $$.semantic.name = NULL; + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; + } + | packoffset_opt + { + $$.semantic = (struct hlsl_semantic){0}; $$.reg_reservation = $1; }
@@ -4499,6 +4995,9 @@ semantic: ; $$.name = $2; $$.index = atoi(p); + $$.reported_missing = false; + $$.reported_duplicated_output_next_index = 0; + $$.reported_duplicated_input_incompatible_next_index = 0; *p = 0; }
@@ -4518,6 +5017,21 @@ register_opt: vkd3d_free($6); }
+packoffset_opt: + ':' KW_PACKOFFSET '(' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, NULL, &@$); + + vkd3d_free($4); + } + | ':' KW_PACKOFFSET '(' any_identifier '.' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, $6, &@$); + + vkd3d_free($4); + vkd3d_free($6); + } + parameters: scope_start { @@ -4536,7 +5050,7 @@ param_list: parameter { memset(&$$, 0, sizeof($$)); - if (!add_func_parameter(ctx, &$$, &$1, @1)) + if (!add_func_parameter(ctx, &$$, &$1, &@1)) { ERR("Error adding function parameter %s.\n", $1.name); YYABORT; @@ -4545,7 +5059,7 @@ param_list: | param_list ',' parameter { $$ = $1; - if (!add_func_parameter(ctx, &$$, &$3, @3)) + if (!add_func_parameter(ctx, &$$, &$3, &@3)) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "Parameter \"%s\" is already declared.", $3.name); @@ -4624,7 +5138,15 @@ texture_ms_type: }
uav_type: - KW_RWTEXTURE1D + KW_RWBUFFER + { + $$ = HLSL_SAMPLER_DIM_BUFFER; + } + | KW_RWSTRUCTUREDBUFFER + { + $$ = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; + } + | KW_RWTEXTURE1D { $$ = HLSL_SAMPLER_DIM_1D; } @@ -4640,7 +5162,7 @@ uav_type: type_no_void: KW_VECTOR '<' type ',' C_INTEGER '>' { - if ($3->type != HLSL_CLASS_SCALAR) + if ($3->class != HLSL_CLASS_SCALAR) { struct vkd3d_string_buffer *string;
@@ -4667,7 +5189,7 @@ type_no_void: } | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' { - if ($3->type != HLSL_CLASS_SCALAR) + if ($3->class != HLSL_CLASS_SCALAR) { struct vkd3d_string_buffer *string;
@@ -4702,6 +5224,10 @@ type_no_void: { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; } + | KW_SAMPLERCOMPARISONSTATE + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_COMPARISON]; + } | KW_SAMPLER1D { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_1D]; @@ -4716,7 +5242,7 @@ type_no_void: } | KW_SAMPLERCUBE { - $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_3D]; + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_CUBE]; } | KW_TEXTURE { @@ -4740,23 +5266,58 @@ type_no_void: } | texture_ms_type '<' type ',' shift_expr '>' { - unsigned int sample_count = evaluate_static_expression(node_from_list($5)); - destroy_instr_list($5); + unsigned int sample_count; + struct hlsl_block block; + + hlsl_block_init(&block); + list_move_tail(&block.instrs, $5); + + sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); + + hlsl_block_cleanup(&block); + + vkd3d_free($5);
$$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); } | uav_type '<' type '>' { - if ($3->type > HLSL_CLASS_VECTOR) - { - struct vkd3d_string_buffer *string; + struct vkd3d_string_buffer *string = hlsl_type_to_string(ctx, $3);
- string = hlsl_type_to_string(ctx, $3); + if (!type_contains_only_numerics($3)) + { if (string) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "UAV data type %s is not scalar or vector.", string->buffer); - hlsl_release_string_buffer(ctx, string); + "UAV type %s is not numeric.", string->buffer); + } + + switch ($1) + { + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_2D: + case HLSL_SAMPLER_DIM_3D: + if ($3->class == HLSL_CLASS_ARRAY) + { + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "This type of UAV does not support array type."); + } + else if (hlsl_type_component_count($3) > 4) + { + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "UAV data type %s size exceeds maximum size.", string->buffer); + } + break; + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + break; + default: + vkd3d_unreachable(); } + + hlsl_release_string_buffer(ctx, string); + $$ = hlsl_new_uav_type(ctx, $1, $3); } | TYPE_IDENTIFIER @@ -4779,7 +5340,7 @@ type_no_void: | KW_STRUCT TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); - if ($$->type != HLSL_CLASS_STRUCT) + if ($$->class != HLSL_CLASS_STRUCT) hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" redefined as a structure.", $2); vkd3d_free($2); } @@ -4934,10 +5495,17 @@ arrays: } | '[' expr ']' arrays { - unsigned int size = evaluate_static_expression(node_from_list($2)); + struct hlsl_block block; uint32_t *new_array; + unsigned int size;
- destroy_instr_list($2); + hlsl_clone_block(ctx, &block, &ctx->static_initializers); + list_move_tail(&block.instrs, $2); + + size = evaluate_static_expression_as_uint(ctx, &block, &@2); + + hlsl_block_cleanup(&block); + vkd3d_free($2);
$$ = $4;
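A hedged sketch of what the grammar changes above now accept (all declarations invented for the example): RWBuffer and RWStructuredBuffer resource types, SamplerComparisonState, a multisample count given as a static expression, and array dimensions folded through evaluate_static_expression_as_uint():

    struct record { float4 value; };

    RWBuffer<float4> colours : register(u0);           // element type must be numeric, at most 4 components
    RWStructuredBuffer<record> records : register(u1); // structured buffers also allow aggregate elements
    SamplerComparisonState shadow_sampler;
    Texture2DMS<float4, 2 * 2> ms_tex;                  // sample count is evaluated statically
    float4 weights[2 * 2];                              // so are array dimensions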
@@ -4988,59 +5556,59 @@ var_modifiers: } | KW_EXTERN var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, &@1); } | KW_NOINTERPOLATION var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, &@1); } | KW_PRECISE var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); } | KW_SHARED var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); } | KW_GROUPSHARED var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, &@1); } | KW_STATIC var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, &@1); } | KW_UNIFORM var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, &@1); } | KW_VOLATILE var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, &@1); } | KW_CONST var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, &@1); } | KW_ROW_MAJOR var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, &@1); } | KW_COLUMN_MAJOR var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, &@1); } | KW_IN var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, &@1); } | KW_OUT var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, &@1); } | KW_INOUT var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); }
@@ -5145,6 +5713,7 @@ statement: declaration_statement | expr_statement | compound_statement + | discard_statement + | jump_statement | selection_statement | loop_statement @@ -5152,7 +5721,7 @@ statement: jump_statement: KW_RETURN expr ';' { - if (!add_return(ctx, $2, node_from_list($2), @1)) + if (!add_return(ctx, $2, node_from_list($2), &@1)) YYABORT; $$ = $2; } @@ -5160,65 +5729,81 @@ jump_statement: { if (!($$ = make_empty_list(ctx))) YYABORT; - if (!add_return(ctx, $$, NULL, @1)) + if (!add_return(ctx, $$, NULL, &@1)) + YYABORT; + } + +discard_statement: + KW_DISCARD ';' + { + struct hlsl_ir_node *discard; + + if (!($$ = make_empty_list(ctx))) YYABORT; + if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) + YYABORT; + list_add_tail($$, &discard->entry); }
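For reference, a minimal shader using the statement the new rule parses (the example itself is not from the patch):

    float4 main(float4 colour : COLOR0) : SV_TARGET
    {
        if (colour.a < 0.5)
            discard;    // lowered to a HLSL_IR_JUMP_DISCARD node
        return colour;
    }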
selection_statement: KW_IF '(' expr ')' if_body { struct hlsl_ir_node *condition = node_from_list($3); - struct hlsl_ir_if *instr; - - if (!(instr = hlsl_new_if(ctx, condition, @1))) + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *instr; + + hlsl_block_init(&then_block); + list_move_tail(&then_block.instrs, $5.then_block); + hlsl_block_init(&else_block); + if ($5.else_block) + list_move_tail(&else_block.instrs, $5.else_block); + vkd3d_free($5.then_block); + vkd3d_free($5.else_block); + + if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) YYABORT; - list_move_tail(&instr->then_instrs.instrs, $5.then_instrs); - if ($5.else_instrs) - list_move_tail(&instr->else_instrs.instrs, $5.else_instrs); - vkd3d_free($5.then_instrs); - vkd3d_free($5.else_instrs); if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, condition->data_type))) - hlsl_error(ctx, &instr->node.loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "if condition type %s is not scalar.", string->buffer); hlsl_release_string_buffer(ctx, string); } $$ = $3; - list_add_tail($$, &instr->node.entry); + list_add_tail($$, &instr->entry); }
if_body: statement { - $$.then_instrs = $1; - $$.else_instrs = NULL; + $$.then_block = $1; + $$.else_block = NULL; } | statement KW_ELSE statement { - $$.then_instrs = $1; - $$.else_instrs = $3; + $$.then_block = $1; + $$.else_block = $3; }
loop_statement: - KW_WHILE '(' expr ')' statement + attribute_list_optional KW_WHILE '(' expr ')' statement { - $$ = create_loop(ctx, LOOP_WHILE, NULL, $3, NULL, $5, @1); + $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $4, NULL, $6, &@2); } - | KW_DO statement KW_WHILE '(' expr ')' ';' + | attribute_list_optional KW_DO statement KW_WHILE '(' expr ')' ';' { - $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, @1); + $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $6, NULL, $3, &@2); } - | KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement + | attribute_list_optional KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); hlsl_pop_scope(ctx); } - | KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement + | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); hlsl_pop_scope(ctx); }
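Illustrative snippet (not from the patch): attribute_list_optional lets loop attributes reach create_loop(), so source such as the following now parses; the function name is invented:

    float accumulate(float values[8])
    {
        float sum = 0.0;

        [unroll]
        for (int i = 0; i < 8; ++i)
            sum += values[i];
        return sum;
    }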
@@ -5250,31 +5835,31 @@ func_arguments: primary_expr: C_FLOAT { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) YYABORT; } | C_INTEGER { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) YYABORT; } | boolean { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) { - hlsl_free_instr(&c->node); + hlsl_free_instr(c); YYABORT; } } @@ -5288,7 +5873,7 @@ primary_expr: hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); YYABORT; } - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; @@ -5316,7 +5901,7 @@ primary_expr: if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; @@ -5332,7 +5917,7 @@ postfix_expr: primary_expr | postfix_expr OP_INC { - if (!add_increment(ctx, $1, false, true, @2)) + if (!add_increment(ctx, $1, false, true, &@2)) { destroy_instr_list($1); YYABORT; @@ -5341,7 +5926,7 @@ postfix_expr: } | postfix_expr OP_DEC { - if (!add_increment(ctx, $1, true, true, @2)) + if (!add_increment(ctx, $1, true, true, &@2)) { destroy_instr_list($1); YYABORT; @@ -5352,7 +5937,7 @@ postfix_expr: { struct hlsl_ir_node *node = node_from_list($1);
- if (node->data_type->type == HLSL_CLASS_STRUCT) + if (node->data_type->class == HLSL_CLASS_STRUCT) { struct hlsl_type *type = node->data_type; const struct hlsl_struct_field *field; @@ -5365,20 +5950,20 @@ postfix_expr: }
field_idx = field - type->e.record.fields; - if (!add_record_load(ctx, $1, node, field_idx, @2)) + if (!add_record_access(ctx, $1, node, field_idx, &@2)) YYABORT; $$ = $1; } - else if (node->data_type->type <= HLSL_CLASS_LAST_NUMERIC) + else if (node->data_type->class <= HLSL_CLASS_LAST_NUMERIC) { - struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_node *swizzle;
if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); YYABORT; } - list_add_tail($1, &swizzle->node.entry); + list_add_tail($1, &swizzle->entry); $$ = $1; } else @@ -5391,10 +5976,10 @@ postfix_expr: { struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3);
- list_move_tail($1, $3); + list_move_head($1, $3); vkd3d_free($3);
- if (!add_array_load(ctx, $1, array, index, &@2)) + if (!add_array_access(ctx, $1, array, index, &@2)) { destroy_instr_list($1); YYABORT; @@ -5412,7 +5997,7 @@ postfix_expr: free_parse_initializer(&$4); YYABORT; } - if ($2->type > HLSL_CLASS_LAST_NUMERIC) + if ($2->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string;
@@ -5432,7 +6017,7 @@ postfix_expr: YYABORT; }
- if (!($$ = add_constructor(ctx, $2, &$4, @2))) + if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { free_parse_initializer(&$4); YYABORT; @@ -5459,7 +6044,7 @@ unary_expr: postfix_expr | OP_INC unary_expr { - if (!add_increment(ctx, $2, false, false, @1)) + if (!add_increment(ctx, $2, false, false, &@1)) { destroy_instr_list($2); YYABORT; @@ -5468,7 +6053,7 @@ unary_expr: } | OP_DEC unary_expr { - if (!add_increment(ctx, $2, true, false, @1)) + if (!add_increment(ctx, $2, true, false, &@1)) { destroy_instr_list($2); YYABORT; @@ -5545,31 +6130,31 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); } | mul_expr '/' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); } | mul_expr '%' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); }
add_expr: mul_expr | add_expr '+' mul_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } | add_expr '-' mul_expr { struct hlsl_ir_node *neg;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), @2))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) YYABORT; list_add_tail($3, &neg->entry); - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); }
shift_expr: @@ -5587,30 +6172,30 @@ relational_expr: shift_expr | relational_expr '<' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); } | relational_expr '>' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, @2); + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); } | relational_expr OP_LE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); } | relational_expr OP_GE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); }
equality_expr: relational_expr | equality_expr OP_EQ relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); } | equality_expr OP_NE relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); }
bitand_expr: @@ -5652,7 +6237,26 @@ conditional_expr: logicor_expr | logicor_expr '?' expr ':' assignment_expr { - hlsl_fixme(ctx, &@$, "Ternary operator."); + struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); + struct hlsl_type *common_type; + + list_move_tail($1, $3); + list_move_tail($1, $5); + vkd3d_free($3); + vkd3d_free($5); + + if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) + YYABORT; + + if (!(first = add_implicit_conversion(ctx, $1, first, common_type, &@3))) + YYABORT; + + if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) + YYABORT; + + if (!hlsl_add_conditional(ctx, $1, cond, first, second)) + YYABORT; + $$ = $1; }
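A minimal sketch of the construct the new action handles (example invented): both result operands are converted to a common numeric type before hlsl_add_conditional() emits the selection:

    float4 pick(float t, float4 a, float4 b)
    {
        return t > 0.5 ? a : b;
    }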
assignment_expr: diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index ab59875738c..765b1907426 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -27,11 +27,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str enum hlsl_regset regset, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *idx_offset = NULL; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c;
- list_init(&block->instrs); + hlsl_block_init(block);
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: idx_offset = idx; @@ -41,11 +41,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { if (!(c = hlsl_new_uint_constant(ctx, 4, loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) + if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset);
break; } @@ -56,25 +56,25 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str
if (!(c = hlsl_new_uint_constant(ctx, size, loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) + if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset);
break; }
case HLSL_CLASS_STRUCT: { - unsigned int field_idx = hlsl_ir_constant(idx)->value[0].u; + unsigned int field_idx = hlsl_ir_constant(idx)->value.u[0].u; struct hlsl_struct_field *field = &type->e.record.fields[field_idx];
if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset[regset], loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c);
- idx_offset = &c->node; + idx_offset = c;
break; } @@ -87,7 +87,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, offset, idx_offset))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset); }
return idx_offset; @@ -101,7 +101,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st struct hlsl_type *type; unsigned int i;
- list_init(&block->instrs); + hlsl_block_init(block);
assert(deref->var); type = deref->var->data_type; @@ -114,7 +114,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st deref->offset_regset, loc))) return NULL;
- list_move_tail(&block->instrs, &idx_block.instrs); + hlsl_block_add_block(block, &idx_block);
type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); } @@ -123,15 +123,14 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st }
/* TODO: remove when no longer needed, only used for transform_deref_paths_into_offsets() */ -static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, +static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { const struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block;
- if (!deref->var) - return; + assert(deref->var);
/* register offsets shouldn't be used before this point is reached. */ assert(!deref->offset.node); @@ -140,48 +139,22 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der
/* Instructions that directly refer to structs or arrays (instead of single-register components) * are removed later by dce. So it is not a problem to just cleanup their derefs. */ - if (type->type == HLSL_CLASS_STRUCT || type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY) { hlsl_cleanup_deref(deref); - return; + return true; }
deref->offset_regset = hlsl_type_get_regset(type);
if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) - return; + return false; list_move_before(&instr->entry, &block.instrs);
hlsl_cleanup_deref(deref); hlsl_src_from_node(&deref->offset, offset); -} - -/* TODO: remove when no longer needed. */ -static bool transform_deref_paths_into_offsets(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -{ - switch(instr->type) - { - case HLSL_IR_LOAD: - replace_deref_path_with_offset(ctx, &hlsl_ir_load(instr)->src, instr); - return true; - - case HLSL_IR_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_store(instr)->lhs, instr); - return true;
- case HLSL_IR_RESOURCE_LOAD: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->resource, instr); - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); - return true; - - case HLSL_IR_RESOURCE_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_store(instr)->resource, instr); - return true; - - default: - return false; - } - return false; + return true; }
/* Split uniforms into two variables representing the constant and temp @@ -191,14 +164,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru { struct vkd3d_string_buffer *name; struct hlsl_ir_var *uniform; - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; struct hlsl_ir_load *load;
/* Use the synthetic name for the temp, rather than the uniform, so that we * can write the uniform name into the shader reflection data. */
if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, - temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) + &temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) return; list_add_before(&temp->scope_entry, &uniform->scope_entry); list_add_tail(&ctx->extern_vars, &uniform->extern_entry); @@ -212,17 +185,53 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru temp->name = hlsl_strdup(ctx, name->buffer); hlsl_release_string_buffer(ctx, name);
- if (!(load = hlsl_new_var_load(ctx, uniform, temp->loc))) + if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) return; list_add_head(instrs, &load->node.entry);
if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) return; - list_add_after(&load->node.entry, &store->node.entry); + list_add_after(&load->node.entry, &store->entry); +} + +static void validate_field_semantic(struct hlsl_ctx *ctx, struct hlsl_struct_field *field) +{ + if (!field->semantic.name && hlsl_get_multiarray_element_type(field->type)->class <= HLSL_CLASS_LAST_NUMERIC + && !field->semantic.reported_missing) + { + hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Field '%s' is missing a semantic.", field->name); + field->semantic.reported_missing = true; + } +} + +static enum hlsl_base_type base_type_get_semantic_equivalent(enum hlsl_base_type base) +{ + if (base == HLSL_TYPE_BOOL) + return HLSL_TYPE_UINT; + if (base == HLSL_TYPE_INT) + return HLSL_TYPE_UINT; + if (base == HLSL_TYPE_HALF) + return HLSL_TYPE_FLOAT; + return base; +} + +static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hlsl_type *type1, + const struct hlsl_type *type2) +{ + if (ctx->profile->major_version < 4) + return true; + + if (type1->dimx != type2->dimx) + return false; + + return base_type_get_semantic_equivalent(type1->base_type) + == base_type_get_semantic_equivalent(type2->base_type); }
static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct hlsl_type *type, unsigned int modifiers, const struct hlsl_semantic *semantic, bool output) + struct hlsl_type *type, unsigned int modifiers, struct hlsl_semantic *semantic, + uint32_t index, bool output, const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; struct vkd3d_string_buffer *name; @@ -230,15 +239,50 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
if (!(name = hlsl_get_string_buffer(ctx))) return NULL; - vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, semantic->index); + vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, index); + + LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!ascii_strcasecmp(ext_var->name, name->buffer)) + { + if (output) + { + if (index >= semantic->reported_duplicated_output_next_index) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Output semantic \"%s%u\" is used multiple times.", semantic->name, index); + hlsl_note(ctx, &ext_var->loc, VKD3D_SHADER_LOG_ERROR, + "First use of \"%s%u\" is here.", semantic->name, index); + semantic->reported_duplicated_output_next_index = index + 1; + } + } + else + { + if (index >= semantic->reported_duplicated_input_incompatible_next_index + && !types_are_semantic_equivalent(ctx, ext_var->data_type, type)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Input semantic \"%s%u\" is used multiple times with incompatible types.", + semantic->name, index); + hlsl_note(ctx, &ext_var->loc, VKD3D_SHADER_LOG_ERROR, + "First declaration of \"%s%u\" is here.", semantic->name, index); + semantic->reported_duplicated_input_incompatible_next_index = index + 1; + } + } + + hlsl_release_string_buffer(ctx, name); + return ext_var; + } + } + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) { hlsl_release_string_buffer(ctx, name); return NULL; } - new_semantic.index = semantic->index; - if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), - type, var->loc, &new_semantic, modifiers, NULL))) + new_semantic.index = index; + if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, loc, &new_semantic, + modifiers, NULL))) { hlsl_release_string_buffer(ctx, name); hlsl_cleanup_semantic(&new_semantic); @@ -257,80 +301,116 @@ }
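As an illustration (not from the patch), the check above makes add_semantic_var() reject shaders such as the following, where the same output semantic is bound twice:

    void main(out float4 a : COLOR0, out float4 b : COLOR0)
    {
        a = float4(1.0, 0.0, 0.0, 1.0);
        b = float4(0.0, 1.0, 0.0, 1.0);
    }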
static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { - struct hlsl_type *type = lhs->node.data_type, *vector_type; + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; + struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_ir_var *var = lhs->src.var; + struct hlsl_ir_node *c; unsigned int i;
- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + if (type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + if (!(string = hlsl_type_to_string(ctx, type))) + return; + hlsl_fixme(ctx, &var->loc, "Input semantics for type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + if (!semantic->name) + return; + + vector_type_src = hlsl_get_vector_type(ctx, type->base_type, + (ctx->profile->major_version < 4) ? 4 : hlsl_type_minor_size(type)); + vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
for (i = 0; i < hlsl_type_major_size(type); ++i) { - struct hlsl_semantic semantic_copy = *semantic; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *store, *cast; struct hlsl_ir_var *input; struct hlsl_ir_load *load;
- semantic_copy.index = semantic->index + i; - - if (!(input = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, false))) + if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, + semantic_index + i, false, loc))) return;
- if (!(load = hlsl_new_var_load(ctx, input, var->loc))) + if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) return; list_add_after(&lhs->node.entry, &load->node.entry);
- if (type->type == HLSL_CLASS_MATRIX) + if (!(cast = hlsl_new_cast(ctx, &load->node, vector_type_dst, &var->loc))) + return; + list_add_after(&load->node.entry, &cast->entry); + + if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_after(&load->node.entry, &c->node.entry); + list_add_after(&cast->entry, &c->entry);
- if (!(store = hlsl_new_store_index(ctx, &lhs->src, &c->node, &load->node, 0, &var->loc))) + if (!(store = hlsl_new_store_index(ctx, &lhs->src, c, cast, 0, &var->loc))) return; - list_add_after(&c->node.entry, &store->node.entry); + list_add_after(&c->entry, &store->entry); } else { assert(i == 0);
- if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, &load->node, 0, &var->loc))) + if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) return; - list_add_after(&load->node.entry, &store->node.entry); + list_add_after(&cast->entry, &store->entry); } } }
-static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs) +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_type *type = lhs->node.data_type; struct hlsl_ir_var *var = lhs->src.var; - size_t i; + struct hlsl_ir_node *c; + unsigned int i;
- for (i = 0; i < type->e.record.field_count; ++i) + if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - struct hlsl_ir_load *field_load; - struct hlsl_ir_constant *c; + struct hlsl_ir_load *element_load; + struct hlsl_struct_field *field; + uint32_t elem_semantic_index;
- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - list_add_after(&lhs->node.entry, &c->node.entry); + for (i = 0; i < hlsl_type_element_count(type); ++i) + { + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + } + else + { + field = &type->e.record.fields[i]; + if (hlsl_type_is_resource(field->type)) + continue; + validate_field_semantic(ctx, field); + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; + }
- /* This redundant load is expected to be deleted later by DCE. */ - if (!(field_load = hlsl_new_load_index(ctx, &lhs->src, &c->node, &var->loc))) - return; - list_add_after(&c->node.entry, &field_load->node.entry); + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; + list_add_after(&lhs->node.entry, &c->entry);
- if (field->type->type == HLSL_CLASS_STRUCT) - prepend_input_struct_copy(ctx, instrs, field_load); - else if (field->semantic.name) - prepend_input_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); - else - hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Field '%s' is missing a semantic.", field->name); + /* This redundant load is expected to be deleted later by DCE. */ + if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) + return; + list_add_after(&c->entry, &element_load->node.entry); + + prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { + prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); } }
@@ -341,45 +421,51 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; list_add_head(instrs, &load->node.entry);
- if (var->data_type->type == HLSL_CLASS_STRUCT) - prepend_input_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - prepend_input_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); + prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
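For reference, the reworked bookkeeping above advances one vec4 register per matrix row and element_reg_size / 4 registers per array element. A standalone sketch of the arithmetic (not part of the patch; the input type and its 12-component, register-aligned element size are hypothetical):

#include <stdio.h>

int main(void)
{
    /* A hypothetical input "float3x3 m[2] : TEXCOORD3". */
    unsigned int base_index = 3;        /* semantic index of TEXCOORD3 */
    unsigned int element_reg_size = 12; /* components per array element, register-aligned */
    unsigned int i, row;

    for (i = 0; i < 2; ++i)
    {
        /* elem_semantic_index, as computed in prepend_input_copy_recurse(). */
        unsigned int elem = base_index + i * element_reg_size / 4;

        for (row = 0; row < 3; ++row) /* one semantic var per row, as in prepend_input_copy() */
            printf("m[%u], row %u -> TEXCOORD%u\n", i, row, elem + row);
    }
    return 0;
}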
static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; + struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_ir_var *var = rhs->src.var; + struct hlsl_ir_node *c; unsigned int i;
+ if (type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + if (!(string = hlsl_type_to_string(ctx, type))) + return; + hlsl_fixme(ctx, &var->loc, "Output semantics for type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + if (!semantic->name) + return; + vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
for (i = 0; i < hlsl_type_major_size(type); ++i) { - struct hlsl_semantic semantic_copy = *semantic; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *store; struct hlsl_ir_var *output; struct hlsl_ir_load *load;
- semantic_copy.index = semantic->index + i; - - if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, true))) + if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) return;
- if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->node.entry); + list_add_tail(instrs, &c->entry);
- if (!(load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; list_add_tail(instrs, &load->node.entry); } @@ -394,38 +480,57 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - list_add_tail(instrs, &store->node.entry); + list_add_tail(instrs, &store->entry); } }
-static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs) +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_type *type = rhs->node.data_type; struct hlsl_ir_var *var = rhs->src.var; - size_t i; + struct hlsl_ir_node *c; + unsigned int i;
- for (i = 0; i < type->e.record.field_count; ++i) + if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - struct hlsl_ir_load *field_load; - struct hlsl_ir_constant *c; + struct hlsl_ir_load *element_load; + struct hlsl_struct_field *field; + uint32_t elem_semantic_index;
- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - list_add_tail(instrs, &c->node.entry); + for (i = 0; i < hlsl_type_element_count(type); ++i) + { + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + } + else + { + field = &type->e.record.fields[i]; + if (hlsl_type_is_resource(field->type)) + continue; + validate_field_semantic(ctx, field); + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; + }
- /* This redundant load is expected to be deleted later by DCE. */ - if (!(field_load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) - return; - list_add_tail(instrs, &field_load->node.entry); + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; + list_add_tail(instrs, &c->entry);
- if (field->type->type == HLSL_CLASS_STRUCT) - append_output_struct_copy(ctx, instrs, field_load); - else if (field->semantic.name) - append_output_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); - else - hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Field '%s' is missing a semantic.", field->name); + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; + list_add_tail(instrs, &element_load->node.entry); + + append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { + append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); } }
@@ -437,17 +542,14 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; list_add_tail(instrs, &load->node.entry);
- if (var->data_type->type == HLSL_CLASS_STRUCT) - append_output_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - append_output_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); + append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
-static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context) { struct hlsl_ir_node *instr, *next; @@ -459,11 +561,11 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- progress |= transform_ir(ctx, func, &iff->then_instrs, context); - progress |= transform_ir(ctx, func, &iff->else_instrs, context); + progress |= hlsl_transform_ir(ctx, func, &iff->then_block, context); + progress |= hlsl_transform_ir(ctx, func, &iff->else_block, context); } else if (instr->type == HLSL_IR_LOOP) - progress |= transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); + progress |= hlsl_transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context);
progress |= func(ctx, instr, context); } @@ -471,6 +573,44 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx return progress; }
+static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + bool res; + bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *) = context; + + switch(instr->type) + { + case HLSL_IR_LOAD: + res = func(ctx, &hlsl_ir_load(instr)->src, instr); + return res; + + case HLSL_IR_STORE: + res = func(ctx, &hlsl_ir_store(instr)->lhs, instr); + return res; + + case HLSL_IR_RESOURCE_LOAD: + res = func(ctx, &hlsl_ir_resource_load(instr)->resource, instr); + if (hlsl_ir_resource_load(instr)->sampler.var) + res |= func(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); + return res; + + case HLSL_IR_RESOURCE_STORE: + res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); + return res; + + default: + return false; + } + return false; +} + +static bool transform_derefs(struct hlsl_ctx *ctx, + bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *), + struct hlsl_block *block) +{ + return hlsl_transform_ir(ctx, transform_instr_derefs, block, func); +} + struct recursive_call_ctx { const struct hlsl_ir_function_decl **backtrace; @@ -506,7 +646,7 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst return false; call_ctx->backtrace[call_ctx->count++] = decl;
- transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); + hlsl_transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx);
--call_ctx->count;
@@ -516,21 +656,23 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static void insert_early_return_break(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) { - struct hlsl_ir_jump *jump; + struct hlsl_ir_node *iff, *jump; + struct hlsl_block then_block; struct hlsl_ir_load *load; - struct hlsl_ir_if *iff;
- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) + hlsl_block_init(&then_block); + + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) return; list_add_after(&cf_instr->entry, &load->node.entry);
- if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) return; - list_add_after(&load->node.entry, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, cf_instr->loc))) + if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc))) return; - list_add_tail(&iff->then_instrs.instrs, &jump->node.entry); + list_add_after(&load->node.entry, &iff->entry); }
/* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ @@ -566,7 +708,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun * the CF instruction, shove it into an if block, and then lower that if * block. * - * (We could return a "did we make progress" boolean like transform_ir() + * (We could return a "did we make progress" boolean like hlsl_transform_ir() * and run this pass multiple times, but we already know the only block * that still needs to be addressed, so there's not much point.) * @@ -591,8 +733,8 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- has_early_return |= lower_return(ctx, func, &iff->then_instrs, in_loop); - has_early_return |= lower_return(ctx, func, &iff->else_instrs, in_loop); + has_early_return |= lower_return(ctx, func, &iff->then_block, in_loop); + has_early_return |= lower_return(ctx, func, &iff->else_block, in_loop);
if (has_early_return) { @@ -628,18 +770,17 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (instr->type == HLSL_IR_JUMP) { struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); - struct hlsl_ir_constant *constant; - struct hlsl_ir_store *store; + struct hlsl_ir_node *constant, *store;
if (jump->type == HLSL_IR_JUMP_RETURN) { if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) return false; - list_add_before(&jump->node.entry, &constant->node.entry); + list_add_before(&jump->node.entry, &constant->entry);
- if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) + if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, constant))) return false; - list_add_after(&constant->node.entry, &store->node.entry); + list_add_after(&constant->entry, &store->entry);
has_early_return = true; if (in_loop) @@ -675,9 +816,9 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (cf_instr) { struct list *tail = list_tail(&block->instrs); + struct hlsl_ir_node *not, *iff; + struct hlsl_block then_block; struct hlsl_ir_load *load; - struct hlsl_ir_node *not; - struct hlsl_ir_if *iff;
/* If we're in a loop, we should have used "break" instead. */ assert(!in_loop); @@ -685,21 +826,21 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun if (tail == &cf_instr->entry) return has_early_return;
- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) - return false; - list_add_tail(&block->instrs, &load->node.entry); + hlsl_block_init(&then_block); + list_move_slice_tail(&then_block.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + lower_return(ctx, func, &then_block, in_loop);
-    if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc)))
+    if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc)))
         return false;
-    list_add_tail(&block->instrs, &not->entry);
+    hlsl_block_add_instr(block, &load->node);
- if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, &cf_instr->loc))) return false; - list_add_tail(&block->instrs, &iff->node.entry); - - list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + hlsl_block_add_instr(block, not);
- lower_return(ctx, func, &iff->then_instrs, in_loop); + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &cf_instr->loc))) + return false; + list_add_tail(&block->instrs, &iff->entry); }
     return has_early_return;
@@ -721,7 +862,6 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *
             hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
                     "Function \"%s\" is not defined.", decl->func->name);
- list_init(&block.instrs); if (!hlsl_clone_block(ctx, &block, &decl->body)) return false; list_move_before(&call->node.entry, &block.instrs); @@ -731,6 +871,142 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * return true; }
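To make the return lowering above easier to follow: a return inside a loop becomes a store to early_return_var plus a break inserted right after the enclosing control flow, and any trailing code is guarded on the flag. A minimal C analogue (not part of the patch; the names are stand-ins for the synthesized IR):

#include <stdbool.h>
#include <stdio.h>

static int original(int x)
{
    int i;

    for (i = 0; i < 4; ++i)
    {
        if (x + i == 2)
            return i; /* early return inside a loop */
    }
    return -1;
}

static int lowered(int x)
{
    bool returned = false; /* plays the role of func->early_return_var */
    int ret = -1, i;

    for (i = 0; i < 4; ++i)
    {
        if (x + i == 2)
        {
            ret = i;         /* store to the return value */
            returned = true; /* store emitted by lower_return() */
        }
        if (returned)        /* emitted by insert_early_return_break() */
            break;
    }
    return ret;
}

int main(void)
{
    int x;

    for (x = 0; x < 4; ++x)
        printf("%d %d\n", original(x), lowered(x));
    return 0;
}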
+static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index,
+        const struct vkd3d_shader_location *loc)
+{
+    unsigned int dim_count = index->data_type->dimx;
+    struct hlsl_ir_node *store, *zero;
+    struct hlsl_ir_load *coords_load;
+    struct hlsl_deref coords_deref;
+    struct hlsl_ir_var *coords;
+
+    assert(dim_count < 4);
+
+    if (!(coords = hlsl_new_synthetic_var(ctx, "coords",
+            hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc)))
+        return NULL;
+
+    hlsl_init_simple_deref_from_var(&coords_deref, coords);
+    if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc)))
+        return NULL;
+    list_add_after(&index->entry, &store->entry);
+
+    if (!(zero = hlsl_new_uint_constant(ctx, 0, loc)))
+        return NULL;
+    list_add_after(&store->entry, &zero->entry);
+
+    if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc)))
+        return NULL;
+    list_add_after(&zero->entry, &store->entry);
+
+    if (!(coords_load = hlsl_new_var_load(ctx, coords, loc)))
+        return NULL;
+    list_add_after(&store->entry, &coords_load->node.entry);
+
+    return &coords_load->node;
+}
+
+/* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct
+ * record access before knowing if they will be used in the lhs of an assignment --in which case
+ * they are lowered into a deref-- or as the load of an element within a larger value.
+ * For the latter case, this pass takes care of lowering hlsl_ir_indexes into individual
+ * hlsl_ir_loads, or individual hlsl_ir_resource_loads, in case the indexing is a
+ * resource access. */
+static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+    struct hlsl_ir_node *val, *store;
+    struct hlsl_deref var_deref;
+    struct hlsl_ir_index *index;
+    struct hlsl_ir_load *load;
+    struct hlsl_ir_var *var;
+
+    if (instr->type != HLSL_IR_INDEX)
+        return false;
+    index = hlsl_ir_index(instr);
+    val = index->val.node;
+
+    if (hlsl_index_is_resource_access(index))
+    {
+        unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim);
+        struct hlsl_ir_node *coords = index->idx.node;
+        struct hlsl_resource_load_params params = {0};
+        struct hlsl_ir_node *load;
+
+        assert(coords->data_type->class == HLSL_CLASS_VECTOR);
+        assert(coords->data_type->base_type == HLSL_TYPE_UINT);
+        assert(coords->data_type->dimx == dim_count);
+
+        if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc)))
+            return false;
+
+        params.type = HLSL_RESOURCE_LOAD;
+        params.resource = val;
+        params.coords = coords;
+        params.format = val->data_type->e.resource_format;
+
+        if (!(load = hlsl_new_resource_load(ctx, &params, &instr->loc)))
+            return false;
+        list_add_before(&instr->entry, &load->entry);
+        hlsl_replace_node(instr, load);
+        return true;
+    }
+
+    if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc)))
+        return false;
+    hlsl_init_simple_deref_from_var(&var_deref, var);
+
+    if (!(store = hlsl_new_simple_store(ctx, var, val)))
+        return false;
+    list_add_before(&instr->entry, &store->entry);
+
+    if (hlsl_index_is_noncontiguous(index))
+    {
+        struct hlsl_ir_node *mat = index->val.node;
+        struct hlsl_deref row_deref;
+        unsigned int i;
+
+        assert(!hlsl_type_is_row_major(mat->data_type));
+
+        if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc)))
+            return false;
+        hlsl_init_simple_deref_from_var(&row_deref, var);
+
+        for (i = 0; i < mat->data_type->dimx; ++i)
+        {
+            struct hlsl_ir_node *c;
+
+            if (!(c = hlsl_new_uint_constant(ctx, i, &instr->loc)))
+                return false;
+            list_add_before(&instr->entry, &c->entry);
+
+            if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc)))
+                return false;
+            list_add_before(&instr->entry, &load->node.entry);
+
+            if (!(load = hlsl_new_load_index(ctx, &load->src, index->idx.node, &instr->loc)))
+                return false;
+            list_add_before(&instr->entry, &load->node.entry);
+
+            if (!(store = hlsl_new_store_index(ctx, &row_deref, c, &load->node, 0, &instr->loc)))
+                return false;
+            list_add_before(&instr->entry, &store->entry);
+        }
+
+        if (!(load = hlsl_new_var_load(ctx, var, &instr->loc)))
+            return false;
+        list_add_before(&instr->entry, &load->node.entry);
+        hlsl_replace_node(instr, &load->node);
+    }
+    else
+    {
+        if (!(load = hlsl_new_load_index(ctx, &var_deref, index->idx.node, &instr->loc)))
+            return false;
+        list_add_before(&instr->entry, &load->node.entry);
+        hlsl_replace_node(instr, &load->node);
+    }
+    return true;
+}
+
 /* Lower casts from vec1 to vecN to swizzles. */
 static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 {
@@ -746,26 +1022,24 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v
     src_type = cast->operands[0].node->data_type;
     dst_type = cast->node.data_type;
- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && src_type->dimx == 1) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) { - struct hlsl_ir_node *replacement; - struct hlsl_ir_swizzle *swizzle; - struct hlsl_ir_expr *new_cast; + struct hlsl_ir_node *replacement, *new_cast, *swizzle;
dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); /* We need to preserve the cast since it might be doing more than just * turning the scalar into a vector. */ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) return false; - list_add_after(&cast->node.entry, &new_cast->node.entry); - replacement = &new_cast->node; + list_add_after(&cast->node.entry, &new_cast->entry); + replacement = new_cast;
if (dst_type->dimx != 1) { if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, replacement, &cast->node.loc))) return false; - list_add_after(&new_cast->node.entry, &swizzle->node.entry); - replacement = &swizzle->node; + list_add_after(&new_cast->entry, &swizzle->entry); + replacement = swizzle; }
hlsl_replace_node(&cast->node, replacement); @@ -949,9 +1223,9 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ path_node = deref->path[depth].node; subtype = hlsl_get_element_type_from_path_index(ctx, type, path_node);
- if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { - unsigned int idx = hlsl_ir_constant(path_node)->value[0].u; + unsigned int idx = hlsl_ir_constant(path_node)->value.u[0].u;
for (i = 0; i < idx; ++i) comp_start += hlsl_type_component_count(type->e.record.fields[i].type); @@ -966,7 +1240,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ if (path_node->type == HLSL_IR_CONSTANT) { copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, - depth + 1, hlsl_ir_constant(path_node)->value[0].u * subtype_comp_count, writemask); + depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, writemask); } else { @@ -1041,14 +1315,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count));
- if (instr->data_type->type != HLSL_CLASS_OBJECT) + if (instr->data_type->class != HLSL_CLASS_OBJECT) { - struct hlsl_ir_swizzle *swizzle_node; + struct hlsl_ir_node *swizzle_node;
if (!(swizzle_node = hlsl_new_swizzle(ctx, ret_swizzle, instr_component_count, new_instr, &instr->loc))) return false; - list_add_before(&instr->entry, &swizzle_node->node.entry); - new_instr = &swizzle_node->node; + list_add_before(&instr->entry, &swizzle_node->entry); + new_instr = swizzle_node; }
hlsl_replace_node(instr, new_instr); @@ -1061,9 +1335,9 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, { const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); const struct hlsl_ir_var *var = deref->var; - union hlsl_constant_value values[4] = {0}; - struct hlsl_ir_constant *cons; + struct hlsl_constant_value values = {0}; unsigned int start, count, i; + struct hlsl_ir_node *cons;
if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) return false; @@ -1076,21 +1350,17 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, || value->node->type != HLSL_IR_CONSTANT) return false;
- values[i] = hlsl_ir_constant(value->node)->value[value->component]; + values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component]; }
- if (!(cons = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) + if (!(cons = hlsl_new_constant(ctx, instr->data_type, &values, &instr->loc))) return false; - cons->value[0] = values[0]; - cons->value[1] = values[1]; - cons->value[2] = values[2]; - cons->value[3] = values[3]; - list_add_before(&instr->entry, &cons->node.entry); + list_add_before(&instr->entry, &cons->entry);
TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), cons);
- hlsl_replace_node(instr, &cons->node); + hlsl_replace_node(instr, cons); return true; }
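In other words, copy_propagation_replace_with_constant_vector folds a load only when every component in the loaded range traces back to a constant node. A toy model of that check (not part of the patch; the record type is a stand-in for the pass's per-component value tracking):

#include <stdbool.h>
#include <stdio.h>

struct traced_component
{
    bool is_constant;
    float f;
};

int main(void)
{
    /* What copy propagation might know about a 4-component variable. */
    struct traced_component traced[4] =
    {
        {true, 1.0f}, {true, 0.5f}, {true, 0.0f}, {true, 1.0f},
    };
    float values[4];
    unsigned int i;

    for (i = 0; i < 4; ++i)
    {
        if (!traced[i].is_constant)
            return 1; /* bail out: the load cannot be folded */
        values[i] = traced[i].f;
    }
    printf("load folded to {%g, %g, %g, %g}\n", values[0], values[1], values[2], values[3]);
    return 0;
}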
@@ -1099,7 +1369,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, { struct hlsl_type *type = load->node.data_type;
- switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -1220,7 +1490,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s { unsigned int writemask = store->writemask;
- if (store->rhs.node->data_type->type == HLSL_CLASS_OBJECT) + if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) writemask = VKD3DSP_WRITEMASK_0; copy_propagation_set_value(var_def, start, writemask, store->rhs.node); } @@ -1270,8 +1540,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block);
break; } @@ -1301,19 +1571,19 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if bool progress = false;
copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state); + progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); copy_propagation_state_destroy(&inner_state);
copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state); + progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); copy_propagation_state_destroy(&inner_state);
/* Ideally we'd invalidate the outer state looking at what was * touched in the two inner states, but this doesn't work for * loops (because we need to know what is invalidated in advance), * so we need copy_propagation_invalidate_from_block() anyway. */ - copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block);
return progress; } @@ -1379,7 +1649,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b return progress; }
-static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) { struct copy_propagation_state state; bool progress; @@ -1471,7 +1741,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_
static bool is_vec1(const struct hlsl_type *type) { - return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); + return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); }
static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -1505,21 +1775,20 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, const struct hlsl_ir_load *load, const unsigned int idx, struct hlsl_type *type) { - struct hlsl_ir_store *split_store; + struct hlsl_ir_node *split_store, *c; struct hlsl_ir_load *split_load; - struct hlsl_ir_constant *c;
if (!(c = hlsl_new_uint_constant(ctx, idx, &store->node.loc))) return false; - list_add_before(&store->node.entry, &c->node.entry); + list_add_before(&store->node.entry, &c->entry);
- if (!(split_load = hlsl_new_load_index(ctx, &load->src, &c->node, &store->node.loc))) + if (!(split_load = hlsl_new_load_index(ctx, &load->src, c, &store->node.loc))) return false; list_add_before(&store->node.entry, &split_load->node.entry);
- if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, &c->node, &split_load->node, 0, &store->node.loc))) + if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, c, &split_load->node, 0, &store->node.loc))) return false; - list_add_before(&store->node.entry, &split_store->node.entry); + list_add_before(&store->node.entry, &split_store->entry);
return true; } @@ -1538,7 +1807,7 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_ARRAY) + if (type->class != HLSL_CLASS_ARRAY) return false; element_type = type->e.array.type;
@@ -1575,7 +1844,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_STRUCT) + if (type->class != HLSL_CLASS_STRUCT) return false;
if (rhs->type != HLSL_IR_LOAD) @@ -1614,7 +1883,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_MATRIX) + if (type->class != HLSL_CLASS_MATRIX) return false; element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
@@ -1649,22 +1918,21 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) { - struct hlsl_ir_swizzle *swizzle; - struct hlsl_ir_expr *new_cast; + struct hlsl_ir_node *new_cast, *swizzle;
dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); /* We need to preserve the cast since it might be doing more than just * narrowing the vector. */ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) return false; - list_add_after(&cast->node.entry, &new_cast->node.entry); - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, &new_cast->node, &cast->node.loc))) + list_add_after(&cast->node.entry, &new_cast->entry); + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) return false; - list_add_after(&new_cast->node.entry, &swizzle->node.entry); + list_add_after(&new_cast->entry, &swizzle->entry);
- hlsl_replace_node(&cast->node, &swizzle->node); + hlsl_replace_node(&cast->node, swizzle); return true; }
@@ -1684,8 +1952,7 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (next_instr->type == HLSL_IR_SWIZZLE) { - struct hlsl_ir_swizzle *new_swizzle; - struct hlsl_ir_node *new_instr; + struct hlsl_ir_node *new_swizzle; unsigned int combined_swizzle;
combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, @@ -1695,9 +1962,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) return false;
- new_instr = &new_swizzle->node; - list_add_before(&instr->entry, &new_instr->entry); - hlsl_replace_node(instr, new_instr); + list_add_before(&instr->entry, &new_swizzle->entry); + hlsl_replace_node(instr, new_swizzle); return true; }
@@ -1725,6 +1991,81 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; }
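The swizzle folding above relies on composing the two masks. A standalone sketch of the composition rule (not part of the patch; the 2-bits-per-component encoding mirrors HLSL_SWIZZLE() and is an assumption of this illustration):

#include <stdio.h>

static unsigned int swizzle_get(unsigned int swizzle, unsigned int idx)
{
    return (swizzle >> (2 * idx)) & 0x3;
}

/* Component i of the combined swizzle selects what "first" selects at the
 * position that "second" selects at i. */
static unsigned int combine_swizzles(unsigned int first, unsigned int second, unsigned int dim)
{
    unsigned int ret = 0, i;

    for (i = 0; i < dim; ++i)
        ret |= swizzle_get(first, swizzle_get(second, i)) << (2 * i);
    return ret;
}

int main(void)
{
    unsigned int wzyx = 0x1b; /* selects components w, z, y, x */
    unsigned int yx = 0x01;   /* selects components y, x */
    unsigned int combined = combine_swizzles(wzyx, yx, 2);

    /* v.wzyx.yx == v.zw: prints 2 (z) and 3 (w). */
    printf("%u %u\n", swizzle_get(combined, 0), swizzle_get(combined, 1));
    return 0;
}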
+static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *idx; + struct hlsl_deref *deref; + struct hlsl_type *type; + unsigned int i; + + if (instr->type != HLSL_IR_LOAD) + return false; + + deref = &hlsl_ir_load(instr)->src; + assert(deref->var); + + if (deref->path_len == 0) + return false; + + type = deref->var->data_type; + for (i = 0; i < deref->path_len - 1; ++i) + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + + idx = deref->path[deref->path_len - 1].node; + + if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) + { + struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_constant_value value; + struct hlsl_ir_load *vector_load; + enum hlsl_ir_expr_op op; + + if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) + return false; + list_add_before(&instr->entry, &vector_load->node.entry); + + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) + return false; + list_add_before(&instr->entry, &swizzle->entry); + + value.u[0].u = 0; + value.u[1].u = 1; + value.u[2].u = 2; + value.u[3].u = 3; + if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc))) + return false; + list_add_before(&instr->entry, &c->entry); + + operands[0] = swizzle; + operands[1] = c; + if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, + hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + if (!(eq = hlsl_new_cast(ctx, eq, type, &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + op = HLSL_OP2_DOT; + if (type->dimx == 1) + op = type->base_type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; + + /* Note: We may be creating a DOT for bool vectors here, which we need to lower to + * LOGIC_OR + LOGIC_AND. */ + operands[0] = &vector_load->node; + operands[1] = eq; + if (!(dot = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) + return false; + list_add_before(&instr->entry, &dot->entry); + hlsl_replace_node(instr, dot); + + return true; + } + + return false; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -1737,7 +2078,7 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi if (expr->op != HLSL_OP2_DIV) return false;
- if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, instr->loc))) + if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, &instr->loc))) return false; list_add_before(&expr->node.entry, &rcp->entry); expr->op = HLSL_OP2_MUL; @@ -1758,7 +2099,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c if (expr->op != HLSL_OP1_SQRT) return false;
- if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, instr->loc))) + if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, &instr->loc))) return false; list_add_before(&expr->node.entry, &rsq->entry); expr->op = HLSL_OP1_RCP; @@ -1770,9 +2111,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c /* Lower DP2 to MUL + ADD */ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *mul, *replacement; - struct hlsl_ir_swizzle *add_x, *add_y; - struct hlsl_ir_constant *zero; + struct hlsl_ir_node *arg1, *arg2, *mul, *replacement, *zero, *add_x, *add_y; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) @@ -1791,11 +2130,11 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, &expr->node.loc))) return false; - list_add_before(&instr->entry, &zero->node.entry); + list_add_before(&instr->entry, &zero->entry);
operands[0] = arg1; operands[1] = arg2; - operands[2] = &zero->node; + operands[2] = zero;
if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc))) return false; @@ -1808,13 +2147,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co
if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) return false; - list_add_before(&instr->entry, &add_x->node.entry); + list_add_before(&instr->entry, &add_x->entry);
if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) return false; - list_add_before(&instr->entry, &add_y->node.entry); + list_add_before(&instr->entry, &add_y->entry);
- if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, &add_x->node, &add_y->node))) + if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, add_x, add_y))) return false; } list_add_before(&instr->entry, &replacement->entry); @@ -1836,7 +2175,7 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co if (expr->op != HLSL_OP1_ABS) return false;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
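lower_abs() is mostly context in this hunk; the pass rewrites ABS(x) in terms of NEG and MAX, with the NEG created above feeding a max(x, -x). A quick standalone check of that identity (not part of the patch):

#include <math.h>
#include <stdio.h>

static float abs_via_max(float x)
{
    float neg = -x;           /* HLSL_OP1_NEG */
    return x > neg ? x : neg; /* HLSL_OP2_MAX */
}

int main(void)
{
    const float tests[] = {-2.5f, -0.0f, 0.0f, 3.0f};
    unsigned int i;

    for (i = 0; i < 4; ++i)
        printf("%g -> %g (fabsf: %g)\n", tests[i], abs_via_max(tests[i]), fabsf(tests[i]));
    return 0;
}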
@@ -1848,77 +2187,124 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co return true; }
-static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +/* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). */ +static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_type *type = instr->data_type, *arg_type; - struct hlsl_ir_constant *zero; + struct hlsl_ir_node *arg, *neg, *sum, *frc, *half, *replacement; + struct hlsl_type *type = instr->data_type; + struct hlsl_constant_value half_value; + unsigned int i, component_count; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) return false; + expr = hlsl_ir_expr(instr); - if (expr->op != HLSL_OP1_CAST) - return false; - arg_type = expr->operands[0].node->data_type; - if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR) + arg = expr->operands[0].node; + if (expr->op != HLSL_OP1_ROUND) return false; - if (type->base_type != HLSL_TYPE_BOOL) + + component_count = hlsl_type_component_count(type); + for (i = 0; i < component_count; ++i) + half_value.u[i].f = 0.5f; + if (!(half = hlsl_new_constant(ctx, type, &half_value, &expr->node.loc))) return false;
- /* Narrowing casts should have already been lowered. */ - assert(type->dimx == arg_type->dimx); + list_add_before(&instr->entry, &half->entry);
- zero = hlsl_new_constant(ctx, arg_type, &instr->loc); - if (!zero) + if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, half))) return false; - list_add_before(&instr->entry, &zero->node.entry); + list_add_before(&instr->entry, &sum->entry);
- expr->op = HLSL_OP2_NEQUAL; - hlsl_src_from_node(&expr->operands[1], &zero->node); + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, sum, &instr->loc))) + return false; + list_add_before(&instr->entry, &frc->entry);
+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, frc, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); + + if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, sum, neg))) + return false; + list_add_before(&instr->entry, &replacement->entry); + + hlsl_replace_node(instr, replacement); return true; }
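The identity used by lower_round() above, with frc(x) = x - floor(x), reduces to floor(x + 0.5), i.e. round-half-up. A standalone check (not part of the patch):

#include <math.h>
#include <stdio.h>

static float frc(float x)
{
    return x - floorf(x); /* HLSL_OP1_FRACT */
}

static float round_via_frc(float x)
{
    float sum = x + 0.5f;  /* HLSL_OP2_ADD */
    return sum - frc(sum); /* ADD of sum and the negated fraction */
}

int main(void)
{
    const float tests[] = {1.2f, 1.5f, -1.2f, -1.5f, -2.7f};
    unsigned int i;

    for (i = 0; i < 5; ++i)
        printf("round(%g) -> %g\n", tests[i], round_via_frc(tests[i]));
    return 0;
}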
-struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_type *type = instr->data_type, *arg_type; + static const struct hlsl_constant_value zero_value; + struct hlsl_ir_node *zero; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP1_CAST) + return false; + arg_type = expr->operands[0].node->data_type; + if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_BOOL) + return false; + + /* Narrowing casts should have already been lowered. */ + assert(type->dimx == arg_type->dimx); + + zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); + if (!zero) + return false; + list_add_before(&instr->entry, &zero->entry); + + expr->op = HLSL_OP2_NEQUAL; + hlsl_src_from_node(&expr->operands[1], zero); + + return true; +} + +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { - struct hlsl_ir_store *store; + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *iff, *store; struct hlsl_ir_load *load; struct hlsl_ir_var *var; - struct hlsl_ir_if *iff;
assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type));
if (!(var = hlsl_new_synthetic_var(ctx, "conditional", if_true->data_type, &condition->loc))) return NULL;
- if (!(iff = hlsl_new_if(ctx, condition, condition->loc))) - return NULL; - list_add_tail(instrs, &iff->node.entry); + hlsl_block_init(&then_block); + hlsl_block_init(&else_block);
if (!(store = hlsl_new_simple_store(ctx, var, if_true))) return NULL; - list_add_tail(&iff->then_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&then_block, store);
if (!(store = hlsl_new_simple_store(ctx, var, if_false))) return NULL; - list_add_tail(&iff->else_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&else_block, store);
- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) + if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) + return NULL; + list_add_tail(instrs, &iff->entry); + + if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) return NULL; list_add_tail(instrs, &load->node.entry);
- return load; + return &load->node; }
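hlsl_add_conditional() now expresses the select through block-based control flow: both branches store to a synthetic variable and the caller reads it back. The C-level shape (not part of the patch; plain variables stand in for the synthesized IR):

#include <stdbool.h>
#include <stdio.h>

static int conditional(bool condition, int if_true, int if_false)
{
    int var; /* the synthetic "conditional" variable */

    if (condition)
        var = if_true;  /* the store appended to then_block */
    else
        var = if_false; /* the store appended to else_block */
    return var;         /* the final hlsl_new_var_load() */
}

int main(void)
{
    printf("%d %d\n", conditional(true, 1, 2), conditional(false, 1, 2));
    return 0;
}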
static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg; + struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; + struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; - struct hlsl_ir_load *cond; unsigned int i;
if (instr->type != HLSL_IR_EXPR) @@ -1928,69 +2314,67 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DIV) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) return false; list_add_before(&instr->entry, &xor->entry);
- if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - high_bit->value[i].u = 0x80000000; - list_add_before(&instr->entry, &high_bit->node.entry); + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &high_bit->entry);
- if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) return false; list_add_before(&instr->entry, &and->entry);
- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; list_add_before(&instr->entry, &abs1->entry);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); + list_add_before(&instr->entry, &cast1->entry);
- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &abs2->entry);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); + list_add_before(&instr->entry, &cast2->entry);
- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) return false; list_add_before(&instr->entry, &div->entry);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); + list_add_before(&instr->entry, &cast3->entry);
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) + if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) return false; - hlsl_replace_node(instr, &cond->node); + hlsl_replace_node(instr, cond);
return true; }
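The arithmetic in lower_int_division() is the usual sign-magnitude trick: the quotient's sign is the XOR of the operands' sign bits, and the division itself runs on absolute values as an unsigned op. A standalone check (not part of the patch; INT32_MIN and division by zero are deliberately left out):

#include <stdint.h>
#include <stdio.h>

static int32_t idiv_lowered(int32_t a, int32_t b)
{
    uint32_t high_bit = 0x80000000u;
    uint32_t sign = ((uint32_t)a ^ (uint32_t)b) & high_bit; /* BIT_XOR, BIT_AND */
    uint32_t ua = (uint32_t)(a < 0 ? -a : a);               /* ABS + CAST to utype */
    uint32_t ub = (uint32_t)(b < 0 ? -b : b);
    int32_t quot = (int32_t)(ua / ub);                      /* unsigned DIV, CAST back */

    return sign ? -quot : quot;                             /* NEG selected by the conditional */
}

int main(void)
{
    printf("%d %d %d %d\n", idiv_lowered(7, 2), idiv_lowered(-7, 2),
            idiv_lowered(7, -2), idiv_lowered(-7, -2));
    return 0;
}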
static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg; + struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; + struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; - struct hlsl_ir_load *cond; unsigned int i;
if (instr->type != HLSL_IR_EXPR) @@ -2000,53 +2384,53 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
- if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - high_bit->value[i].u = 0x80000000; - list_add_before(&instr->entry, &high_bit->node.entry); + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &high_bit->entry);
- if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) return false; list_add_before(&instr->entry, &and->entry);
- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; list_add_before(&instr->entry, &abs1->entry);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); + list_add_before(&instr->entry, &cast1->entry);
- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &abs2->entry);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); + list_add_before(&instr->entry, &cast2->entry);
- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) return false; list_add_before(&instr->entry, &div->entry);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); + list_add_before(&instr->entry, &cast3->entry);
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) + if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) return false; - hlsl_replace_node(instr, &cond->node); + hlsl_replace_node(instr, cond);
return true; } @@ -2063,14 +2447,14 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void
if (expr->op != HLSL_OP1_ABS) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false;
arg = expr->operands[0].node;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry);
@@ -2080,12 +2464,63 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void return true; }
+static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *arg1, *arg2, *mult, *comps[4] = {0}, *res; + struct hlsl_type *type = instr->data_type; + struct hlsl_ir_expr *expr; + unsigned int i, dimx; + bool is_bool; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (expr->op != HLSL_OP2_DOT) + return false; + + if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT + || type->base_type == HLSL_TYPE_BOOL) + { + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + assert(arg1->data_type->dimx == arg2->data_type->dimx); + dimx = arg1->data_type->dimx; + is_bool = type->base_type == HLSL_TYPE_BOOL; + + if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) + return false; + list_add_before(&instr->entry, &mult->entry); + + for (i = 0; i < dimx; ++i) + { + unsigned int s = hlsl_swizzle_from_writemask(1 << i); + + if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, mult, &instr->loc))) + return false; + list_add_before(&instr->entry, &comps[i]->entry); + } + + res = comps[0]; + for (i = 1; i < dimx; ++i) + { + if (!(res = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]))) + return false; + list_add_before(&instr->entry, &res->entry); + } + + hlsl_replace_node(instr, res); + return true; + } + + return false; +} + static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc; + struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; struct hlsl_type *type = instr->data_type, *btype; - struct hlsl_ir_constant *one; - struct hlsl_ir_load *cond; + struct hlsl_constant_value one_value; struct hlsl_ir_expr *expr; unsigned int i;
@@ -2096,17 +2531,17 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_FLOAT) return false; - btype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_BOOL, type->dimx, type->dimy); + btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy);
if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; list_add_before(&instr->entry, &mul1->entry);
- if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, instr->loc))) + if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) return false; list_add_before(&instr->entry, &neg1->entry);
@@ -2115,20 +2550,20 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr ge->data_type = btype; list_add_before(&instr->entry, &ge->entry);
- if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, instr->loc))) + if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &neg2->entry);
if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) return false;
- if (!(one = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - one->value[i].f = 1.0f; - list_add_before(&instr->entry, &one->node.entry); + one_value.u[i].f = 1.0f; + if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &one->entry);
- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &one->node, &cond->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) return false; list_add_before(&instr->entry, &div->entry);
@@ -2136,7 +2571,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; list_add_before(&instr->entry, &mul2->entry);
- if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, instr->loc))) + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) return false; list_add_before(&instr->entry, &frc->entry);
@@ -2144,7 +2579,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr hlsl_src_remove(&expr->operands[0]); hlsl_src_remove(&expr->operands[1]); hlsl_src_from_node(&expr->operands[0], frc); - hlsl_src_from_node(&expr->operands[1], &cond->node); + hlsl_src_from_node(&expr->operands[1], cond);
return true; } @@ -2155,6 +2590,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { case HLSL_IR_CONSTANT: case HLSL_IR_EXPR: + case HLSL_IR_INDEX: case HLSL_IR_LOAD: case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_SWIZZLE: @@ -2204,8 +2640,8 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in if (instr->type == HLSL_IR_IF) { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - index = index_instructions(&iff->then_instrs, index); - index = index_instructions(&iff->else_instrs, index); + index = index_instructions(&iff->then_block, index); + index = index_instructions(&iff->else_block, index); } else if (instr->type == HLSL_IR_LOOP) { @@ -2262,9 +2698,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) continue; regset = hlsl_type_get_regset(var->data_type);
- if (var->reg_reservation.type) + if (var->reg_reservation.reg_type && var->regs[regset].bind_count) { - if (var->reg_reservation.type != get_regset_name(regset)) + if (var->reg_reservation.reg_type != get_regset_name(regset)) { struct vkd3d_string_buffer *type_string;
@@ -2277,8 +2713,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) else { var->regs[regset].allocated = true; - var->regs[regset].id = var->reg_reservation.index; - TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.type, var->reg_reservation.index); + var->regs[regset].id = var->reg_reservation.reg_index; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, + var->reg_reservation.reg_index + var->regs[regset].bind_count); } } } @@ -2286,9 +2724,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx)
 /* Compute the earliest and latest liveness for each variable. In the case that
  * a variable is accessed inside of a loop, we promote its liveness to extend
- * to at least the range of the entire loop. Note that we don't need to do this
- * for anonymous nodes, since there's currently no way to use a node which was
- * calculated in an earlier iteration of the loop. */
+ * to at least the range of the entire loop. We also do this for nodes, so that
+ * nodes produced before the loop have their temp register protected from being
+ * overwritten after the last read within an iteration. */
 static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last)
 {
     struct hlsl_ir_node *instr;
@@ -2296,7 +2734,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { - const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index; + const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index;
switch (instr->type) { @@ -2311,9 +2749,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; - store->rhs.node->last_read = instr->index; + store->rhs.node->last_read = last_read; if (store->lhs.offset.node) - store->lhs.offset.node->last_read = instr->index; + store->lhs.offset.node->last_read = last_read; break; } case HLSL_IR_EXPR: @@ -2322,16 +2760,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop unsigned int i;
for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) - expr->operands[i].node->last_read = instr->index; + expr->operands[i].node->last_read = last_read; break; } case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- compute_liveness_recurse(&iff->then_instrs, loop_first, loop_last); - compute_liveness_recurse(&iff->else_instrs, loop_first, loop_last); - iff->condition.node->last_read = instr->index; + compute_liveness_recurse(&iff->then_block, loop_first, loop_last); + compute_liveness_recurse(&iff->else_block, loop_first, loop_last); + iff->condition.node->last_read = last_read; break; } case HLSL_IR_LOAD: @@ -2339,9 +2777,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_load *load = hlsl_ir_load(instr);
var = load->src.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->src.offset.node) - load->src.offset.node->last_read = instr->index; + load->src.offset.node->last_read = last_read; break; } case HLSL_IR_LOOP: @@ -2357,22 +2795,30 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
var = load->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->resource.offset.node) - load->resource.offset.node->last_read = instr->index; + load->resource.offset.node->last_read = last_read;
if ((var = load->sampler.var)) { - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->sampler.offset.node) - load->sampler.offset.node->last_read = instr->index; + load->sampler.offset.node->last_read = last_read; }
- load->coords.node->last_read = instr->index; + load->coords.node->last_read = last_read; if (load->texel_offset.node) - load->texel_offset.node->last_read = instr->index; + load->texel_offset.node->last_read = last_read; if (load->lod.node) - load->lod.node->last_read = instr->index; + load->lod.node->last_read = last_read; + if (load->ddx.node) + load->ddx.node->last_read = last_read; + if (load->ddy.node) + load->ddy.node->last_read = last_read; + if (load->sample_index.node) + load->sample_index.node->last_read = last_read; + if (load->cmp.node) + load->cmp.node->last_read = last_read; break; } case HLSL_IR_RESOURCE_STORE: @@ -2380,18 +2826,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
var = store->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (store->resource.offset.node) - store->resource.offset.node->last_read = instr->index; - store->coords.node->last_read = instr->index; - store->value.node->last_read = instr->index; + store->resource.offset.node->last_read = last_read; + store->coords.node->last_read = last_read; + store->value.node->last_read = last_read; break; } case HLSL_IR_SWIZZLE: { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
- swizzle->val.node->last_read = instr->index; + swizzle->val.node->last_read = last_read; + break; + } + case HLSL_IR_INDEX: + { + struct hlsl_ir_index *index = hlsl_ir_index(instr); + + index->val.node->last_read = last_read; + index->idx.node->last_read = last_read; break; } case HLSL_IR_CONSTANT: @@ -2426,127 +2880,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl compute_liveness_recurse(&entry_func->body, 0, 0); }
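Inside a loop, every read is treated as if it happened at the loop's last instruction; that is what keeps temps alive across iterations. A minimal standalone sketch of the clamp, with illustrative names rather than vkd3d API:

    #include <stdio.h>

    /* Illustrative only: mirrors the "last_read = loop_last ? max(...) : ..."
     * logic from compute_liveness_recurse() above. */
    static unsigned int clamp_last_read(unsigned int instr_index, unsigned int loop_last)
    {
        return loop_last ? (instr_index > loop_last ? instr_index : loop_last) : instr_index;
    }

    int main(void)
    {
        /* A node read at instruction 5 inside a loop ending at instruction 9
         * stays live until 9, so its temp register survives the whole loop. */
        printf("%u\n", clamp_last_read(5, 9)); /* 9 */
        /* Outside any loop (loop_last == 0) liveness ends at the read itself. */
        printf("%u\n", clamp_last_read(5, 0)); /* 5 */
        return 0;
    }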
-struct liveness +struct register_allocator { - size_t size; - uint32_t reg_count; - struct + size_t count, capacity; + + /* Highest register index that has been allocated. + * Used to declare sm4 temp count. */ + uint32_t max_reg; + + struct allocation { - /* 0 if not live yet. */ - unsigned int last_read; - } *regs; + uint32_t reg; + unsigned int writemask; + unsigned int first_write, last_read; + } *allocations; };
-static unsigned int get_available_writemask(struct liveness *liveness, - unsigned int first_write, unsigned int component_idx, unsigned int reg_size) +static unsigned int get_available_writemask(const struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_idx) { - unsigned int i, writemask = 0, count = 0; + unsigned int writemask = VKD3DSP_WRITEMASK_ALL; + size_t i;
- for (i = 0; i < 4; ++i) + for (i = 0; i < allocator->count; ++i) { - if (liveness->regs[component_idx + i].last_read <= first_write) - { - writemask |= 1u << i; - if (++count == reg_size) - return writemask; - } + const struct allocation *allocation = &allocator->allocations[i]; + + /* We do not overlap if first write == last read: + * this is the case where we are allocating the result of that + * expression, e.g. "add r0, r0, r1". */ + + if (allocation->reg == reg_idx + && first_write < allocation->last_read && last_read > allocation->first_write) + writemask &= ~allocation->writemask; + + if (!writemask) + break; }
- return 0; + return writemask; }
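The overlap rule above is deliberately half-open: an allocation whose first write coincides with another's last read may share the register, which is what permits instructions like "add r0, r0, r1" to write their result over a dying source. A self-contained sketch of the same test, with assumed field names mirroring the diff:

    #include <stdio.h>

    struct allocation { unsigned int reg, writemask, first_write, last_read; };

    /* Mirrors the conflict rule in get_available_writemask(): two live ranges
     * on the same register collide only if each starts strictly before the
     * other ends. */
    static unsigned int available_writemask(const struct allocation *a, size_t count,
            unsigned int first_write, unsigned int last_read, unsigned int reg)
    {
        unsigned int writemask = 0xf;
        size_t i;

        for (i = 0; i < count; ++i)
        {
            if (a[i].reg == reg && first_write < a[i].last_read && last_read > a[i].first_write)
                writemask &= ~a[i].writemask;
        }
        return writemask;
    }

    int main(void)
    {
        struct allocation a[] = {{0, 0x3 /* .xy */, 1, 5}};

        printf("%#x\n", available_writemask(a, 1, 2, 6, 0)); /* 0xc: only .zw free */
        printf("%#x\n", available_writemask(a, 1, 5, 8, 0)); /* 0xf: write starts at the last read */
        return 0;
    }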
-static bool resize_liveness(struct hlsl_ctx *ctx, struct liveness *liveness, size_t new_count) +static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, + uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) { - size_t old_capacity = liveness->size; + struct allocation *allocation;
- if (!hlsl_array_reserve(ctx, (void **)&liveness->regs, &liveness->size, new_count, sizeof(*liveness->regs))) - return false; + if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity, + allocator->count + 1, sizeof(*allocator->allocations))) + return;
- if (liveness->size > old_capacity) - memset(liveness->regs + old_capacity, 0, (liveness->size - old_capacity) * sizeof(*liveness->regs)); - return true; + allocation = &allocator->allocations[allocator->count++]; + allocation->reg = reg_idx; + allocation->writemask = writemask; + allocation->first_write = first_write; + allocation->last_read = last_read; + + allocator->max_reg = max(allocator->max_reg, reg_idx); }
/* reg_size is the number of register components to be reserved, while component_count is the number * of components for the register's writemask. In SM1, floats and vectors allocate the whole * register, even if they don't use it completely. */ -static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size, unsigned int component_count) { - unsigned int component_idx, writemask, i; struct hlsl_reg ret = {0}; + unsigned int writemask; + uint32_t reg_idx;
assert(component_count <= reg_size);
- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + for (reg_idx = 0;; ++reg_idx) { - if ((writemask = get_available_writemask(liveness, first_write, component_idx, reg_size))) + writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); + + if (vkd3d_popcount(writemask) >= reg_size) + { + writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); break; + } } - if (component_idx == liveness->size) - { - if (!resize_liveness(ctx, liveness, component_idx + 4)) - return ret; - writemask = (1u << reg_size) - 1; - } - for (i = 0; i < 4; ++i) - { - if (writemask & (1u << i)) - liveness->regs[component_idx + i].last_read = last_read; - } - ret.id = component_idx / 4; + + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); + + ret.id = reg_idx; + ret.bind_count = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + 1); return ret; }
-static bool is_range_available(struct liveness *liveness, unsigned int first_write, - unsigned int component_idx, unsigned int reg_size) +static bool is_range_available(const struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) { - unsigned int i; + uint32_t i;
- for (i = 0; i < reg_size; i += 4) + for (i = 0; i < (reg_size / 4); ++i) { - if (!get_available_writemask(liveness, first_write, component_idx + i, 4)) + if (get_available_writemask(allocator, first_write, last_read, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) return false; } return true; }
-static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size) { - unsigned int i, component_idx; struct hlsl_reg ret = {0}; + uint32_t reg_idx; + unsigned int i;
- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + for (reg_idx = 0;; ++reg_idx) { - if (is_range_available(liveness, first_write, component_idx, - min(reg_size, liveness->size - component_idx))) + if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) break; } - if (!resize_liveness(ctx, liveness, component_idx + reg_size)) - return ret;
- for (i = 0; i < reg_size; ++i) - liveness->regs[component_idx + i].last_read = last_read; - ret.id = component_idx / 4; + for (i = 0; i < reg_size / 4; ++i) + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); + + ret.id = reg_idx; + ret.bind_count = align(reg_size, 4) / 4; ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); return ret; }
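Since reg_size counts components while registers hold four, the bind count above rounds up. For instance, a float3x3 occupies 11 components but binds 3 whole registers. A small check, assuming the usual power-of-two align() rounding helper:

    #include <stdio.h>

    /* Assumed to match vkd3d's align(): round value up to a multiple of a
     * power-of-two alignment. */
    static unsigned int align_up(unsigned int value, unsigned int alignment)
    {
        return (value + alignment - 1) & ~(alignment - 1);
    }

    int main(void)
    {
        unsigned int reg_size = 11; /* e.g. a float3x3 */

        printf("%u\n", align_up(reg_size, 4) / 4); /* 3 registers bound */
        return 0;
    }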
-static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, const struct hlsl_type *type) { unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
- if (type->type <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, liveness, first_write, last_read, reg_size, type->dimx); + if (type->class <= HLSL_CLASS_VECTOR) + return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); else - return allocate_range(ctx, liveness, first_write, last_read, reg_size); + return allocate_range(ctx, allocator, first_write, last_read, reg_size); }
static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) @@ -2565,14 +3034,99 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
-static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) +static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + + if (regset == HLSL_REGSET_SAMPLERS) + { + enum hlsl_sampler_dim dim; + + assert(!load->sampler.var); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) + { + if (dim == HLSL_SAMPLER_DIM_GENERIC) + { + var->objects_usage[regset][index].first_sampler_dim_loc = instr->loc; + } + else + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Inconsistent generic sampler usage dimension."); + hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc, + VKD3D_SHADER_LOG_ERROR, "First use is here."); + return false; + } + } + var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + } + else + { + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + } + } + + return false; +} + +static void calculate_resource_register_counts(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + struct hlsl_type *type; + unsigned int i, k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + type = var->data_type; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + for (i = 0; i < type->reg_size[k]; ++i) + { + /* Samplers are only allocated until the last used one. */ + if (var->objects_usage[k][i].used) + var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + } + } + } +} + +static void allocate_variable_temp_register(struct hlsl_ctx *ctx, + struct hlsl_ir_var *var, struct register_allocator *allocator) { if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) return;
if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, liveness, + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, var->first_write, var->last_read, var->data_type);
TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', @@ -2580,15 +3134,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir } }
-static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct register_allocator *allocator) { struct hlsl_ir_node *instr;
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { + /* In SM4 all constants are inlined. */ + if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) + continue; + if (!instr->reg.allocated && instr->last_read) { - instr->reg = allocate_numeric_registers_for_type(ctx, liveness, instr->index, instr->last_read, + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); @@ -2599,8 +3158,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_temp_registers_recurse(ctx, &iff->then_instrs, liveness); - allocate_temp_registers_recurse(ctx, &iff->else_instrs, liveness); + allocate_temp_registers_recurse(ctx, &iff->then_block, allocator); + allocate_temp_registers_recurse(ctx, &iff->else_block, allocator); break; }
@@ -2609,21 +3168,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl struct hlsl_ir_load *load = hlsl_ir_load(instr); /* We need to at least allocate a variable for undefs. * FIXME: We should probably find a way to remove them instead. */ - allocate_variable_temp_register(ctx, load->src.var, liveness); + allocate_variable_temp_register(ctx, load->src.var, allocator); break; }
case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_temp_registers_recurse(ctx, &loop->body, liveness); + allocate_temp_registers_recurse(ctx, &loop->body, allocator); break; }
case HLSL_IR_STORE: { struct hlsl_ir_store *store = hlsl_ir_store(instr); - allocate_variable_temp_register(ctx, store->lhs.var, liveness); + allocate_variable_temp_register(ctx, store->lhs.var, allocator); break; }
@@ -2633,7 +3192,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl } }
-static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct register_allocator *allocator) { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_ir_node *instr; @@ -2649,7 +3209,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b unsigned int x, y, i, writemask, end_reg; unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
- constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type); + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, @@ -2662,7 +3222,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b defs->count = end_reg; }
- assert(type->type <= HLSL_CLASS_LAST_NUMERIC); + assert(type->class <= HLSL_CLASS_LAST_NUMERIC);
if (!(writemask = constant->reg.writemask)) writemask = (1u << type->dimx) - 1; @@ -2671,12 +3231,12 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b { for (x = 0, i = 0; x < 4; ++x) { - const union hlsl_constant_value *value; + const union hlsl_constant_value_component *value; float f;
if (!(writemask & (1u << x))) continue; - value = &constant->value[i++]; + value = &constant->value.u[i++];
switch (type->base_type) { @@ -2714,15 +3274,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_const_registers_recurse(ctx, &iff->then_instrs, liveness); - allocate_const_registers_recurse(ctx, &iff->else_instrs, liveness); + allocate_const_registers_recurse(ctx, &iff->then_block, allocator); + allocate_const_registers_recurse(ctx, &iff->else_block, allocator); break; }
case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_const_registers_recurse(ctx, &loop->body, liveness); + allocate_const_registers_recurse(ctx, &loop->body, allocator); break; }
@@ -2734,10 +3294,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b
static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct liveness liveness = {0}; + struct register_allocator allocator = {0}; struct hlsl_ir_var *var;
- allocate_const_registers_recurse(ctx, &entry_func->body, &liveness); + allocate_const_registers_recurse(ctx, &entry_func->body, &allocator);
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -2748,12 +3308,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (reg_size == 0) continue;
- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &liveness, + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, 1, UINT_MAX, var->data_type); TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } } + + vkd3d_free(allocator.allocations); }
/* Simple greedy temporary register allocation pass that just assigns a unique @@ -2762,15 +3324,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * does not handle constants. */ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct liveness liveness = {0}; - allocate_temp_registers_recurse(ctx, &entry_func->body, &liveness); - ctx->temp_count = liveness.reg_count; - vkd3d_free(liveness.regs); + struct register_allocator allocator = {0}; + + /* ps_1_* outputs are special and go in temp register 0. */ + if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + size_t i; + + for (i = 0; i < entry_func->parameters.count; ++i) + { + const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; + + if (var->is_output_semantic) + { + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); + break; + } + } + } + + allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); + ctx->temp_count = allocator.max_reg + 1; + vkd3d_free(allocator.allocations); }
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) { - static const char *shader_names[] = + static const char *const shader_names[] = { [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", @@ -2791,7 +3371,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var D3DDECLUSAGE usage; uint32_t usage_idx;
- if (!hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) + /* ps_1_* outputs are special and go in temp register 0. */ + if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + return; + + builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); + if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -2800,8 +3385,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
if ((!output && !var->last_read) || (output && !var->first_write)) return; - - builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); } else { @@ -2827,6 +3410,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; + var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); @@ -2853,23 +3437,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3
LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) { - if (buffer->used_size && buffer->reservation.type == 'b' && buffer->reservation.index == index) + if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) return buffer; } return NULL; }
-static void calculate_buffer_offset(struct hlsl_ir_var *var) +static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) { + unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + enum hlsl_type_class var_class = var->data_type->class; struct hlsl_buffer *buffer = var->buffer;
- buffer->size = hlsl_type_get_sm4_offset(var->data_type, buffer->size); + if (var->reg_reservation.offset_type == 'c') + { + if (var->reg_reservation.offset_index % 4) + { + if (var_class == HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with matrix types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_ARRAY) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with array types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_STRUCT) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with struct types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_VECTOR) + { + unsigned int aligned_offset = hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index); + + if (var->reg_reservation.offset_index != aligned_offset) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with vector types cannot span multiple registers."); + } + } + var->buffer_offset = var->reg_reservation.offset_index; + } + else + { + var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size); + }
- var->buffer_offset = buffer->size; TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); - buffer->size += var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); if (var->last_read) - buffer->used_size = buffer->size; + buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); +} + +static void validate_buffer_offsets(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var1, *var2; + struct hlsl_buffer *buffer; + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + continue; + + buffer = var1->buffer; + if (!buffer->used_size) + continue; + + LIST_FOR_EACH_ENTRY(var2, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int var1_reg_size, var2_reg_size; + + if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + continue; + + if (var1 == var2 || var1->buffer != var2->buffer) + continue; + + /* This is to avoid reporting the error twice for the same pair of overlapping variables. */ + if (strcmp(var1->name, var2->name) >= 0) + continue; + + var1_reg_size = var1->data_type->reg_size[HLSL_REGSET_NUMERIC]; + var2_reg_size = var2->data_type->reg_size[HLSL_REGSET_NUMERIC]; + + if (var1->buffer_offset < var2->buffer_offset + var2_reg_size + && var2->buffer_offset < var1->buffer_offset + var1_reg_size) + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() reservation: Variables %s and %s overlap.", + var1->name, var2->name); + } + } + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + buffer = var1->buffer; + if (!buffer || buffer == ctx->globals_buffer) + continue; + + if (var1->reg_reservation.offset_type + || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) + buffer->manually_packed_elements = true; + else + buffer->automatically_packed_elements = true; + + if (buffer->manually_packed_elements && buffer->automatically_packed_elements) + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() must be specified for all the buffer elements, or none of them."); + break; + } + } }
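The packoffset() overlap check in validate_buffer_offsets() is the standard intersection test on half-open ranges [offset, offset + reg_size). A standalone sketch:

    #include <stdbool.h>
    #include <stdio.h>

    /* Same condition as validate_buffer_offsets() above: two variables
     * overlap if each begins before the other ends. */
    static bool buffers_overlap(unsigned int off1, unsigned int size1,
            unsigned int off2, unsigned int size2)
    {
        return off1 < off2 + size2 && off2 < off1 + size1;
    }

    int main(void)
    {
        /* packoffset(c0) float4 vs packoffset(c0.y) float: collision. */
        printf("%d\n", buffers_overlap(0, 4, 1, 1)); /* 1 */
        /* packoffset(c0) float4 vs packoffset(c1) float4: disjoint. */
        printf("%d\n", buffers_overlap(0, 4, 4, 4)); /* 0 */
        return 0;
    }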
static void allocate_buffers(struct hlsl_ctx *ctx) @@ -2880,15 +3558,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->type != HLSL_CLASS_OBJECT) + if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) { if (var->is_param) var->buffer = ctx->params_buffer;
- calculate_buffer_offset(var); + calculate_buffer_offset(ctx, var); } }
+ validate_buffer_offsets(ctx); + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) { if (!buffer->used_size) @@ -2896,28 +3576,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
if (buffer->type == HLSL_BUFFER_CONSTANT) { - if (buffer->reservation.type == 'b') + if (buffer->reservation.reg_type == 'b') { - const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.index); + const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index);
if (reserved_buffer && reserved_buffer != buffer) { hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple buffers bound to cb%u.", buffer->reservation.index); + "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, - "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.index); + "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); }
- buffer->reg.id = buffer->reservation.index; + buffer->reg.id = buffer->reservation.reg_index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } - else if (!buffer->reservation.type) + else if (!buffer->reservation.reg_type) { while (get_reserved_buffer(ctx, index)) ++index;
buffer->reg.id = index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; @@ -2939,13 +3621,29 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum uint32_t index) { const struct hlsl_ir_var *var; + unsigned int start, count;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { - if (!var->regs[regset].allocated) + if (var->reg_reservation.reg_type == get_regset_name(regset) + && var->data_type->reg_size[regset]) + { + /* Vars with a reservation prevent non-reserved vars from being + * bound there even if the reserved vars aren't used. */ + start = var->reg_reservation.reg_index; + count = var->data_type->reg_size[regset]; + } + else if (var->regs[regset].allocated) + { + start = var->regs[regset].id; + count = var->regs[regset].bind_count; + } + else + { continue; + }
- if (index == var->regs[regset].id) + if (start <= index && index < start + count) return var; } return NULL; @@ -2956,7 +3654,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; - uint32_t index;
if (regset == HLSL_REGSET_UAVS) { @@ -2968,19 +3665,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } }
- index = min_index; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || !var->data_type->reg_size[regset]) + unsigned int count = var->regs[regset].bind_count; + + if (count == 0) continue;
if (var->regs[regset].allocated) { - const struct hlsl_ir_var *reserved_object; - unsigned int index = var->regs[regset].id; - - reserved_object = get_allocated_object(ctx, regset, index); + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; + unsigned int index, i;
if (var->regs[regset].id < min_index) { @@ -2988,28 +3683,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); + continue; } - else if (reserved_object && reserved_object != var) + + for (i = 0; i < count; ++i) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", regset_name, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); - } + index = var->regs[regset].id + i;
- var->regs[regset].id = var->reg_reservation.index; - var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); + reserved_object = get_allocated_object(ctx, regset, index); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple variables bound to %c%u.", regset_name, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, + "Variable '%s' is already bound to %c%u.", reserved_object->name, + regset_name, index); + last_reported = reserved_object; + } + } } else { - while (get_allocated_object(ctx, regset, index)) + unsigned int index = min_index; + unsigned int available = 0; + + while (available < count) + { + if (get_allocated_object(ctx, regset, index)) + available = 0; + else + ++available; ++index; + } + index -= count;
var->regs[regset].id = index; var->regs[regset].allocated = true; - TRACE("Allocated object to %c%u.\n", regset_name, index); + TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, + index + count); ++index; } } @@ -3034,12 +3745,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return false;
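The non-reserved path above is a first-fit scan for a contiguous run of free slots: it walks forward counting consecutive free indices, resets the count on a collision, and backs up by count once a long enough run is found. A self-contained sketch with a hypothetical occupancy predicate standing in for get_allocated_object():

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical occupancy: slots 2 and 3 are already taken. */
    static bool slot_taken(unsigned int index)
    {
        return index == 2 || index == 3;
    }

    /* Mirrors the scan in allocate_objects(). */
    static unsigned int find_free_run(unsigned int start, unsigned int count)
    {
        unsigned int index = start, available = 0;

        while (available < count)
        {
            if (slot_taken(index))
                available = 0;
            else
                ++available;
            ++index;
        }
        return index - count;
    }

    int main(void)
    {
        printf("%u\n", find_free_run(0, 2)); /* 0: slots 0-1 are free */
        printf("%u\n", find_free_run(0, 3)); /* 4: first run of 3 starts past slot 3 */
        return 0;
    }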
/* We should always have generated a cast to UINT. */ - assert(path_node->data_type->type == HLSL_CLASS_SCALAR + assert(path_node->data_type->class == HLSL_CLASS_SCALAR && path_node->data_type->base_type == HLSL_TYPE_UINT);
- idx = hlsl_ir_constant(path_node)->value[0].u; + idx = hlsl_ir_constant(path_node)->value.u[0].u;
- switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) @@ -3090,6 +3801,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; }
+bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + assert(regset <= HLSL_REGSET_LAST_OBJECT); + + *index = 0; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + + switch (type->class) + { + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + return false; + + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_STRUCT: + *index += type->e.record.fields[idx].reg_offset[regset]; + break; + + default: + vkd3d_unreachable(); + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + assert(type->reg_size[regset] == 1); + return true; +} + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; @@ -3102,13 +3862,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref }
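hlsl_regset_index_from_deref() reduces a constant deref path to a flat register index by summing field offsets and scaled array indices. A worked example with made-up sizes (the names mirror the fields used above, but the numbers are hypothetical):

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical "Texture2D tex[3]" inside a struct whose texture block
         * starts at t-register offset 2; the deref path is {field, array idx 1}. */
        unsigned int field_reg_offset = 2; /* e.record.fields[idx].reg_offset[regset] */
        unsigned int element_reg_size = 1; /* e.array.type->reg_size[regset] */
        unsigned int array_idx = 1;

        /* index += field offset, then index += idx * element size, as above. */
        printf("t%u\n", field_reg_offset + array_idx * element_reg_size); /* t3 */
        return 0;
    }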
/* We should always have generated a cast to UINT. */ - assert(offset_node->data_type->type == HLSL_CLASS_SCALAR + assert(offset_node->data_type->class == HLSL_CLASS_SCALAR && offset_node->data_type->base_type == HLSL_TYPE_UINT);
if (offset_node->type != HLSL_IR_CONSTANT) return false;
- *offset = hlsl_ir_constant(offset_node)->value[0].u; + *offset = hlsl_ir_constant(offset_node)->value.u[0].u;
size = deref->var->data_type->reg_size[deref->offset_regset]; if (*offset >= size) @@ -3170,7 +3930,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a const struct hlsl_type *type = instr->data_type; const struct hlsl_ir_constant *constant;
- if (type->type != HLSL_CLASS_SCALAR + if (type->class != HLSL_CLASS_SCALAR || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) { struct vkd3d_string_buffer *string; @@ -3190,15 +3950,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } constant = hlsl_ir_constant(instr);
- if ((type->base_type == HLSL_TYPE_INT && constant->value[0].i <= 0) - || (type->base_type == HLSL_TYPE_UINT && !constant->value[0].u)) + if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) + || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, "Thread count must be a positive integer.");
- ctx->thread_count[i] = constant->value[0].u; + ctx->thread_count[i] = constant->value.u[0].u; } }
+static bool type_has_object_components(struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_OBJECT) + return true; + if (type->class == HLSL_CLASS_ARRAY) + return type_has_object_components(type->e.array.type); + if (type->class == HLSL_CLASS_STRUCT) + { + unsigned int i; + + for (i = 0; i < type->e.record.field_count; ++i) + { + if (type_has_object_components(type->e.record.fields[i].type)) + return true; + } + } + return false; +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -3209,10 +3988,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry unsigned int i; bool progress;
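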
- list_move_head(&body->instrs, &ctx->static_initializers); + list_move_head(&body->instrs, &ctx->static_initializers.instrs);
memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); - transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); + hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); vkd3d_free(recursive_call_ctx.backtrace);
/* Avoid going into an infinite loop when processing call instructions. @@ -3222,7 +4001,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_return(ctx, entry_func, body, false);
- while (transform_ir(ctx, lower_calls, body, NULL)); + while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); + + hlsl_transform_ir(ctx, lower_index_loads, body, NULL);
LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { @@ -3234,15 +4015,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { var = entry_func->parameters.vars[i];
- if (var->data_type->type == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { prepend_uniform_copy(ctx, &body->instrs, var); } else { - if (var->data_type->type != HLSL_CLASS_STRUCT && !var->semantic.name) + if (type_has_object_components(var->data_type)) + hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); + + if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT + && !var->semantic.name) + { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Parameter \"%s\" is missing a semantic.", var->name); + var->semantic.reported_missing = true; + }
if (var->storage_modifiers & HLSL_STORAGE_IN) prepend_input_var_copy(ctx, &body->instrs, var); @@ -3252,7 +4040,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } if (entry_func->return_var) { - if (entry_func->return_var->data_type->type != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) + if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name);
@@ -3274,60 +4062,71 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name);
- transform_ir(ctx, lower_broadcasts, body, NULL); - while (transform_ir(ctx, fold_redundant_casts, body, NULL)); + hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); + while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do { - progress = transform_ir(ctx, split_array_copies, body, NULL); - progress |= transform_ir(ctx, split_struct_copies, body, NULL); + progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); + progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); } while (progress); - transform_ir(ctx, split_matrix_copies, body, NULL); - - transform_ir(ctx, lower_narrowing_casts, body, NULL); - transform_ir(ctx, lower_casts_to_bool, body, NULL); - transform_ir(ctx, lower_int_division, body, NULL); - transform_ir(ctx, lower_int_modulus, body, NULL); - transform_ir(ctx, lower_int_abs, body, NULL); - transform_ir(ctx, lower_float_modulus, body, NULL); + hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + + hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + hlsl_transform_ir(ctx, lower_int_division, body, NULL); + hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); + hlsl_transform_ir(ctx, lower_int_abs, body, NULL); + hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do { - progress = transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= copy_propagation_execute(ctx, body); - progress |= transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); } while (progress);
+ hlsl_transform_ir(ctx, lower_nonconstant_vector_derefs, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + if (profile->major_version < 4) { - transform_ir(ctx, lower_division, body, NULL); - transform_ir(ctx, lower_sqrt, body, NULL); - transform_ir(ctx, lower_dot, body, NULL); + hlsl_transform_ir(ctx, lower_division, body, NULL); + hlsl_transform_ir(ctx, lower_sqrt, body, NULL); + hlsl_transform_ir(ctx, lower_dot, body, NULL); + hlsl_transform_ir(ctx, lower_round, body, NULL); }
if (profile->major_version < 2) { - transform_ir(ctx, lower_abs, body, NULL); + hlsl_transform_ir(ctx, lower_abs, body, NULL); }
- transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL);
/* TODO: move forward, remove when no longer needed */ - transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); - while (transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); + transform_derefs(ctx, replace_deref_path_with_offset, body); + while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL));
do compute_liveness(ctx, entry_func); - while (transform_ir(ctx, dce, body, NULL)); + while (hlsl_transform_ir(ctx, dce, body, NULL));
compute_liveness(ctx, entry_func);
if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx);
+ calculate_resource_register_counts(ctx); + allocate_register_reservations(ctx); + allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 3210bbd5712..301113c8477 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -22,7 +22,49 @@
#include "hlsl.h"
-static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fabsf(src->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fabs(src->value.u[k].d); + break; + + case HLSL_TYPE_INT: + /* C's abs(INT_MIN) is undefined, but HLSL evaluates this to INT_MIN */ + if (src->value.u[k].i == INT_MIN) + dst->u[k].i = INT_MIN; + else + dst->u[k].i = abs(src->value.u[k].i); + break; + + case HLSL_TYPE_UINT: + dst->u[k].u = src->value.u[k].u; + break; + + default: + FIXME("Fold abs() for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { unsigned int k; uint32_t u; @@ -30,11 +72,11 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct double d; float f;
- if (dst->node.data_type->dimx != src->node.data_type->dimx - || dst->node.data_type->dimy != src->node.data_type->dimy) + if (dst_type->dimx != src->node.data_type->dimx + || dst_type->dimy != src->node.data_type->dimy) { FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), - debug_hlsl_type(ctx, dst->node.data_type)); + debug_hlsl_type(ctx, dst_type)); return false; }
@@ -44,61 +86,61 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - u = src->value[k].f; - i = src->value[k].f; - f = src->value[k].f; - d = src->value[k].f; + u = src->value.u[k].f; + i = src->value.u[k].f; + f = src->value.u[k].f; + d = src->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - u = src->value[k].d; - i = src->value[k].d; - f = src->value[k].d; - d = src->value[k].d; + u = src->value.u[k].d; + i = src->value.u[k].d; + f = src->value.u[k].d; + d = src->value.u[k].d; break;
case HLSL_TYPE_INT: - u = src->value[k].i; - i = src->value[k].i; - f = src->value[k].i; - d = src->value[k].i; + u = src->value.u[k].i; + i = src->value.u[k].i; + f = src->value.u[k].i; + d = src->value.u[k].i; break;
case HLSL_TYPE_UINT: - u = src->value[k].u; - i = src->value[k].u; - f = src->value[k].u; - d = src->value[k].u; + u = src->value.u[k].u; + i = src->value.u[k].u; + f = src->value.u[k].u; + d = src->value.u[k].u; break;
case HLSL_TYPE_BOOL: - u = !!src->value[k].u; - i = !!src->value[k].u; - f = !!src->value[k].u; - d = !!src->value[k].u; + u = !!src->value.u[k].u; + i = !!src->value.u[k].u; + f = !!src->value.u[k].u; + d = !!src->value.u[k].u; break;
default: vkd3d_unreachable(); }
- switch (dst->node.data_type->base_type) + switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = f; + dst->u[k].f = f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = d; + dst->u[k].d = d; break;
case HLSL_TYPE_INT: - dst->value[k].i = i; + dst->u[k].i = i; break;
case HLSL_TYPE_UINT: - dst->value[k].u = u; + dst->u[k].u = u; break;
case HLSL_TYPE_BOOL: @@ -110,9 +152,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct return true; }
-static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src->node.data_type->base_type); @@ -123,30 +166,30 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = -src->value[k].f; + dst->u[k].f = -src->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = -src->value[k].d; + dst->u[k].d = -src->value.u[k].d; break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = -src->value[k].u; + dst->u[k].u = -src->value.u[k].u; break;
default: - FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src1, - struct hlsl_ir_constant *src2) +static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); @@ -158,32 +201,32 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = src1->value[k].f + src2->value[k].f; + dst->u[k].f = src1->value.u[k].f + src2->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = src1->value[k].d + src2->value[k].d; + dst->u[k].d = src1->value.u[k].d + src2->value.u[k].d; break;
/* Handling HLSL_TYPE_INT through the unsigned field to avoid * undefined behavior with signed integers in C. */ case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u + src2->value[k].u; + dst->u[k].u = src1->value.u[k].u + src2->value.u[k].u; break;
default: - FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); @@ -195,32 +238,32 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = src1->value[k].f * src2->value[k].f; + dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].d = src1->value[k].d * src2->value[k].d; + dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u * src2->value[k].u; + dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; break;
default: - FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { unsigned int k;
- assert(dst->node.data_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->base_type == HLSL_TYPE_BOOL); assert(src1->node.data_type->base_type == src2->node.data_type->base_type);
for (k = 0; k < 4; ++k) @@ -229,270 +272,270 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].u = src1->value[k].f != src2->value[k].f; + dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; break;
case HLSL_TYPE_DOUBLE: - dst->value[k].u = src1->value[k].d != src2->value[k].d; + dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: - dst->value[k].u = src1->value[k].u != src2->value[k].u; + dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; break;
default: vkd3d_unreachable(); }
- dst->value[k].u *= ~0u; + dst->u[k].u *= ~0u; } return true; }
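The *= ~0u above relies on HLSL's bool representation: false is 0 and true is all ones, so scaling the C comparison result (0 or 1) by ~0u produces the canonical value. A quick check:

    #include <stdio.h>

    int main(void)
    {
        unsigned int t = (3 != 2); /* C comparison yields 0 or 1 */
        unsigned int f = (2 != 2);

        t *= ~0u; /* 1 * ~0u == 0xffffffff, the canonical HLSL "true" */
        f *= ~0u; /* 0 stays 0 */
        printf("%#x %#x\n", t, f);
        return 0;
    }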
-static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - if (ctx->profile->major_version >= 4 && src2->value[k].f == 0) + if (ctx->profile->major_version >= 4 && src2->value.u[k].f == 0) { - hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, "Floating point division by zero."); } - dst->value[k].f = src1->value[k].f / src2->value[k].f; - if (ctx->profile->major_version < 4 && !isfinite(dst->value[k].f)) + dst->u[k].f = src1->value.u[k].f / src2->value.u[k].f; + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Infinities and NaNs are not allowed by the shader model."); } break;
case HLSL_TYPE_DOUBLE: - if (src2->value[k].d == 0) + if (src2->value.u[k].d == 0) { - hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, "Floating point division by zero."); } - dst->value[k].d = src1->value[k].d / src2->value[k].d; + dst->u[k].d = src1->value.u[k].d / src2->value.u[k].d; break;
case HLSL_TYPE_INT: - if (src2->value[k].i == 0) + if (src2->value.u[k].i == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) - dst->value[k].i = INT_MIN; + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = INT_MIN; else - dst->value[k].i = src1->value[k].i / src2->value[k].i; + dst->u[k].i = src1->value.u[k].i / src2->value.u[k].i; break;
case HLSL_TYPE_UINT: - if (src2->value[k].u == 0) + if (src2->value.u[k].u == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - dst->value[k].u = src1->value[k].u / src2->value[k].u; + dst->u[k].u = src1->value.u[k].u / src2->value.u[k].u; break;
default: - FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - if (src2->value[k].i == 0) + if (src2->value.u[k].i == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Division by zero."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) - dst->value[k].i = 0; + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = 0; else - dst->value[k].i = src1->value[k].i % src2->value[k].i; + dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; break;
case HLSL_TYPE_UINT: - if (src2->value[k].u == 0) + if (src2->value.u[k].u == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Division by zero."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - dst->value[k].u = src1->value[k].u % src2->value[k].u; + dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; break;
default: - FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - dst->value[k].i = max(src1->value[k].i, src2->value[k].i); + dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); break;
case HLSL_TYPE_UINT: - dst->value[k].u = max(src1->value[k].u, src2->value[k].u); + dst->u[k].u = max(src1->value.u[k].u, src2->value.u[k].u); break;
default: - FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - dst->value[k].i = min(src1->value[k].i, src2->value[k].i); + dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); break;
case HLSL_TYPE_UINT: - dst->value[k].u = min(src1->value[k].u, src2->value[k].u); + dst->u[k].u = min(src1->value.u[k].u, src2->value.u[k].u); break;
default: - FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u ^ src2->value[k].u; + dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; break;
default: - FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u & src2->value[k].u; + dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; break;
default: - FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u | src2->value[k].u; + dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; break;
default: - FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } @@ -501,7 +544,9 @@ static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL, *res; + struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_constant_value res = {0}; + struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; unsigned int i; bool success; @@ -512,7 +557,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (!expr->operands[0].node) return false;
- if (instr->data_type->type > HLSL_CLASS_VECTOR) + if (instr->data_type->class > HLSL_CLASS_VECTOR) return false;
for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) @@ -521,64 +566,65 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, { if (expr->operands[i].node->type != HLSL_IR_CONSTANT) return false; - assert(expr->operands[i].node->data_type->type <= HLSL_CLASS_VECTOR); + assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); } } arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node);
- if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) - return false; - switch (expr->op) { + case HLSL_OP1_ABS: + success = fold_abs(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_CAST: - success = fold_cast(ctx, res, arg1); + success = fold_cast(ctx, &res, instr->data_type, arg1); break;
case HLSL_OP1_NEG: - success = fold_neg(ctx, res, arg1); + success = fold_neg(ctx, &res, instr->data_type, arg1); break;
case HLSL_OP2_ADD: - success = fold_add(ctx, res, arg1, arg2); + success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_MUL: - success = fold_mul(ctx, res, arg1, arg2); + success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_NEQUAL: - success = fold_nequal(ctx, res, arg1, arg2); + success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_DIV: - success = fold_div(ctx, res, arg1, arg2); + success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
case HLSL_OP2_MOD: - success = fold_mod(ctx, res, arg1, arg2); + success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
case HLSL_OP2_MAX: - success = fold_max(ctx, res, arg1, arg2); + success = fold_max(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_MIN: - success = fold_min(ctx, res, arg1, arg2); + success = fold_min(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_BIT_XOR: - success = fold_bit_xor(ctx, res, arg1, arg2); + success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_BIT_AND: - success = fold_bit_and(ctx, res, arg1, arg2); + success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_BIT_OR: - success = fold_bit_or(ctx, res, arg1, arg2); + success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); break;
default: @@ -589,20 +635,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
if (success) { - list_add_before(&expr->node.entry, &res->node.entry); - hlsl_replace_node(&expr->node, &res->node); - } - else - { - vkd3d_free(res); + if (!(res_node = hlsl_new_constant(ctx, instr->data_type, &res, &instr->loc))) + return false; + list_add_before(&expr->node.entry, &res_node->entry); + hlsl_replace_node(&expr->node, res_node); } return success; }
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *value, *res; + struct hlsl_constant_value value; struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_constant *src; + struct hlsl_ir_node *dst; unsigned int i;
if (instr->type != HLSL_IR_SWIZZLE) @@ -610,15 +656,15 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst swizzle = hlsl_ir_swizzle(instr); if (swizzle->val.node->type != HLSL_IR_CONSTANT) return false; - value = hlsl_ir_constant(swizzle->val.node); - - if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) - return false; + src = hlsl_ir_constant(swizzle->val.node);
for (i = 0; i < swizzle->node.data_type->dimx; ++i) - res->value[i] = value->value[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + + if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) + return false;
- list_add_before(&swizzle->node.entry, &res->node.entry); - hlsl_replace_node(&swizzle->node, &res->node); + list_add_before(&swizzle->node.entry, &dst->entry); + hlsl_replace_node(&swizzle->node, dst); return true; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c deleted file mode 100644 index 4a62d804ed6..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c +++ /dev/null @@ -1,980 +0,0 @@ -/* - * HLSL code generation for DXBC shader models 1-3 - * - * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "hlsl.h" -#include <stdio.h> - -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -{ - unsigned int i; - - static const struct - { - const char *semantic; - bool output; - enum vkd3d_shader_type shader_type; - unsigned int major_version; - D3DSHADER_PARAM_REGISTER_TYPE type; - DWORD offset; - } - register_table[] = - { - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, 
VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type - && ctx->profile->major_version == register_table[i].major_version) - { - *type = register_table[i].type; - if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) - *reg = register_table[i].offset; - else - *reg = semantic->index; - return true; - } - } - - return false; -} - -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -{ - static const struct - { - const char *name; - D3DDECLUSAGE usage; - } - semantics[] = - { - {"binormal", D3DDECLUSAGE_BINORMAL}, - {"blendindices", D3DDECLUSAGE_BLENDINDICES}, - {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, - {"color", D3DDECLUSAGE_COLOR}, - {"depth", D3DDECLUSAGE_DEPTH}, - {"fog", D3DDECLUSAGE_FOG}, - {"normal", D3DDECLUSAGE_NORMAL}, - {"position", D3DDECLUSAGE_POSITION}, - {"positiont", D3DDECLUSAGE_POSITIONT}, - {"psize", D3DDECLUSAGE_PSIZE}, - {"sample", D3DDECLUSAGE_SAMPLE}, - {"sv_depth", D3DDECLUSAGE_DEPTH}, - {"sv_position", D3DDECLUSAGE_POSITION}, - {"sv_target", D3DDECLUSAGE_COLOR}, - {"tangent", D3DDECLUSAGE_TANGENT}, - {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, - {"texcoord", D3DDECLUSAGE_TEXCOORD}, - }; - - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { - if (!ascii_strcasecmp(semantic->name, semantics[i].name)) - { - *usage = semantics[i].usage; - *usage_idx = semantic->index; - return true; - } - } - - return false; -} - -static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) -{ - if (type == VKD3D_SHADER_TYPE_VERTEX) - return D3DVS_VERSION(major, minor); - else - return D3DPS_VERSION(major, minor); -} - -static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) -{ - switch (type->type) - { - case HLSL_CLASS_ARRAY: - return sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else - return D3DXPC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3DXPC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3DXPC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->type); - vkd3d_unreachable(); - } -} - -static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3DXPT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_SAMPLER; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3DXPT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - 
return D3DXPT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_TEXTURE; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_VERTEXSHADER: - return D3DXPT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3DXPT_VOID; - default: - vkd3d_unreachable(); - } -} - -static const struct hlsl_type *get_array_type(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_type(type->e.array.type); - return type; -} - -static unsigned int get_array_size(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; - return 1; -} - -static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -{ - const struct hlsl_type *array_type = get_array_type(type); - unsigned int array_size = get_array_size(type); - unsigned int field_count = 0; - size_t fields_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (array_type->type == HLSL_CLASS_STRUCT) - { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm1_type(buffer, field->type, ctab_start); - } - - fields_offset = bytecode_get_size(buffer) - ctab_start; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset - ctab_start); - put_u32(buffer, field->type->bytecode_offset - ctab_start); - } - } - - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -} - -static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -{ - struct hlsl_ir_var *var; - - list_remove(&to_sort->extern_entry); - - LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) - { - if (strcmp(to_sort->name, var->name) < 0) - { - list_add_before(&var->extern_entry, &to_sort->extern_entry); - return; - } - } - - list_add_tail(sorted, &to_sort->extern_entry); -} - -static void sm1_sort_externs(struct hlsl_ctx *ctx) -{ - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; - - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - sm1_sort_extern(&sorted, var); - list_move_tail(&ctx->extern_vars, &sorted); -} - -static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - struct hlsl_ir_function_decl *entry_func) -{ - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; - struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - ++uniform_count; - - if (var->is_param && var->is_uniform) - { - struct vkd3d_string_buffer *name; - - if (!(name = hlsl_get_string_buffer(ctx))) - { - buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - vkd3d_string_buffer_printf(name, "$%s", 
var->name); - vkd3d_free((char *)var->name); - var->name = hlsl_strdup(ctx, name->buffer); - hlsl_release_string_buffer(ctx, name); - } - } - } - - sm1_sort_externs(ctx); - - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - - ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); - creator_offset = put_u32(buffer, 0); - put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); - put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ - - vars_start = bytecode_get_size(buffer); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - put_u32(buffer, 0); /* name */ - if (var->data_type->type == HLSL_CLASS_OBJECT - && (var->data_type->base_type == HLSL_TYPE_SAMPLER - || var->data_type->base_type == HLSL_TYPE_TEXTURE)) - { - assert(regset == HLSL_REGSET_SAMPLERS); - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); - put_u32(buffer, 1); - } - else - { - assert(regset == HLSL_REGSET_NUMERIC); - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); - put_u32(buffer, var->data_type->reg_size[regset] / 4); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ - } - } - - uniform_count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); - size_t name_offset; - - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, name_offset - ctab_start); - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); - ++uniform_count; - } - } - - offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(buffer, creator_offset, offset - ctab_start); - - ctab_end = bytecode_get_size(buffer); - set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); -} - -static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -{ - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -} - -struct sm1_instruction -{ - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; - - struct sm1_dst_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; - unsigned int writemask; - uint32_t reg; - } dst; - - struct sm1_src_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; - } srcs[3]; - unsigned int src_count; - - unsigned int has_dst; -}; - -static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) -{ - assert(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); -} - -static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - const struct sm1_src_register *reg) -{ - put_u32(buffer, (1u << 31) | 
sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); -} - -static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct sm1_instruction *instr) -{ - uint32_t token = instr->opcode; - unsigned int i; - - if (ctx->profile->major_version > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - if (instr->has_dst) - write_sm1_dst_register(buffer, &instr->dst); - - for (i = 0; i < instr->src_count; ++i) - write_sm1_src_register(buffer, &instr->srcs[i]); -}; - -static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) -{ - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); -} - -static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, - const struct hlsl_reg *src3) -{ - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - 
.dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, - .src_count = 1, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - unsigned int i, x; - - for (i = 0; i < ctx->constant_defs.count; ++i) - { - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = D3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = i, - }; - - if (ctx->profile->major_version > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - write_sm1_dst_register(buffer, &reg); - for (x = 0; x < 4; ++x) - put_f32(buffer, ctx->constant_defs.values[i].f[x]); - } -} - -static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool output) -{ - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; - - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg)) - { - usage = 0; - usage_idx = 0; - } - else - { - ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); - assert(ret); - reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; - reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; - } - - token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - token = (1u << 31); - token |= usage << D3DSP_DCL_USAGE_SHIFT; - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - - reg.writemask = (1 << var->data_type->dimx) - 1; - write_sm1_dst_register(buffer, &reg); -} - -static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - bool write_in = false, write_out = false; - struct hlsl_ir_var *var; - - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - write_in = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) - write_in = write_out = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) - write_in = true; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (write_in && var->is_input_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, false); - if (write_out && var->is_output_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, true); - } -} - -static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) -{ - struct hlsl_ir_expr *expr = 
hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - unsigned int i; - - for (i = 0; i < instr->data_type->dimx; ++i) - { - struct hlsl_reg src = arg1->reg, dst = instr->reg; - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); - } -} - -static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; - - assert(instr->reg.allocated); - - if (instr->data_type->base_type != HLSL_TYPE_FLOAT) - { - /* These need to be lowered. */ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); - return; - } - - switch (expr->op) - { - case HLSL_OP1_ABS: - write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_NEG: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); - break; - - case HLSL_OP2_ADD: - write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: - write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: - write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_OP3_DP2ADD: - write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); - break; - } -} - -static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_load *load = hlsl_ir_load(instr); - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - - if (load->src.var->is_uniform) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { - if 
(!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, - false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - else - sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); - } - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_store *store = hlsl_ir_store(instr); - const struct hlsl_ir_node *rhs = store->rhs.node; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, - }; - - if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX) - { - FIXME("Matrix writemasks need to be lowered.\n"); - return; - } - - if (store->lhs.var->is_output_semantic) - { - if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, - true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - assert(reg.allocated); - sm1_instr.dst.type = D3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; - } - else - assert(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_function_decl *entry_func) -{ - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->type == HLSL_CLASS_MATRIX) - { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); - continue; - } - else if (instr->data_type->type == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - write_sm1_constant(ctx, buffer, instr); - break; - - case HLSL_IR_EXPR: - write_sm1_expr(ctx, buffer, instr); - break; - - case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); - break; - - case HLSL_IR_STORE: - write_sm1_store(ctx, buffer, instr); - break; - - case HLSL_IR_SWIZZLE: - write_sm1_swizzle(ctx, buffer, instr); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} - -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -{ - struct vkd3d_bytecode_buffer buffer = {0}; - int ret; - - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - - write_sm1_uniforms(ctx, &buffer, entry_func); - - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_instructions(ctx, &buffer, entry_func); - - put_u32(&buffer, D3DSIO_END); - - if (!(ret = buffer.status)) - { - out->code = buffer.data; - out->size = buffer.size; - } - return ret; -} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c deleted file mode 100644 index 553a75818e7..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c +++ /dev/null @@ -1,2531 +0,0 @@ -/* - * HLSL code generation for DXBC shader models 4-5 - * - * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "hlsl.h" -#include <stdio.h> -#include "d3dcommon.h" -#include "sm4.h" - -static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); - -static bool type_is_integer(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return true; - - default: - return false; - } -} - -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) -{ - unsigned int i; - - static const struct - { - const char *semantic; - bool output; - enum vkd3d_shader_type shader_type; - enum vkd3d_sm4_swizzle_type swizzle_type; - enum vkd3d_sm4_register_type type; - bool has_idx; - } - register_table[] = - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, - - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, - - /* Put sv_target in this table, instead of letting it fall through to - * default varying allocation, so that the register index matches the - * usage index. */ - {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type) - { - *type = register_table[i].type; - if (swizzle_type) - *swizzle_type = register_table[i].swizzle_type; - *has_idx = register_table[i].has_idx; - return true; - } - } - - return false; -} - -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3D_NAME *usage) -{ - unsigned int i; - - static const struct - { - const char *name; - bool output; - enum vkd3d_shader_type shader_type; - D3DDECLUSAGE usage; - } - semantics[] = - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - - {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - 
{"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, - - {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - }; - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { - if (!ascii_strcasecmp(semantic->name, semantics[i].name) - && output == semantics[i].output - && ctx->profile->type == semantics[i].shader_type - && !ascii_strncasecmp(semantic->name, "sv_", 3)) - { - *usage = semantics[i].usage; - return true; - } - } - - if (!ascii_strncasecmp(semantic->name, "sv_", 3)) - return false; - - *usage = D3D_NAME_UNDEFINED; - return true; -} - -static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) -{ - struct vkd3d_bytecode_buffer buffer = {0}; - struct vkd3d_string_buffer *string; - const struct hlsl_ir_var *var; - size_t count_position; - unsigned int i; - bool ret; - - count_position = put_u32(&buffer, 0); - put_u32(&buffer, 8); /* unknown */ - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - enum vkd3d_sm4_register_type type; - uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - assert(ret); - if (usage == ~0u) - continue; - usage_idx = var->semantic.index; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) - { - reg_idx = has_idx ? var->semantic.index : ~0u; - } - else - { - assert(var->regs[HLSL_REGSET_NUMERIC].allocated); - type = VKD3D_SM4_RT_INPUT; - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - - use_mask = width; /* FIXME: accurately report use mask */ - if (output) - use_mask = 0xf ^ use_mask; - - /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ - if (usage >= 64) - usage = 0; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); - switch (var->data_type->base_type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); - break; - - case HLSL_TYPE_INT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); - break; - - default: - if ((string = hlsl_type_to_string(ctx, var->data_type))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid data type %s for semantic variable %s.", string->buffer, var->name); - hlsl_release_string_buffer(ctx, string); - put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); - } - put_u32(&buffer, reg_idx); - put_u32(&buffer, vkd3d_make_u16(width, use_mask)); - } - - i = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - const char *semantic = var->semantic.name; - size_t string_offset; - D3D_NAME usage; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) - continue; - - if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) - string_offset = put_string(&buffer, "SV_Target"); - else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) - string_offset = put_string(&buffer, "SV_Depth"); - else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) - string_offset = put_string(&buffer, "SV_Position"); - else - string_offset = put_string(&buffer, semantic); - set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); - } - - set_u32(&buffer, count_position, i); - - dxbc_writer_add_section(dxbc, output ? 
TAG_OSGN : TAG_ISGN, buffer.data, buffer.size); -} - -static const struct hlsl_type *get_array_type(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_type(type->e.array.type); - return type; -} - -static unsigned int get_array_size(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; - return 1; -} - -static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -{ - switch (type->type) - { - case HLSL_CLASS_ARRAY: - return sm4_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else - return D3D_SVC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3D_SVC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3D_SVC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3D_SVC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3D_SVC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->type); - vkd3d_unreachable(); - } -} - -static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - return D3D_SVT_BOOL; - case HLSL_TYPE_DOUBLE: - return D3D_SVT_DOUBLE; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3D_SVT_FLOAT; - case HLSL_TYPE_INT: - return D3D_SVT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3D_SVT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_SAMPLER; - default: - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3D_SVT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_TEXTURE; - default: - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_UINT: - return D3D_SVT_UINT; - case HLSL_TYPE_VERTEXSHADER: - return D3D_SVT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3D_SVT_VOID; - default: - vkd3d_unreachable(); - } -} - -static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) -{ - const struct hlsl_type *array_type = get_array_type(type); - const char *name = array_type->name ? 
array_type->name : "<unnamed>"; - const struct hlsl_profile_info *profile = ctx->profile; - unsigned int field_count = 0, array_size = 0; - size_t fields_offset = 0, name_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (profile->major_version >= 5) - name_offset = put_string(buffer, name); - - if (type->type == HLSL_CLASS_ARRAY) - array_size = get_array_size(type); - - if (array_type->type == HLSL_CLASS_STRUCT) - { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm4_type(ctx, buffer, field->type); - } - - fields_offset = bytecode_get_size(buffer); - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); - } - } - - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); - - if (profile->major_version >= 5) - { - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, name_offset); - } -} - -static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_SAMPLER: - return D3D_SIT_SAMPLER; - case HLSL_TYPE_TEXTURE: - return D3D_SIT_TEXTURE; - case HLSL_TYPE_UAV: - return D3D_SIT_UAV_RWTYPED; - default: - vkd3d_unreachable(); - } -} - -static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) -{ - switch (type->e.resource_format->base_type) - { - case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3D_RETURN_TYPE_FLOAT; - - case HLSL_TYPE_INT: - return D3D_RETURN_TYPE_SINT; - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - return D3D_RETURN_TYPE_UINT; - - default: - vkd3d_unreachable(); - } -} - -static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SRV_DIMENSION_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SRV_DIMENSION_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SRV_DIMENSION_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SRV_DIMENSION_TEXTURECUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return D3D_SRV_DIMENSION_TEXTURE1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return D3D_SRV_DIMENSION_TEXTURE2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; - case HLSL_SAMPLER_DIM_CUBEARRAY: - return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; - default: - vkd3d_unreachable(); - } -} - -static int sm4_compare_extern_resources(const void *a, const void *b) -{ - const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; - const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; - enum hlsl_regset aa_regset, bb_regset; - - aa_regset = hlsl_type_get_regset(aa->data_type); - bb_regset = hlsl_type_get_regset(bb->data_type); - - if (aa_regset != bb_regset) - return aa_regset - bb_regset; - - 
return aa->regs[aa_regset].id - bb->regs[bb_regset].id; -} - -static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -{ - const struct hlsl_ir_var **extern_resources = NULL; - const struct hlsl_ir_var *var; - enum hlsl_regset regset; - size_t capacity = 0; - - *count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!hlsl_type_is_resource(var->data_type)) - continue; - regset = hlsl_type_get_regset(var->data_type); - if (!var->regs[regset].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) - { - *count = 0; - return NULL; - } - - extern_resources[*count] = var; - ++*count; - } - - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - return extern_resources; -} - -static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -{ - unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - - static const uint16_t target_types[] = - { - 0xffff, /* PIXEL */ - 0xfffe, /* VERTEX */ - 0x4753, /* GEOMETRY */ - 0x4853, /* HULL */ - 0x4453, /* DOMAIN */ - 0x4353, /* COMPUTE */ - }; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - resource_count += extern_resources_count; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - { - ++cbuffer_count; - ++resource_count; - } - } - - put_u32(&buffer, cbuffer_count); - cbuffer_position = put_u32(&buffer, 0); - put_u32(&buffer, resource_count); - resource_position = put_u32(&buffer, 0); - put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), - target_types[profile->type])); - put_u32(&buffer, 0); /* FIXME: compilation flags */ - creator_position = put_u32(&buffer, 0); - - if (profile->major_version >= 5) - { - put_u32(&buffer, TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ - put_u32(&buffer, 0); /* unknown; possibly a null terminator */ - } - - /* Bound resources. 
*/ - - resources_offset = bytecode_get_size(&buffer); - set_u32(&buffer, resource_position, resources_offset); - - for (i = 0; i < extern_resources_count; ++i) - { - enum hlsl_regset regset; - uint32_t flags = 0; - - var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(var->data_type)); - if (regset == HLSL_REGSET_SAMPLERS) - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); - put_u32(&buffer, 0); - } - else - { - put_u32(&buffer, sm4_resource_format(var->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } - put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - uint32_t flags = 0; - - if (!cbuffer->reg.allocated) - continue; - - if (cbuffer->reservation.type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - put_u32(&buffer, 0); /* return type */ - put_u32(&buffer, 0); /* dimension */ - put_u32(&buffer, 0); /* multisample count */ - put_u32(&buffer, cbuffer->reg.id); /* bind point */ - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); /* flags */ - } - - for (i = 0; i < extern_resources_count; ++i) - { - var = extern_resources[i]; - - string_offset = put_string(&buffer, var->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); - } - - /* Buffers. */ - - cbuffers_offset = bytecode_get_size(&buffer); - set_u32(&buffer, cbuffer_position, cbuffers_offset); - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - unsigned int var_count = 0; - - if (!cbuffer->reg.allocated) - continue; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - ++var_count; - } - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var_count); - put_u32(&buffer, 0); /* variable offset */ - put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); - put_u32(&buffer, 0); /* FIXME: flags */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - size_t vars_start = bytecode_get_size(&buffer); - - if (!cbuffer->reg.allocated) - continue; - - set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - { - uint32_t flags = 0; - - if (var->last_read) - flags |= D3D_SVF_USED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var->buffer_offset * sizeof(float)); - put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); - put_u32(&buffer, flags); - put_u32(&buffer, 0); /* type */ - put_u32(&buffer, 0); /* FIXME: default value */ - - if (profile->major_version >= 5) - { - put_u32(&buffer, 0); /* texture start */ - put_u32(&buffer, 0); /* texture count */ - put_u32(&buffer, 0); /* sampler start */ - put_u32(&buffer, 0); /* sampler count */ - } - } - } - - j = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - { - const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6); - size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); - size_t string_offset = put_string(&buffer, var->name); - - set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); - set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); - ++j; - } - } - } - - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - - dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size); - - vkd3d_free(extern_resources); -} - -static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return VKD3D_SM4_RESOURCE_TEXTURE_1D; - case HLSL_SAMPLER_DIM_2D: - return VKD3D_SM4_RESOURCE_TEXTURE_2D; - case HLSL_SAMPLER_DIM_3D: - return VKD3D_SM4_RESOURCE_TEXTURE_3D; - case HLSL_SAMPLER_DIM_CUBE: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; - case HLSL_SAMPLER_DIM_CUBEARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; - default: - vkd3d_unreachable(); - } -} - -struct sm4_instruction_modifier -{ - enum vkd3d_sm4_instruction_modifier type; - - union - { - struct - { - int u, v, w; - } aoffimmi; - } u; -}; - -static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) -{ - uint32_t word = 0; - - word |= VKD3D_SM4_MODIFIER_MASK & imod->type; - - switch (imod->type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); - assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); - assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); - word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << 
VKD3D_SM4_AOFFIMMI_U_SHIFT;
-            word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT;
-            word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT;
-            break;
-
-        default:
-            vkd3d_unreachable();
-    }
-
-    return word;
-}
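The encoder above packs each AOFFIMMI component into a 4-bit two's-complement field, which is where the -8..7 range asserted earlier comes from. A self-contained illustration of that round trip, in plain C with hypothetical helper names (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Pack a texel offset in [-8, 7] into a 4-bit field and sign-extend it
     * back, mirroring the AOFFIMMI encoding above. */
    static uint32_t pack4(int v)   { return (uint32_t)v & 0xf; }
    static int unpack4(uint32_t f) { return (int)(f ^ 8) - 8; }

    int main(void)
    {
        int v;

        for (v = -8; v <= 7; ++v)
            assert(unpack4(pack4(v)) == v);
        return 0;
    }

The (f ^ 8) - 8 form is the usual branch-free sign extension for a 4-bit field.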
-struct sm4_register
-{
-    enum vkd3d_sm4_register_type type;
-    uint32_t idx[2];
-    unsigned int idx_count;
-    enum vkd3d_sm4_dimension dim;
-    uint32_t immconst_uint[4];
-    unsigned int mod;
-};
-
-struct sm4_instruction
-{
-    enum vkd3d_sm4_opcode opcode;
-
-    struct sm4_instruction_modifier modifiers[1];
-    unsigned int modifier_count;
-
-    struct sm4_dst_register
-    {
-        struct sm4_register reg;
-        unsigned int writemask;
-    } dsts[2];
-    unsigned int dst_count;
-
-    struct sm4_src_register
-    {
-        struct sm4_register reg;
-        enum vkd3d_sm4_swizzle_type swizzle_type;
-        unsigned int swizzle;
-    } srcs[4];
-    unsigned int src_count;
-
-    uint32_t idx[3];
-    unsigned int idx_count;
-};
-
-static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg,
-        unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type,
-        const struct hlsl_deref *deref, const struct hlsl_type *data_type)
-{
-    const struct hlsl_ir_var *var = deref->var;
-
-    if (var->is_uniform)
-    {
-        if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE)
-        {
-            reg->type = VKD3D_SM4_RT_RESOURCE;
-            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-            if (swizzle_type)
-                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
-            reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id;
-            reg->idx_count = 1;
-            *writemask = VKD3DSP_WRITEMASK_ALL;
-        }
-        else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV)
-        {
-            reg->type = VKD3D_SM5_RT_UAV;
-            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-            if (swizzle_type)
-                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
-            reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id;
-            reg->idx_count = 1;
-            *writemask = VKD3DSP_WRITEMASK_ALL;
-        }
-        else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER)
-        {
-            reg->type = VKD3D_SM4_RT_SAMPLER;
-            reg->dim = VKD3D_SM4_DIMENSION_NONE;
-            if (swizzle_type)
-                *swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
-            reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id;
-            reg->idx_count = 1;
-            *writemask = VKD3DSP_WRITEMASK_ALL;
-        }
-        else
-        {
-            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
-
-            assert(data_type->type <= HLSL_CLASS_VECTOR);
-            reg->type = VKD3D_SM4_RT_CONSTBUFFER;
-            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-            if (swizzle_type)
-                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
-            reg->idx[0] = var->buffer->reg.id;
-            reg->idx[1] = offset / 4;
-            reg->idx_count = 2;
-            *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
-        }
-    }
-    else if (var->is_input_semantic)
-    {
-        bool has_idx;
-
-        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, swizzle_type, &has_idx))
-        {
-            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-
-            if (has_idx)
-            {
-                reg->idx[0] = var->semantic.index + offset / 4;
-                reg->idx_count = 1;
-            }
-
-            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
-        }
-        else
-        {
-            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-
-            assert(hlsl_reg.allocated);
-            reg->type = VKD3D_SM4_RT_INPUT;
-            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-            if (swizzle_type)
-                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
-            reg->idx[0] = hlsl_reg.id;
-            reg->idx_count = 1;
-            *writemask = hlsl_reg.writemask;
-        }
-    }
-    else if (var->is_output_semantic)
-    {
-        bool has_idx;
-
-        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, swizzle_type, &has_idx))
-        {
-            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-
-            if (has_idx)
-            {
-                reg->idx[0] = var->semantic.index + offset / 4;
-                reg->idx_count = 1;
-            }
-
-            if (reg->type == VKD3D_SM4_RT_DEPTHOUT)
-                reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
-            else
-                reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
-        }
-        else
-        {
-            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-
-            assert(hlsl_reg.allocated);
-            reg->type = VKD3D_SM4_RT_OUTPUT;
-            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-            reg->idx[0] = hlsl_reg.id;
-            reg->idx_count = 1;
-            *writemask = hlsl_reg.writemask;
-        }
-    }
-    else
-    {
-        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-
-        assert(hlsl_reg.allocated);
-        reg->type = VKD3D_SM4_RT_TEMP;
-        reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-        if (swizzle_type)
-            *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
-        reg->idx[0] = hlsl_reg.id;
-        reg->idx_count = 1;
-        *writemask = hlsl_reg.writemask;
-    }
-}
-
-static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src,
-        const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask)
-{
-    unsigned int writemask;
-
-    sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type);
-    if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4)
-        src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
-}
-
-static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask,
-        enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr)
-{
-    assert(instr->reg.allocated);
-    reg->type = VKD3D_SM4_RT_TEMP;
-    reg->dim = VKD3D_SM4_DIMENSION_VEC4;
-    *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
-    reg->idx[0] = instr->reg.id;
-    reg->idx_count = 1;
-    *writemask = instr->reg.writemask;
-}
-
-static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr)
-{
-    unsigned int swizzle_type;
-
-    sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr);
-}
-
-static void sm4_src_from_node(struct sm4_src_register *src,
-        const struct hlsl_ir_node *instr, unsigned int map_writemask)
-{
-    unsigned int writemask;
-
-    sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr);
-    if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4)
-        src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
-}
-
-static uint32_t sm4_encode_register(const struct sm4_register *reg)
-{
-    return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT)
-            | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT)
-            | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT);
-}
-
-static uint32_t sm4_register_order(const struct sm4_register *reg)
-{
-    uint32_t order = 1;
-    if (reg->type == VKD3D_SM4_RT_IMMCONST)
-        order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ?
4 : 1; - order += reg->idx_count; - if (reg->mod) - ++order; - return order; -} - -static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) -{ - uint32_t token = instr->opcode; - unsigned int size = 1, i, j; - - size += instr->modifier_count; - for (i = 0; i < instr->dst_count; ++i) - size += sm4_register_order(&instr->dsts[i].reg); - for (i = 0; i < instr->src_count; ++i) - size += sm4_register_order(&instr->srcs[i].reg); - size += instr->idx_count; - - token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - - if (instr->modifier_count > 0) - token |= VKD3D_SM4_INSTRUCTION_MODIFIER; - put_u32(buffer, token); - - for (i = 0; i < instr->modifier_count; ++i) - { - token = sm4_encode_instruction_modifier(&instr->modifiers[i]); - if (instr->modifier_count > i + 1) - token |= VKD3D_SM4_INSTRUCTION_MODIFIER; - put_u32(buffer, token); - } - - for (i = 0; i < instr->dst_count; ++i) - { - token = sm4_encode_register(&instr->dsts[i].reg); - if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; - put_u32(buffer, token); - - for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) - put_u32(buffer, instr->dsts[i].reg.idx[j]); - } - - for (i = 0; i < instr->src_count; ++i) - { - token = sm4_encode_register(&instr->srcs[i].reg); - token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; - if (instr->srcs[i].reg.mod) - token |= VKD3D_SM4_EXTENDED_OPERAND; - put_u32(buffer, token); - - if (instr->srcs[i].reg.mod) - put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); - - for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) - put_u32(buffer, instr->srcs[i].reg.idx[j]); - - if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); - if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); - } - } - } - - for (j = 0; j < instr->idx_count; ++j) - put_u32(buffer, instr->idx[j]); -} - -static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - const struct hlsl_ir_node *texel_offset) -{ - struct sm4_instruction_modifier modif; - struct hlsl_ir_constant *offset; - - if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) - return false; - offset = hlsl_ir_constant(texel_offset); - - modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; - modif.u.aoffimmi.u = offset->value[0].i; - modif.u.aoffimmi.v = offset->value[1].i; - modif.u.aoffimmi.w = offset->value[2].i; - if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 - || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 - || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) - return false; - - instr->modifiers[instr->modifier_count++] = modif; - return true; -} - -static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) -{ - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, - .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, - .srcs[0].reg.idx_count = 2, - .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, - .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), - 
.src_count = 1, - }; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - - .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr = - { - .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) - | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), - - .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {uav ? var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - - .idx[0] = sm4_resource_format(var->data_type) * 0x1111, - .idx_count = 1, - }; - - if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { - instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - const struct hlsl_profile_info *profile = ctx->profile; - const bool output = var->is_output_semantic; - D3D_NAME usage; - bool has_idx; - - struct sm4_instruction instr = - { - .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .dst_count = 1, - }; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) - { - if (has_idx) - { - instr.dsts[0].reg.idx[0] = var->semantic.index; - instr.dsts[0].reg.idx_count = 1; - } - else - { - instr.dsts[0].reg.idx_count = 0; - } - instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; - } - else - { - instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; - instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; - instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - - if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) - instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; - - if (var->is_input_semantic) - { - switch (usage) - { - case D3D_NAME_UNDEFINED: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; - break; - - case D3D_NAME_INSTANCE_ID: - case D3D_NAME_PRIMITIVE_ID: - case D3D_NAME_VERTEX_ID: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; - break; - - default: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; - break; - } - - if (profile->type == VKD3D_SHADER_TYPE_PIXEL) - { - enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; - - if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) - mode = VKD3DSIM_CONSTANT; - - instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - } - } - else - { - if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; - else - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; - } - - switch (usage) - { - case D3D_NAME_COVERAGE: - case D3D_NAME_DEPTH: - case D3D_NAME_DEPTH_GREATER_EQUAL: - case D3D_NAME_DEPTH_LESS_EQUAL: - case D3D_NAME_TARGET: - case D3D_NAME_UNDEFINED: - break; - - default: - instr.idx_count = 1; - instr.idx[0] = usage; - break; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_TEMPS, - - .idx = {temp_count}, - .idx_count = 1, - }; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - - .idx = {thread_count[0], thread_count[1], thread_count[2]}, - .idx_count = 3, - }; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_RET, - }; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); - instr.srcs[0].reg.mod = src_mod; - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -/* dp# 
instructions don't map the swizzle. */ -static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_constant(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant) -{ - const unsigned int dimx = constant->node.data_type->dimx; - struct sm4_instruction instr; - struct sm4_register *reg = &instr.srcs[0].reg; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &constant->node); - instr.dst_count = 1; - - instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->type = VKD3D_SM4_RT_IMMCONST; - if (dimx == 1) - { - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - reg->immconst_uint[0] = constant->value[0].u; - } - else - { - unsigned int i, j = 0; - - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - for (i = 0; i < 4; ++i) - { - if (instr.dsts[0].writemask & (1u << i)) - reg->immconst_uint[i] = constant->value[j++].u; - } - } - instr.src_count = 1, - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset) -{ - bool uav = (resource_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr; - unsigned int dim_count; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - - if (!uav) - { - /* Mipmap level is in the last component in the IR, but needs to be in the W - * component in the instruction. 
*/ - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - if (dim_count == 1) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); - if (dim_count == 2) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); - } - - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 3; - - write_sm4_instruction(buffer, &instr); -} - -static bool type_is_float(const struct hlsl_type *type) -{ - return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; -} - -static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_AND; - - sm4_dst_from_node(&instr.dsts[0], &expr->node); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); - instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; - instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - instr.srcs[1].reg.immconst_uint[0] = mask; - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_cast(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -{ - static const union - { - uint32_t u; - float f; - } one = { .f = 1.0 }; - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; - - /* Narrowing casts were already lowered. 
*/ - assert(src_type->dimx == dst_type->dimx); - - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_INT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_UINT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_HALF: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to half."); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); - break; - - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. 
*/ - default: - vkd3d_unreachable(); - } -} - -static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - - sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_expr(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -{ - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; - const struct hlsl_type *dst_type = expr->node.data_type; - struct vkd3d_string_buffer *dst_type_string; - - assert(expr->node.reg.allocated); - - if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) - return; - - switch (expr->op) - { - case HLSL_OP1_ABS: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_BIT_NOT: - assert(type_is_integer(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_CAST: - write_sm4_cast(ctx, buffer, expr); - break; - - case HLSL_OP1_COS: - assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_EXP2: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FLOOR: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: - assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_OP1_ROUND: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV - | 
(VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT),
-                    &expr->node, arg1, 0);
-            break;
-
-        case HLSL_OP1_SIN:
-            assert(type_is_float(dst_type));
-            write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1);
-            break;
-
-        case HLSL_OP1_SQRT:
-            assert(type_is_float(dst_type));
-            write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0);
-            break;
-
-        case HLSL_OP2_ADD:
-            switch (dst_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_INT:
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer);
-            }
-            break;
-
-        case HLSL_OP2_BIT_AND:
-            assert(type_is_integer(dst_type));
-            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
-            break;
-
-        case HLSL_OP2_BIT_OR:
-            assert(type_is_integer(dst_type));
-            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
-            break;
-
-        case HLSL_OP2_BIT_XOR:
-            assert(type_is_integer(dst_type));
-            write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2);
-            break;
-
-        case HLSL_OP2_DIV:
-            switch (dst_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer);
-            }
-            break;
-
-        case HLSL_OP2_DOT:
-            switch (dst_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    switch (arg1->data_type->dimx)
-                    {
-                        case 4:
-                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2);
-                            break;
-
-                        case 3:
-                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2);
-                            break;
-
-                        case 2:
-                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2);
-                            break;
-
-                        case 1:
-                        default:
-                            vkd3d_unreachable();
-                    }
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer);
-            }
-            break;
-
-        case HLSL_OP2_EQUAL:
-        {
-            const struct hlsl_type *src_type = arg1->data_type;
-
-            assert(dst_type->base_type == HLSL_TYPE_BOOL);
-
-            switch (src_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_BOOL:
-                case HLSL_TYPE_INT:
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
-                            debug_hlsl_type(ctx, src_type));
-                    break;
-            }
-            break;
-        }
-
-        case HLSL_OP2_GEQUAL:
-        {
-            const struct hlsl_type *src_type = arg1->data_type;
-
-            assert(dst_type->base_type == HLSL_TYPE_BOOL);
-
-            switch (src_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_INT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_BOOL:
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
-                            debug_hlsl_type(ctx, src_type));
-                    break;
-            }
-            break;
-        }
-
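A note on the bool cases above and in write_sm4_cast_from_bool() earlier: SM4 comparison instructions write ~0u for true and 0 for false, so a bool operand can go through the integer comparisons unchanged, and casting from bool is just an AND with 1 (or with the bit pattern of 1.0f). A minimal sketch of that convention in plain C, illustrative only:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t t = ~0u, f = 0u;                  /* SM4 comparison results */
        union { uint32_t u; float fl; } one = { .fl = 1.0f };

        assert((t & 1u) == 1u && (f & 1u) == 0u);          /* bool -> int/uint */
        assert((t & one.u) == one.u && (f & one.u) == 0u); /* bool -> float */
        return 0;
    }
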
-        case HLSL_OP2_LESS:
-        {
-            const struct hlsl_type *src_type = arg1->data_type;
-
-            assert(dst_type->base_type == HLSL_TYPE_BOOL);
-
-            switch (src_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_INT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_BOOL:
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
-                            debug_hlsl_type(ctx, src_type));
-                    break;
-            }
-            break;
-        }
-
-        case HLSL_OP2_LOGIC_AND:
-            assert(dst_type->base_type == HLSL_TYPE_BOOL);
-            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
-            break;
-
-        case HLSL_OP2_LOGIC_OR:
-            assert(dst_type->base_type == HLSL_TYPE_BOOL);
-            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
-            break;
-
-        case HLSL_OP2_LSHIFT:
-            assert(type_is_integer(dst_type));
-            assert(dst_type->base_type != HLSL_TYPE_BOOL);
-            write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2);
-            break;
-
-        case HLSL_OP2_MAX:
-            switch (dst_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_INT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer);
-            }
-            break;
-
-        case HLSL_OP2_MIN:
-            switch (dst_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_INT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer);
-            }
-            break;
-
-        case HLSL_OP2_MOD:
-            switch (dst_type->base_type)
-            {
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer);
-            }
-            break;
-
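Note that the uint cases of HLSL_OP2_DIV and HLSL_OP2_MOD above emit the same VKD3D_SM4_OP_UDIV instruction: it produces the quotient in its first destination and the remainder in its second, and write_sm4_binary_op_with_two_destinations() points the unused slot at the null register. Per component the instruction behaves roughly like this sketch (plain C, not part of the patch; D3D defines unsigned integer division by zero as returning 0xffffffff):

    #include <stdint.h>

    /* What one SM4 UDIV computes per component: '/' keeps *quot and nulls
     * *rem; '%' does the reverse. */
    static void sm4_udiv_scalar(uint32_t *quot, uint32_t *rem, uint32_t a, uint32_t b)
    {
        *quot = b ? a / b : ~0u;
        *rem = b ? a % b : ~0u;
    }
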
-        case HLSL_OP2_MUL:
-            switch (dst_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_INT:
-                case HLSL_TYPE_UINT:
-                    /* Using IMUL instead of UMUL because we're taking the low
-                     * bits, and the native compiler generates IMUL. */
-                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer);
-            }
-            break;
-
-        case HLSL_OP2_NEQUAL:
-        {
-            const struct hlsl_type *src_type = arg1->data_type;
-
-            assert(dst_type->base_type == HLSL_TYPE_BOOL);
-
-            switch (src_type->base_type)
-            {
-                case HLSL_TYPE_FLOAT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2);
-                    break;
-
-                case HLSL_TYPE_BOOL:
-                case HLSL_TYPE_INT:
-                case HLSL_TYPE_UINT:
-                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2);
-                    break;
-
-                default:
-                    hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
-                            debug_hlsl_type(ctx, src_type));
-                    break;
-            }
-            break;
-        }
-
-        case HLSL_OP2_RSHIFT:
-            assert(type_is_integer(dst_type));
-            assert(dst_type->base_type != HLSL_TYPE_BOOL);
-            write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR,
-                    &expr->node, arg1, arg2);
-            break;
-
-        default:
-            hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
-    }
-
-    hlsl_release_string_buffer(ctx, dst_type_string);
-}
-
-static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ,
-        .src_count = 1,
-    };
-
-    assert(iff->condition.node->data_type->dimx == 1);
-
-    sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL);
-    write_sm4_instruction(buffer, &instr);
-
-    write_sm4_block(ctx, buffer, &iff->then_instrs);
-
-    if (!list_empty(&iff->else_instrs.instrs))
-    {
-        instr.opcode = VKD3D_SM4_OP_ELSE;
-        instr.src_count = 0;
-        write_sm4_instruction(buffer, &instr);
-
-        write_sm4_block(ctx, buffer, &iff->else_instrs);
-    }
-
-    instr.opcode = VKD3D_SM4_OP_ENDIF;
-    instr.src_count = 0;
-    write_sm4_instruction(buffer, &instr);
-}
-
-static void write_sm4_jump(struct hlsl_ctx *ctx,
-        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump)
-{
-    struct sm4_instruction instr = {0};
-
-    switch (jump->type)
-    {
-        case HLSL_IR_JUMP_BREAK:
-            instr.opcode = VKD3D_SM4_OP_BREAK;
-            break;
-
-        case HLSL_IR_JUMP_RETURN:
-            vkd3d_unreachable();
-
-        default:
-            hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type));
-            return;
-    }
-
-    write_sm4_instruction(buffer, &instr);
-}
-
-static void write_sm4_load(struct hlsl_ctx *ctx,
-        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load)
-{
-    struct sm4_instruction instr;
-
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_MOV;
-
-    sm4_dst_from_node(&instr.dsts[0], &load->node);
-    instr.dst_count = 1;
-
-    sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, load->node.data_type, instr.dsts[0].writemask);
-    instr.src_count = 1;
-
-    write_sm4_instruction(buffer, &instr);
-}
-
-static void write_sm4_loop(struct hlsl_ctx *ctx,
-        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_LOOP,
-    };
-
-    write_sm4_instruction(buffer, &instr);
-
-    write_sm4_block(ctx, buffer, &loop->body);
-
-    instr.opcode = VKD3D_SM4_OP_ENDLOOP;
-    write_sm4_instruction(buffer, &instr);
-}
-
-static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst,
-        const struct hlsl_deref 
*resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) -{ - struct sm4_src_register *src; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - - instr.opcode = VKD3D_SM4_OP_GATHER4; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - if (ctx->profile->major_version < 5) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); - return; - } - instr.opcode = VKD3D_SM5_OP_GATHER4_PO; - sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); - } - } - - sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); - - src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); - src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; - src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; - src->swizzle = swizzle; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_resource_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_type *resource_type = load->resource.var->data_type; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - - if (resource_type->type != HLSL_CLASS_OBJECT) - { - assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); - return; - } - - if (load->sampler.var) - { - const struct hlsl_type *sampler_type = load->sampler.var->data_type; - - if (sampler_type->type != HLSL_CLASS_OBJECT) - { - assert(sampler_type->type == HLSL_CLASS_ARRAY || sampler_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); - return; - } - assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); - assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC); - - if (!load->sampler.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - } - - if (!load->resource.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); - return; - } - - switch (load->load_type) - { - case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } - write_sm4_sample(ctx, buffer, resource_type, &load->node, - &load->resource, &load->sampler, coords, texel_offset); - break; - - case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(ctx, buffer, resource_type, 
&load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE_LOD: - hlsl_fixme(ctx, &load->node.loc, "SM4 sample-LOD expression."); - break; - } -} - -static void write_sm4_resource_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) -{ - const struct hlsl_type *resource_type = store->resource.var->data_type; - - if (resource_type->type != HLSL_CLASS_OBJECT) - { - assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); - return; - } - - if (!store->resource.var->is_uniform) - { - hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); - return; - } - - write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); -} - -static void write_sm4_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) -{ - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; - unsigned int writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); - instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_swizzle(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) -{ - struct sm4_instruction instr; - unsigned int writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &swizzle->node); - instr.dst_count = 1; - - sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); - instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), - swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block) -{ - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->type == HLSL_CLASS_MATRIX) - { - hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); - break; - } - else if (instr->data_type->type == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); - break; - - case HLSL_IR_EXPR: - write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); - break; - - case HLSL_IR_IF: - write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); - break; - - case HLSL_IR_JUMP: - write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); - break; - - 
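write_sm4_store() above combines the writemask of the destination register with the writemask of the store node via hlsl_combine_writemasks(), where the second mask selects among the components enabled in the first. A rough standalone sketch of that combination, assuming the usual 4-bit xyzw masks (illustrative, not the actual helper):

    /* E.g. first = xz (0x5) with second = y (0x2) selects the second
     * enabled component of 'first', giving z (0x4). */
    static unsigned int combine_writemasks(unsigned int first, unsigned int second)
    {
        unsigned int result = 0, i, j = 0;

        for (i = 0; i < 4; ++i)
        {
            if (first & (1u << i))
            {
                if (second & (1u << j))
                    result |= (1u << i);
                ++j;
            }
        }
        return result;
    }
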
case HLSL_IR_LOAD: - write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); - break; - - case HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); - break; - - case HLSL_IR_RESOURCE_STORE: - write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); - break; - - case HLSL_IR_LOOP: - write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); - break; - - case HLSL_IR_STORE: - write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWIZZLE: - write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} - -static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) -{ - const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - size_t token_count_position; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { - VKD3D_SM4_PS, - VKD3D_SM4_VS, - VKD3D_SM4_GS, - VKD3D_SM5_HS, - VKD3D_SM5_DS, - VKD3D_SM5_CS, - 0, /* EFFECT */ - 0, /* TEXTURE */ - VKD3D_SM4_LIB, - }; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); - token_count_position = put_u32(&buffer, 0); - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(&buffer, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) - { - var = extern_resources[i]; - - if (var->data_type->base_type == HLSL_TYPE_SAMPLER) - write_sm4_dcl_sampler(&buffer, var); - else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) - write_sm4_dcl_texture(&buffer, var); - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) - write_sm4_dcl_semantic(ctx, &buffer, var); - } - - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&buffer, ctx->thread_count); - - if (ctx->temp_count) - write_sm4_dcl_temps(&buffer, ctx->temp_count); - - write_sm4_block(ctx, &buffer, &entry_func->body); - - write_sm4_ret(&buffer); - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - - dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size); - - vkd3d_free(extern_resources); -} - -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -{ - struct dxbc_writer dxbc; - size_t i; - int ret; - - dxbc_writer_init(&dxbc); - - write_sm4_signature(ctx, &dxbc, false); - write_sm4_signature(ctx, &dxbc, true); - write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &dxbc); - - if (!(ret = ctx->result)) - ret = dxbc_writer_write(&dxbc, out); - for (i = 0; i < dxbc.section_count; ++i) - vkd3d_shader_free_shader_code(&dxbc.sections[i].data); - return ret; -} diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c new file mode 100644 index 00000000000..9eefb82c226 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -0,0 +1,1072 @@ +/* + * Copyright 2023 Conor 
McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; +} + +static bool shader_instruction_is_dcl(const struct vkd3d_shader_instruction *ins) +{ + return (VKD3DSIH_DCL <= ins->handler_idx && ins->handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) + || ins->handler_idx == VKD3DSIH_HS_DECLS; +} + +static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +{ + ins->handler_idx = VKD3DSIH_NOP; + ins->dst_count = 0; + ins->src_count = 0; + ins->dst = NULL; + ins->src = NULL; +} + +static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg, + unsigned int instance_id) +{ + unsigned int i; + + for (i = 0; i < reg->idx_count; ++i) + { + if (reg->idx[i].rel_addr && shader_register_is_phase_instance_id(®->idx[i].rel_addr->reg)) + { + reg->idx[i].rel_addr = NULL; + reg->idx[i].offset += instance_id; + } + } +} + +static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_instruction *ins, + unsigned int instance_id) +{ + struct vkd3d_shader_register *reg; + unsigned int i; + + for (i = 0; i < ins->src_count; ++i) + { + reg = (struct vkd3d_shader_register *)&ins->src[i].reg; + if (shader_register_is_phase_instance_id(reg)) + { + reg->type = VKD3DSPR_IMMCONST; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx_count = 0; + reg->immconst_type = VKD3D_IMMCONST_SCALAR; + reg->u.immconst_uint[0] = instance_id; + continue; + } + shader_register_eliminate_phase_addressing(reg, instance_id); + } + + for (i = 0; i < ins->dst_count; ++i) + shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); +} + +struct hull_flattener +{ + struct vkd3d_shader_instruction_array instructions; + + unsigned int max_temp_count; + unsigned int temp_dcl_idx; + + unsigned int instance_count; + unsigned int phase_body_idx; + enum vkd3d_shader_opcode phase; +}; + +static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener) +{ + return flattener->phase == VKD3DSIH_HS_FORK_PHASE || flattener->phase == VKD3DSIH_HS_JOIN_PHASE; +} + +struct shader_phase_location +{ + unsigned int index; + unsigned int instance_count; + unsigned int instruction_count; +}; + +struct shader_phase_location_array +{ + /* Unlikely worst case: one phase for each component of each output register. 
*/ + struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE]; + unsigned int count; +}; + +static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser, + unsigned int index, struct shader_phase_location_array *locations) +{ + struct vkd3d_shader_instruction *ins = &normaliser->instructions.elements[index]; + struct shader_phase_location *loc; + bool b; + + if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + { + b = flattener_is_in_fork_or_join_phase(normaliser); + /* Reset the phase info. */ + normaliser->phase_body_idx = ~0u; + normaliser->phase = ins->handler_idx; + normaliser->instance_count = 1; + /* Leave the first occurrence and delete the rest. */ + if (b) + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT + || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + { + normaliser->instance_count = ins->declaration.count + !ins->declaration.count; + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( + &ins->declaration.dst.reg)) + { + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_TEMPS && normaliser->phase != VKD3DSIH_INVALID) + { + /* Leave only the first temp declaration and set it to the max count later. */ + if (!normaliser->max_temp_count) + normaliser->temp_dcl_idx = index; + else + vkd3d_shader_instruction_make_nop(ins); + normaliser->max_temp_count = max(normaliser->max_temp_count, ins->declaration.count); + return; + } + + if (normaliser->phase == VKD3DSIH_INVALID || shader_instruction_is_dcl(ins)) + return; + + if (normaliser->phase_body_idx == ~0u) + normaliser->phase_body_idx = index; + + if (ins->handler_idx == VKD3DSIH_RET) + { + vkd3d_shader_instruction_make_nop(ins); + if (locations->count >= ARRAY_SIZE(locations->locations)) + { + FIXME("Insufficient space for phase location.\n"); + return; + } + loc = &locations->locations[locations->count++]; + loc->index = normaliser->phase_body_idx; + loc->instance_count = normaliser->instance_count; + loc->instruction_count = index - normaliser->phase_body_idx; + } +} + +static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser, + struct shader_phase_location_array *locations) +{ + struct shader_phase_location *loc; + unsigned int i, j, k, end, count; + + for (i = 0, count = 0; i < locations->count; ++i) + count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count; + + if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + end = normaliser->instructions.count; + normaliser->instructions.count += count; + + for (i = locations->count; i > 0; --i) + { + loc = &locations->locations[i - 1]; + j = loc->index + loc->instruction_count; + memmove(&normaliser->instructions.elements[j + count], &normaliser->instructions.elements[j], + (end - j) * sizeof(*normaliser->instructions.elements)); + end = j; + count -= (loc->instance_count - 1) * loc->instruction_count; + loc->index += count; + } + + for (i = 0, count = 0; i < locations->count; ++i) + { + loc = &locations->locations[i]; + /* Make a copy of the non-dcl instructions for each instance. 
*/ + for (j = 1; j < loc->instance_count; ++j) + { + for (k = 0; k < loc->instruction_count; ++k) + { + if (!shader_instruction_array_clone_instruction(&normaliser->instructions, + loc->index + loc->instruction_count * j + k, loc->index + k)) + return VKD3D_ERROR_OUT_OF_MEMORY; + } + } + /* Replace each reference to the instance id with a constant instance id. */ + for (j = 0; j < loc->instance_count; ++j) + { + for (k = 0; k < loc->instruction_count; ++k) + shader_instruction_eliminate_phase_instance_id( + &normaliser->instructions.elements[loc->index + loc->instruction_count * j + k], j); + } + } + + return VKD3D_OK; +} + +static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + reg->type = reg_type; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->data_type = data_type; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx_count = idx_count; + reg->immconst_type = VKD3D_IMMCONST_SCALAR; +} + +static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) +{ + memset(ins, 0, sizeof(*ins)); + ins->handler_idx = handler_idx; +} + +enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) +{ + struct hull_flattener flattener = {*src_instructions}; + struct vkd3d_shader_instruction_array *instructions; + struct shader_phase_location_array locations; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; + + instructions = &flattener.instructions; + + flattener.phase = VKD3DSIH_INVALID; + for (i = 0, locations.count = 0; i < instructions->count; ++i) + flattener_eliminate_phase_related_dcls(&flattener, i, &locations); + + if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) + return result; + + if (flattener.phase != VKD3DSIH_INVALID) + { + if (flattener.temp_dcl_idx) + instructions->elements[flattener.temp_dcl_idx].declaration.count = flattener.max_temp_count; + + if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + shader_instruction_init(&instructions->elements[instructions->count++], VKD3DSIH_RET); + } + + *src_instructions = flattener.instructions; + return result; +} + +struct control_point_normaliser +{ + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_opcode phase; + struct vkd3d_shader_src_param *outpointid_param; +}; + +static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + struct vkd3d_shader_instruction_array *instructions) +{ + struct vkd3d_shader_src_param *rel_addr; + + if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) + return NULL; + + shader_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); + rel_addr->swizzle = 0; + rel_addr->modifiers = 0; + + return rel_addr; +} + +static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, + struct control_point_normaliser *normaliser) +{ + struct vkd3d_shader_register *reg = &dst_param->reg; + + if 
(control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) + { + /* The TPF reader validates idx_count. */ + assert(reg->idx_count == 1); + reg->idx[1] = reg->idx[0]; + /* The control point id param is implicit here. Avoid later complications by inserting it. */ + reg->idx[0].offset = 0; + reg->idx[0].rel_addr = normaliser->outpointid_param; + ++reg->idx_count; + } +} + +static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, + enum vkd3d_shader_register_type reg_type, unsigned int idx_count) +{ + param->write_mask = e->mask; + param->modifiers = 0; + param->shift = 0; + shader_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); +} + +static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, + const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst) +{ + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_dst_param *param; + const struct signature_element *e; + unsigned int i, count; + + for (i = 0, count = 1; i < s->element_count; ++i) + count += !!s->elements[i].used_mask; + + if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst], + (normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements)); + normaliser->instructions.count += count; + + ins = &normaliser->instructions.elements[dst]; + shader_instruction_init(ins, VKD3DSIH_HS_CONTROL_POINT_PHASE); + ins->flags = 1; + ++ins; + + for (i = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; + if (!e->used_mask) + continue; + + if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) + { + shader_instruction_init(ins, VKD3DSIH_DCL_INPUT_SIV); + param = &ins->declaration.register_semantic.reg; + ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); + } + else + { + shader_instruction_init(ins, VKD3DSIH_DCL_INPUT); + param = &ins->declaration.dst; + } + + shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); + param->reg.idx[0].offset = input_control_point_count; + param->reg.idx[1].offset = i; + + ++ins; + } + + return VKD3D_OK; +} + +enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) +{ + struct vkd3d_shader_instruction_array *instructions; + struct control_point_normaliser normaliser; + unsigned int input_control_point_count; + struct vkd3d_shader_instruction *ins; + enum vkd3d_result ret; + unsigned int i, j; + + if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) + { + ERR("Failed to allocate src param.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + normaliser.instructions = *src_instructions; + instructions = &normaliser.instructions; + normaliser.phase = VKD3DSIH_INVALID; + + for (i = 0; i < normaliser.instructions.count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser.phase = ins->handler_idx; + break; + default: + if (shader_instruction_is_dcl(ins)) + break; + for (j = 0; j < ins->dst_count; ++j) + 
shader_dst_param_normalise_outpointid((struct vkd3d_shader_dst_param *)&ins->dst[j], &normaliser); + break; + } + } + + normaliser.phase = VKD3DSIH_INVALID; + input_control_point_count = 1; + + for (i = 0; i < instructions->count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + input_control_point_count = ins->declaration.count; + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + *src_instructions = normaliser.instructions; + return VKD3D_OK; + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, + input_control_point_count, i); + *src_instructions = normaliser.instructions; + return ret; + default: + break; + } + } + + *src_instructions = normaliser.instructions; + return VKD3D_OK; +} + +struct io_normaliser +{ + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_type shader_type; + struct shader_signature *input_signature; + struct shader_signature *output_signature; + struct shader_signature *patch_constant_signature; + + unsigned int max_temp_count; + unsigned int temp_dcl_idx; + + unsigned int instance_count; + unsigned int phase_body_idx; + enum vkd3d_shader_opcode phase; + unsigned int output_control_point_count; + + struct vkd3d_shader_src_param *outpointid_param; + + struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; + struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; + struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT]; + uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; + uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; + uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; +}; + +static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; +} + +static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, unsigned int write_mask) +{ + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + struct signature_element *e = &signature->elements[i]; + if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx + && (e->mask & write_mask) == write_mask) + { + return i; + } + } + + /* Validated in the TPF reader. */ + vkd3d_unreachable(); +} + +static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], + unsigned int register_idx, unsigned int write_mask) +{ + return range_map[register_idx][vkd3d_write_mask_get_component_idx(write_mask)]; +} + +static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx, + unsigned int register_count, unsigned int write_mask, bool is_dcl_indexrange) +{ + unsigned int i, j, r, c, component_idx, component_count; + + assert(write_mask <= VKD3DSP_WRITEMASK_ALL); + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + component_count = vkd3d_write_mask_component_count(write_mask); + + assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); + + if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) + { + /* Validated in the TPF reader. 
*/ + assert(range_map[register_idx][component_idx] != UINT8_MAX); + return; + } + if (range_map[register_idx][component_idx] == register_count) + { + /* Already done. This happens when fxc splits a register declaration by + * component(s). The dcl_indexrange instructions are split too. */ + return; + } + range_map[register_idx][component_idx] = register_count; + + for (i = 0; i < register_count; ++i) + { + r = register_idx + i; + for (j = !i; j < component_count; ++j) + { + c = component_idx + j; + /* A synthetic patch constant range which overlaps an existing range can start upstream of it + * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. + * The latter is validated in the TPF reader. */ + assert(!range_map[r][c] || !is_dcl_indexrange); + range_map[r][c] = UINT8_MAX; + } + } +} + +static void io_normaliser_add_index_range(struct io_normaliser *normaliser, + const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; + const struct vkd3d_shader_register *reg = &range->dst.reg; + unsigned int reg_idx, write_mask, element_idx; + const struct shader_signature *signature; + uint8_t (*range_map)[VKD3D_VEC4_SIZE]; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + range_map = normaliser->input_range_map; + signature = normaliser->input_signature; + break; + case VKD3DSPR_OUTCONTROLPOINT: + range_map = normaliser->output_range_map; + signature = normaliser->output_signature; + break; + case VKD3DSPR_OUTPUT: + if (!io_normaliser_is_in_fork_or_join_phase(normaliser)) + { + range_map = normaliser->output_range_map; + signature = normaliser->output_signature; + break; + } + /* fall through */ + case VKD3DSPR_PATCHCONST: + range_map = normaliser->pc_range_map; + signature = normaliser->patch_constant_signature; + break; + default: + /* Validated in the TPF reader. */ + vkd3d_unreachable(); + } + + reg_idx = reg->idx[reg->idx_count - 1].offset; + write_mask = range->dst.write_mask; + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + range_map_set_register_range(range_map, reg_idx, range->register_count, + signature->elements[element_idx].mask, true); +} + +static int signature_element_mask_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + int ret; + + return (ret = vkd3d_u32_compare(e->mask, f->mask)) ? ret : vkd3d_u32_compare(e->register_index, f->register_index); +} + +static bool sysval_semantics_should_merge(const struct signature_element *e, const struct signature_element *f) +{ + if (e->sysval_semantic < VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE + || e->sysval_semantic > VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) + return false; + + return e->sysval_semantic == f->sysval_semantic + /* Line detail and density must be merged together to match the SPIR-V array. + * This deletes one of the two sysvals, but these are not used. */ + || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET + && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) + || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN + && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET); +} + +/* Merge tess factor sysvals because they are an array in SPIR-V. 
*/ +static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s, + uint8_t range_map[][VKD3D_VEC4_SIZE]) +{ + struct signature_element *e, *f; + unsigned int i, j, register_count; + + qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare); + + for (i = 0; i < s->element_count; i += register_count) + { + e = &s->elements[i]; + register_count = 1; + + if (!e->sysval_semantic) + continue; + + for (j = i + 1; j < s->element_count; ++j, ++register_count) + { + f = &s->elements[j]; + if (f->register_index != e->register_index + register_count || !sysval_semantics_should_merge(e, f)) + break; + } + if (register_count < 2) + continue; + + range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false); + } +} + +static int signature_element_register_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + + return vkd3d_u32_compare(e->register_index, f->register_index); +} + +static int signature_element_index_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + + return vkd3d_u32_compare(e->sort_index, f->sort_index); +} + +static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], + bool is_patch_constant) +{ + unsigned int i, j, element_count, new_count, register_count; + struct signature_element *elements; + struct signature_element *e, *f; + + element_count = s->element_count; + if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) + return false; + memcpy(elements, s->elements, element_count * sizeof(*elements)); + + qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); + + for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) + { + e = &elements[i]; + j = i + 1; + + if (e->register_index == ~0u) + continue; + + /* Do not merge if the register index will be relative-addressed. */ + if (range_map_get_register_count(range_map, e->register_index, e->mask) > 1) + continue; + + for (; j < element_count; ++j) + { + f = &elements[j]; + + /* Merge different components of the same register unless sysvals are different, + * or it will be relative-addressed. 
*/ + if (f->register_index != e->register_index || f->sysval_semantic != e->sysval_semantic + || range_map_get_register_count(range_map, f->register_index, f->mask) > 1) + break; + + TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, + e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); + assert(!(e->mask & f->mask)); + + e->mask |= f->mask; + e->used_mask |= f->used_mask; + e->semantic_index = min(e->semantic_index, f->semantic_index); + } + } + element_count = new_count; + vkd3d_free(s->elements); + s->elements = elements; + s->element_count = element_count; + + if (is_patch_constant) + shader_signature_map_patch_constant_index_ranges(s, range_map); + + for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e) + { + e = &elements[i]; + register_count = 1; + + if (e->register_index >= MAX_REG_OUTPUT) + continue; + + register_count = range_map_get_register_count(range_map, e->register_index, e->mask); + assert(register_count != UINT8_MAX); + register_count += !register_count; + + if (register_count > 1) + { + TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); + e->register_count = register_count; + } + } + element_count = new_count; + + /* Restoring the original order is required for sensible trace output. */ + qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare); + + s->element_count = element_count; + + return true; +} + +static bool sysval_semantic_is_tess_factor(enum vkd3d_shader_sysval_semantic sysval_semantic) +{ + return sysval_semantic >= VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE + && sysval_semantic <= VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; +} + +static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, + unsigned int id_idx, unsigned int register_index) +{ + assert(id_idx < ARRAY_SIZE(reg->idx) - 1); + + /* For a relative-addressed register index, move the id up a slot to separate it from the address, + * because rel_addr can be replaced with a constant offset in some cases. */ + if (reg->idx[id_idx].rel_addr) + { + reg->idx[id_idx + 1].rel_addr = NULL; + reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset; + reg->idx[id_idx].offset -= register_index; + ++id_idx; + } + /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where + * tessellation level registers are merged into an array because they're an array in SPIR-V. */ + else + { + ++id_idx; + memmove(®->idx[1], ®->idx[0], id_idx * sizeof(reg->idx[0])); + reg->idx[0].rel_addr = NULL; + reg->idx[0].offset = reg->idx[id_idx].offset - register_index; + } + + return id_idx; +} + +static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, + struct io_normaliser *normaliser) + { + unsigned int id_idx, reg_idx, write_mask, element_idx; + struct vkd3d_shader_register *reg = &dst_param->reg; + struct vkd3d_shader_dst_param **dcl_params; + const struct shader_signature *signature; + const struct signature_element *e; + + if ((reg->type == VKD3DSPR_OUTPUT && io_normaliser_is_in_fork_or_join_phase(normaliser)) + || reg->type == VKD3DSPR_PATCHCONST) + { + signature = normaliser->patch_constant_signature; + /* Convert patch constant outputs to the patch constant register type to avoid the need + * to convert compiler symbols when accessed as inputs in a later stage. 
*/ + reg->type = VKD3DSPR_PATCHCONST; + dcl_params = normaliser->pc_dcl_params; + } + else if (reg->type == VKD3DSPR_OUTPUT || dst_param->reg.type == VKD3DSPR_COLOROUT) + { + signature = normaliser->output_signature; + dcl_params = normaliser->output_dcl_params; + } + else if (dst_param->reg.type == VKD3DSPR_INCONTROLPOINT || dst_param->reg.type == VKD3DSPR_INPUT) + { + signature = normaliser->input_signature; + dcl_params = normaliser->input_dcl_params; + } + else + { + return true; + } + + id_idx = reg->idx_count - 1; + reg_idx = reg->idx[id_idx].offset; + write_mask = dst_param->write_mask; + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + e = &signature->elements[element_idx]; + + dst_param->write_mask >>= vkd3d_write_mask_get_component_idx(e->mask); + if (is_io_dcl) + { + /* Validated in the TPF reader. */ + assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); + + if (dcl_params[element_idx]) + { + /* Merge split declarations into a single one. */ + dcl_params[element_idx]->write_mask |= dst_param->write_mask; + /* Turn this into a nop. */ + return false; + } + else + { + dcl_params[element_idx] = dst_param; + } + } + + if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) + { + if (is_io_dcl) + { + /* Emit an array size for the control points for consistency with inputs. */ + reg->idx[0].offset = normaliser->output_control_point_count; + } + else + { + /* The control point id param. */ + assert(reg->idx[0].rel_addr); + } + id_idx = 1; + } + + if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) + { + if (is_io_dcl) + { + /* For control point I/O, idx 0 contains the control point count. + * Ensure it is moved up to the next slot. */ + reg->idx[id_idx].offset = reg->idx[0].offset; + reg->idx[0].offset = e->register_count; + ++id_idx; + } + else + { + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + } + } + + /* Replace the register index with the signature element index */ + reg->idx[id_idx].offset = element_idx; + reg->idx_count = id_idx + 1; + + return true; +} + +static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_param, + struct io_normaliser *normaliser) +{ + unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx; + struct vkd3d_shader_register *reg = &src_param->reg; + const struct shader_signature *signature; + const struct signature_element *e; + + /* Input/output registers from one phase can be used as inputs in + * subsequent phases. Specifically: + * + * - Control phase inputs are available as "vicp" in fork and join + * phases. + * - Control phase outputs are available as "vocp" in fork and join + * phases. + * - Fork phase patch constants are available as "vpc" in join + * phases. + * + * We handle "vicp" here by converting INCONTROLPOINT src registers to + * type INPUT so they match the control phase declarations. We handle + * "vocp" by converting OUTCONTROLPOINT registers to type OUTPUT. + * Merging fork and join phases handles "vpc". 
*/ + + switch (reg->type) + { + case VKD3DSPR_PATCHCONST: + signature = normaliser->patch_constant_signature; + break; + case VKD3DSPR_INCONTROLPOINT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + reg->type = VKD3DSPR_INPUT; + /* fall through */ + case VKD3DSPR_INPUT: + signature = normaliser->input_signature; + break; + case VKD3DSPR_OUTCONTROLPOINT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + reg->type = VKD3DSPR_OUTPUT; + /* fall through */ + case VKD3DSPR_OUTPUT: + signature = normaliser->output_signature; + break; + default: + return; + } + + id_idx = reg->idx_count - 1; + reg_idx = reg->idx[id_idx].offset; + write_mask = VKD3DSP_WRITEMASK_0 << vkd3d_swizzle_get_component(src_param->swizzle, 0); + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + + e = &signature->elements[element_idx]; + if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + reg->idx[id_idx].offset = element_idx; + reg->idx_count = id_idx + 1; + + if ((component_idx = vkd3d_write_mask_get_component_idx(e->mask))) + { + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + if (vkd3d_swizzle_get_component(src_param->swizzle, i)) + src_param->swizzle -= component_idx << VKD3D_SHADER_SWIZZLE_SHIFT(i); + } +} + +static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, + struct io_normaliser *normaliser) +{ + struct vkd3d_shader_register *reg; + bool keep = true; + unsigned int i; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_INPUT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + { + reg = &ins->declaration.dst.reg; + /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their + * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ + if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) + vkd3d_shader_instruction_make_nop(ins); + else if (reg->type == VKD3DSPR_INCONTROLPOINT) + reg->type = VKD3DSPR_INPUT; + } + /* fall through */ + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_OUTPUT: + keep = shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser); + break; + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: + keep = shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, + normaliser); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser->phase = ins->handler_idx; + memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); + memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); + memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); + break; + default: + if (shader_instruction_is_dcl(ins)) + break; + for (i = 0; i < ins->dst_count; ++i) + shader_dst_param_io_normalise((struct vkd3d_shader_dst_param *)&ins->dst[i], false, normaliser); + for (i = 0; i < ins->src_count; ++i) + shader_src_param_io_normalise((struct vkd3d_shader_src_param *)&ins->src[i], normaliser); + break; + } + + if (!keep) + shader_instruction_init(ins, VKD3DSIH_NOP); +} + +enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) +{ + struct io_normaliser normaliser = {*instructions}; + struct vkd3d_shader_instruction *ins; + bool has_control_point_phase; + unsigned int i, j; + + normaliser.phase = VKD3DSIH_INVALID; + normaliser.shader_type = shader_type; + normaliser.input_signature = input_signature; + normaliser.output_signature = output_signature; + normaliser.patch_constant_signature = patch_constant_signature; + + for (i = 0, has_control_point_phase = false; i < instructions->count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + normaliser.output_control_point_count = ins->declaration.count; + break; + case VKD3DSIH_DCL_INDEX_RANGE: + io_normaliser_add_index_range(&normaliser, ins); + vkd3d_shader_instruction_make_nop(ins); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + has_control_point_phase = true; + /* fall through */ + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser.phase = ins->handler_idx; + break; + default: + break; + } + } + + if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) + { + /* Inputs and outputs must match for the default phase, so merge ranges must match too. 
*/ + for (i = 0; i < MAX_REG_OUTPUT; ++i) + { + for (j = 0; j < VKD3D_VEC4_SIZE; ++j) + { + if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) + normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; + else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) + normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; + else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); + } + } + } + + if (!shader_signature_merge(input_signature, normaliser.input_range_map, false) + || !shader_signature_merge(output_signature, normaliser.output_range_map, false) + || !shader_signature_merge(patch_constant_signature, normaliser.pc_range_map, true)) + { + *instructions = normaliser.instructions; + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + normaliser.phase = VKD3DSIH_INVALID; + for (i = 0; i < normaliser.instructions.count; ++i) + shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); + + *instructions = normaliser.instructions; + return VKD3D_OK; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index bb5a6b61de1..94079696280 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -41,6 +41,7 @@ static void update_location(struct preproc_ctx *ctx); %option bison-locations %option extra-type="struct preproc_ctx *" %option never-interactive +%option nodefault %option noinput %option nounput %option noyy_top_state @@ -75,6 +76,7 @@ INT_SUFFIX [uUlL]{0,2} <C_COMMENT>"*/" {yy_pop_state(yyscanner);} <C_COMMENT,CXX_COMMENT><<EOF>> {yy_pop_state(yyscanner);} <C_COMMENT,CXX_COMMENT>. {} +<C_COMMENT>\n {}
<ERROR>(\\{NEWLINE}|[^\n])* {return T_STRING;}
@@ -176,9 +178,9 @@ INT_SUFFIX [uUlL]{0,2}
         return T_NEWLINE;
     }
-<INITIAL>{WS}+ {}
+<INITIAL,INCLUDE,LINE>{WS}+ {}
 <INITIAL>[-()\[\]{},+!*/<>&|^?:] {return yytext[0];}
-<INITIAL>. {return T_TEXT;}
+<INITIAL,INCLUDE,LINE>. {return T_TEXT;}
%%
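An aside on the new ir.c above, since the combined effect of flattener_flatten_phases() and shader_instruction_eliminate_phase_instance_id() is easy to lose in a large hunk: each fork/join phase body is cloned once per declared instance, and every read of the phase instance id in clone j is rewritten to the immediate constant j. A toy model of that net effect (illustrative types only, not the real vkd3d_shader_instruction):

    #include <stdio.h>

    /* Toy stand-in for an instruction reading the phase instance id;
     * -1 marks the still-symbolic register, as VKD3DSPR_FORKINSTID would be. */
    struct toy_ins
    {
        const char *op;
        int instance_id;
    };

    int main(void)
    {
        static const struct toy_ins body[] = {{"add", -1}, {"mov", -1}};
        struct toy_ins flat[3 * 2];
        unsigned int i, j;

        for (j = 0; j < 3; ++j)
        {
            for (i = 0; i < 2; ++i)
            {
                /* Clone the phase body for instance j... */
                flat[j * 2 + i] = body[i];
                /* ...and bake the instance id in as an immediate constant,
                 * as shader_instruction_eliminate_phase_instance_id() does. */
                flat[j * 2 + i].instance_id = j;
            }
        }

        for (i = 0; i < 3 * 2; ++i)
            printf("%s, instance %d\n", flat[i].op, flat[i].instance_id);
        return 0;
    }

With instancing resolved this way, later passes never see vForkInstanceId or vJoinInstanceId at all, which is what allows the spirv.c simplifications further down.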
diff --git a/libs/vkd3d/libs/vkd3d-shader/sm4.h b/libs/vkd3d/libs/vkd3d-shader/sm4.h deleted file mode 100644 index 5ec4ee17e27..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/sm4.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright 2009 Henri Verbeet for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#ifndef __VKD3D_SM4_H -#define __VKD3D_SM4_H - -#define VKD3D_SM4_PS 0x0000u -#define VKD3D_SM4_VS 0x0001u -#define VKD3D_SM4_GS 0x0002u -#define VKD3D_SM5_HS 0x0003u -#define VKD3D_SM5_DS 0x0004u -#define VKD3D_SM5_CS 0x0005u -#define VKD3D_SM4_LIB 0xfff0u - -#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) - -#define VKD3D_SM4_MODIFIER_MASK 0x3fu - -#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) - -#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 -#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) -#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 -#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) -#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 -#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 -#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 -#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) - -#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 -#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 -#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) - -#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 -#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) - -#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 -#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) - -#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 -#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) - -#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 -#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) - -#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 -#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) - -#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 -#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) - -#define VKD3D_SM5_PRECISE_SHIFT 19 -#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) - 
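A note for readers not steeped in the token layout these masks describe: every field is extracted mask-then-shift from a 32-bit token. A minimal sketch of a consumer (illustrative only; the real decoder is being moved out of this header by the patch, presumably into the TPF reader that the ir.c comments refer to):

    #include <stdint.h>

    /* Illustrative decoders for the opcode-token fields #defined above. */
    static unsigned int sm4_instruction_length(uint32_t token)
    {
        return (token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT;
    }

    static unsigned int sm4_instruction_flags(uint32_t token)
    {
        return (token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
    }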
-#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 -#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) - -#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 -#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu - -#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 -#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) - -#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 -#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) - -#define VKD3D_SM5_TESSELLATOR_SHIFT 11 -#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) - -#define VKD3D_SM4_OPCODE_MASK 0xff - -#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) - -#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu - -#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 -#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - -#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 -#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) - -#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 -#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) - -#define VKD3D_SM4_ADDRESSING_SHIFT2 28 -#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) - -#define VKD3D_SM4_ADDRESSING_SHIFT1 25 -#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) - -#define VKD3D_SM4_ADDRESSING_SHIFT0 22 -#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) - -#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 -#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) - -#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 -#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) - -#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 -#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) - -#define VKD3D_SM4_DIMENSION_SHIFT 0 -#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) - -#define VKD3D_SM4_WRITEMASK_SHIFT 4 -#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) - -#define VKD3D_SM4_SWIZZLE_SHIFT 4 -#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) - -#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) - -#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 -#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 - -#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 - -#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) - -#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) - -/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ -#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 - -enum vkd3d_sm4_opcode -{ - VKD3D_SM4_OP_ADD = 0x00, - VKD3D_SM4_OP_AND = 0x01, - VKD3D_SM4_OP_BREAK = 0x02, - VKD3D_SM4_OP_BREAKC = 0x03, - VKD3D_SM4_OP_CASE = 0x06, - VKD3D_SM4_OP_CONTINUE = 0x07, - VKD3D_SM4_OP_CONTINUEC = 0x08, - VKD3D_SM4_OP_CUT = 0x09, - VKD3D_SM4_OP_DEFAULT = 0x0a, - VKD3D_SM4_OP_DERIV_RTX = 0x0b, - VKD3D_SM4_OP_DERIV_RTY = 0x0c, - VKD3D_SM4_OP_DISCARD = 0x0d, - VKD3D_SM4_OP_DIV = 0x0e, - VKD3D_SM4_OP_DP2 = 0x0f, - VKD3D_SM4_OP_DP3 = 0x10, - VKD3D_SM4_OP_DP4 = 0x11, - VKD3D_SM4_OP_ELSE = 0x12, - VKD3D_SM4_OP_EMIT = 0x13, - VKD3D_SM4_OP_ENDIF = 0x15, - VKD3D_SM4_OP_ENDLOOP = 0x16, - VKD3D_SM4_OP_ENDSWITCH = 0x17, - VKD3D_SM4_OP_EQ = 0x18, - VKD3D_SM4_OP_EXP = 0x19, - VKD3D_SM4_OP_FRC = 0x1a, - VKD3D_SM4_OP_FTOI = 0x1b, - VKD3D_SM4_OP_FTOU = 0x1c, - VKD3D_SM4_OP_GE = 0x1d, - VKD3D_SM4_OP_IADD = 0x1e, - VKD3D_SM4_OP_IF = 0x1f, - VKD3D_SM4_OP_IEQ = 0x20, - VKD3D_SM4_OP_IGE = 0x21, - VKD3D_SM4_OP_ILT = 0x22, - VKD3D_SM4_OP_IMAD = 0x23, - VKD3D_SM4_OP_IMAX = 0x24, - VKD3D_SM4_OP_IMIN = 0x25, - VKD3D_SM4_OP_IMUL = 0x26, - VKD3D_SM4_OP_INE = 0x27, - VKD3D_SM4_OP_INEG = 0x28, - VKD3D_SM4_OP_ISHL = 0x29, - VKD3D_SM4_OP_ISHR = 0x2a, - VKD3D_SM4_OP_ITOF = 0x2b, - VKD3D_SM4_OP_LABEL = 0x2c, - VKD3D_SM4_OP_LD = 0x2d, - VKD3D_SM4_OP_LD2DMS = 0x2e, - VKD3D_SM4_OP_LOG = 0x2f, - VKD3D_SM4_OP_LOOP = 0x30, - VKD3D_SM4_OP_LT = 0x31, - VKD3D_SM4_OP_MAD = 0x32, - VKD3D_SM4_OP_MIN = 0x33, - VKD3D_SM4_OP_MAX = 0x34, - VKD3D_SM4_OP_SHADER_DATA = 0x35, - VKD3D_SM4_OP_MOV = 0x36, - VKD3D_SM4_OP_MOVC = 0x37, - VKD3D_SM4_OP_MUL = 0x38, - VKD3D_SM4_OP_NE = 0x39, - VKD3D_SM4_OP_NOP = 0x3a, - VKD3D_SM4_OP_NOT = 0x3b, - VKD3D_SM4_OP_OR = 0x3c, - VKD3D_SM4_OP_RESINFO = 0x3d, - VKD3D_SM4_OP_RET = 0x3e, - VKD3D_SM4_OP_RETC = 0x3f, - VKD3D_SM4_OP_ROUND_NE = 0x40, - VKD3D_SM4_OP_ROUND_NI = 0x41, - VKD3D_SM4_OP_ROUND_PI = 0x42, - VKD3D_SM4_OP_ROUND_Z = 0x43, - VKD3D_SM4_OP_RSQ = 0x44, - VKD3D_SM4_OP_SAMPLE = 0x45, - VKD3D_SM4_OP_SAMPLE_C = 0x46, - VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, - VKD3D_SM4_OP_SAMPLE_LOD = 0x48, - VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, - VKD3D_SM4_OP_SAMPLE_B = 0x4a, - VKD3D_SM4_OP_SQRT = 0x4b, - VKD3D_SM4_OP_SWITCH = 0x4c, - VKD3D_SM4_OP_SINCOS = 0x4d, - VKD3D_SM4_OP_UDIV = 0x4e, - VKD3D_SM4_OP_ULT = 0x4f, - VKD3D_SM4_OP_UGE = 0x50, - VKD3D_SM4_OP_UMUL = 0x51, - VKD3D_SM4_OP_UMAX = 0x53, - VKD3D_SM4_OP_UMIN = 0x54, - VKD3D_SM4_OP_USHR = 0x55, - VKD3D_SM4_OP_UTOF = 0x56, - VKD3D_SM4_OP_XOR = 0x57, - VKD3D_SM4_OP_DCL_RESOURCE = 0x58, - VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, - VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, - VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, - VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, - VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, - VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, - VKD3D_SM4_OP_DCL_INPUT = 0x5f, - VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, - VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, - VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, - VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, - VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, - VKD3D_SM4_OP_DCL_OUTPUT = 0x65, - VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, - VKD3D_SM4_OP_DCL_TEMPS = 0x68, - VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, - VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, - VKD3D_SM4_OP_LOD = 0x6c, - VKD3D_SM4_OP_GATHER4 = 0x6d, - VKD3D_SM4_OP_SAMPLE_POS = 0x6e, - VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, - VKD3D_SM5_OP_HS_DECLS = 0x71, - VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, - VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, - VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, - VKD3D_SM5_OP_EMIT_STREAM = 0x75, - VKD3D_SM5_OP_CUT_STREAM = 0x76, - VKD3D_SM5_OP_FCALL = 
0x78, - VKD3D_SM5_OP_BUFINFO = 0x79, - VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, - VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, - VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, - VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, - VKD3D_SM5_OP_GATHER4_C = 0x7e, - VKD3D_SM5_OP_GATHER4_PO = 0x7f, - VKD3D_SM5_OP_GATHER4_PO_C = 0x80, - VKD3D_SM5_OP_RCP = 0x81, - VKD3D_SM5_OP_F32TOF16 = 0x82, - VKD3D_SM5_OP_F16TOF32 = 0x83, - VKD3D_SM5_OP_COUNTBITS = 0x86, - VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, - VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, - VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, - VKD3D_SM5_OP_UBFE = 0x8a, - VKD3D_SM5_OP_IBFE = 0x8b, - VKD3D_SM5_OP_BFI = 0x8c, - VKD3D_SM5_OP_BFREV = 0x8d, - VKD3D_SM5_OP_SWAPC = 0x8e, - VKD3D_SM5_OP_DCL_STREAM = 0x8f, - VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, - VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, - VKD3D_SM5_OP_DCL_INTERFACE = 0x92, - VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, - VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, - VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, - VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, - VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, - VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, - VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, - VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, - VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, - VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, - VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, - VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, - VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, - VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, - VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, - VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, - VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, - VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, - VKD3D_SM5_OP_LD_RAW = 0xa5, - VKD3D_SM5_OP_STORE_RAW = 0xa6, - VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, - VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, - VKD3D_SM5_OP_ATOMIC_AND = 0xa9, - VKD3D_SM5_OP_ATOMIC_OR = 0xaa, - VKD3D_SM5_OP_ATOMIC_XOR = 0xab, - VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, - VKD3D_SM5_OP_ATOMIC_IADD = 0xad, - VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, - VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, - VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, - VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, - VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, - VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, - VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, - VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, - VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, - VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, - VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, - VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, - VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, - VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, - VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, - VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, - VKD3D_SM5_OP_SYNC = 0xbe, - VKD3D_SM5_OP_DADD = 0xbf, - VKD3D_SM5_OP_DMAX = 0xc0, - VKD3D_SM5_OP_DMIN = 0xc1, - VKD3D_SM5_OP_DMUL = 0xc2, - VKD3D_SM5_OP_DEQ = 0xc3, - VKD3D_SM5_OP_DGE = 0xc4, - VKD3D_SM5_OP_DLT = 0xc5, - VKD3D_SM5_OP_DNE = 0xc6, - VKD3D_SM5_OP_DMOV = 0xc7, - VKD3D_SM5_OP_DMOVC = 0xc8, - VKD3D_SM5_OP_DTOF = 0xc9, - VKD3D_SM5_OP_FTOD = 0xca, - VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, - VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, - VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, - VKD3D_SM5_OP_DDIV = 0xd2, - VKD3D_SM5_OP_DFMA = 0xd3, - VKD3D_SM5_OP_DRCP = 0xd4, - VKD3D_SM5_OP_MSAD = 0xd5, - VKD3D_SM5_OP_DTOI = 0xd6, - VKD3D_SM5_OP_DTOU = 0xd7, - VKD3D_SM5_OP_ITOD = 0xd8, - VKD3D_SM5_OP_UTOD = 0xd9, - VKD3D_SM5_OP_GATHER4_S = 0xdb, - VKD3D_SM5_OP_GATHER4_C_S = 0xdc, - VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, - VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, - VKD3D_SM5_OP_LD_S = 0xdf, - VKD3D_SM5_OP_LD2DMS_S = 0xe0, - VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, - VKD3D_SM5_OP_LD_RAW_S = 0xe2, - 
VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, - VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, - VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, - VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, - VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, - VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, - VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, - VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, -}; - -enum vkd3d_sm4_instruction_modifier -{ - VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, - VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, - VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, -}; - -enum vkd3d_sm4_register_type -{ - VKD3D_SM4_RT_TEMP = 0x00, - VKD3D_SM4_RT_INPUT = 0x01, - VKD3D_SM4_RT_OUTPUT = 0x02, - VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, - VKD3D_SM4_RT_IMMCONST = 0x04, - VKD3D_SM4_RT_IMMCONST64 = 0x05, - VKD3D_SM4_RT_SAMPLER = 0x06, - VKD3D_SM4_RT_RESOURCE = 0x07, - VKD3D_SM4_RT_CONSTBUFFER = 0x08, - VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, - VKD3D_SM4_RT_PRIMID = 0x0b, - VKD3D_SM4_RT_DEPTHOUT = 0x0c, - VKD3D_SM4_RT_NULL = 0x0d, - VKD3D_SM4_RT_RASTERIZER = 0x0e, - VKD3D_SM4_RT_OMASK = 0x0f, - VKD3D_SM5_RT_STREAM = 0x10, - VKD3D_SM5_RT_FUNCTION_BODY = 0x11, - VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, - VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, - VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, - VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, - VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, - VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, - VKD3D_SM5_RT_UAV = 0x1e, - VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, - VKD3D_SM5_RT_THREAD_ID = 0x20, - VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, - VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, - VKD3D_SM5_RT_COVERAGE = 0x23, - VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, - VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, - VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, - VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, - VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, -}; - -enum vkd3d_sm4_extended_operand_type -{ - VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, - VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, -}; - -enum vkd3d_sm4_register_modifier -{ - VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, - VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, - VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, - VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, -}; - -enum vkd3d_sm4_register_precision -{ - VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, - VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, - VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, - VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, - VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, -}; - -enum vkd3d_sm4_output_primitive_type -{ - VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, - VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, - VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, -}; - -enum vkd3d_sm4_input_primitive_type -{ - VKD3D_SM4_INPUT_PT_POINT = 0x01, - VKD3D_SM4_INPUT_PT_LINE = 0x02, - VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, - VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, - VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, - VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, - VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, - VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, - VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, - VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, - VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, - VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, - VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, - VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, - VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, - VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, - VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, - VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, - VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, - VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, - VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, - VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, - VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, - VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, - 
VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, - VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, - VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, - VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, - VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, - VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, - VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, - VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, - VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, - VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, - VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, - VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, - VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, -}; - -enum vkd3d_sm4_swizzle_type -{ - VKD3D_SM4_SWIZZLE_NONE = 0x0, - VKD3D_SM4_SWIZZLE_VEC4 = 0x1, - VKD3D_SM4_SWIZZLE_SCALAR = 0x2, -}; - -enum vkd3d_sm4_dimension -{ - VKD3D_SM4_DIMENSION_NONE = 0x0, - VKD3D_SM4_DIMENSION_SCALAR = 0x1, - VKD3D_SM4_DIMENSION_VEC4 = 0x2, -}; - -enum vkd3d_sm4_resource_type -{ - VKD3D_SM4_RESOURCE_BUFFER = 0x1, - VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, - VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, - VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, - VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, - VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, - VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, - VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, - VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, - VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, - VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, - VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, -}; - -enum vkd3d_sm4_data_type -{ - VKD3D_SM4_DATA_UNORM = 0x1, - VKD3D_SM4_DATA_SNORM = 0x2, - VKD3D_SM4_DATA_INT = 0x3, - VKD3D_SM4_DATA_UINT = 0x4, - VKD3D_SM4_DATA_FLOAT = 0x5, - VKD3D_SM4_DATA_MIXED = 0x6, - VKD3D_SM4_DATA_DOUBLE = 0x7, - VKD3D_SM4_DATA_CONTINUED = 0x8, - VKD3D_SM4_DATA_UNUSED = 0x9, -}; - -enum vkd3d_sm4_sampler_mode -{ - VKD3D_SM4_SAMPLER_DEFAULT = 0x0, - VKD3D_SM4_SAMPLER_COMPARISON = 0x1, -}; - -enum vkd3d_sm4_shader_data_type -{ - VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, - VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, -}; - -#endif /* __VKD3D_SM4_H */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 53e13735937..bfe5272fd29 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -168,7 +168,7 @@ static void vkd3d_spirv_validate(const struct vkd3d_shader_code *spirv,
#endif /* HAVE_SPIRV_TOOLS */
-static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, +enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index) { switch (sysval) @@ -199,14 +199,9 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu } }
-static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval)
-{
-    return vkd3d_siv_from_sysval_indexed(sysval, 0);
-}
-
 #define VKD3D_SPIRV_VERSION 0x00010000
 #define VKD3D_SPIRV_GENERATOR_ID 18
-#define VKD3D_SPIRV_GENERATOR_VERSION 7
+#define VKD3D_SPIRV_GENERATOR_VERSION 8
 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID)
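The one-argument wrapper deleted above is still called by the new ir.c (see control_point_normaliser_emit_hs_input() earlier in this patch), so presumably it moves to a shared header not shown in this excerpt; its shape would stay the trivial one-liner:

    static inline enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(
            enum vkd3d_shader_sysval_semantic sysval)
    {
        return vkd3d_siv_from_sysval_indexed(sysval, 0);
    }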
struct vkd3d_spirv_stream @@ -1967,11 +1962,9 @@ struct vkd3d_symbol_register_data uint32_t member_idx; enum vkd3d_shader_component_type component_type; unsigned int write_mask; - uint32_t dcl_mask; unsigned int structure_stride; unsigned int binding_base_idx; bool is_aggregate; /* An aggregate, i.e. a structure or an array. */ - bool is_dynamically_indexed; /* If member_idx is a variable ID instead of a constant. */ };
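Dropping dcl_mask and is_dynamically_indexed looks safe because both jobs now happen upstream in ir.c: split declarations are merged into one dst param, and dynamic phase-instance indexing is flattened away entirely. The shape of the merge, extracted from shader_dst_param_io_normalise() above into a standalone sketch:

    /* A repeat dcl for the same signature element widens the first dcl's
     * mask; the caller then turns the duplicate instruction into a nop. */
    static bool merge_dcl(struct vkd3d_shader_dst_param **dcl_params,
            unsigned int element_idx, struct vkd3d_shader_dst_param *dst_param)
    {
        if (dcl_params[element_idx])
        {
            dcl_params[element_idx]->write_mask |= dst_param->write_mask;
            return false;
        }
        dcl_params[element_idx] = dst_param;
        return true;
    }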
struct vkd3d_symbol_resource_data @@ -2064,10 +2057,14 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, symbol->type = VKD3D_SYMBOL_REGISTER; memset(&symbol->key, 0, sizeof(symbol->key)); symbol->key.reg.type = reg->type; - if (vkd3d_shader_register_is_input(reg) && reg->idx[1].offset != ~0u) - symbol->key.reg.idx = reg->idx[1].offset; + if (vkd3d_shader_register_is_input(reg) || vkd3d_shader_register_is_output(reg) + || vkd3d_shader_register_is_patch_constant(reg)) + { + symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u; + assert(!reg->idx_count || symbol->key.reg.idx != ~0u); + } else if (reg->type != VKD3DSPR_IMMCONSTBUFFER) - symbol->key.reg.idx = reg->idx[0].offset; + symbol->key.reg.idx = reg->idx_count ? reg->idx[0].offset : ~0u; }
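The key selection change here is subtle: after I/O normalisation the last register index of an input, output, or patch constant register is the signature element index (ir.c replaces the register index with the element index, whatever the array depth), so keying on idx[idx_count - 1] works uniformly. In sketch form (hypothetical helper name):

    /* Which index the symbol key now uses for I/O registers. */
    static unsigned int symbol_key_index(const struct vkd3d_shader_register *reg)
    {
        /* v3 (idx = {3}) -> 3; an arrayed input with idx = {4, 7}, where 7 is
         * the signature element index after normalisation, -> 7. */
        return reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u;
    }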
static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, @@ -2080,11 +2077,9 @@ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, symbol->info.reg.member_idx = 0; symbol->info.reg.component_type = component_type; symbol->info.reg.write_mask = write_mask; - symbol->info.reg.dcl_mask = 0; symbol->info.reg.structure_stride = 0; symbol->info.reg.binding_base_idx = 0; symbol->info.reg.is_aggregate = false; - symbol->info.reg.is_dynamically_indexed = false; }
static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, @@ -2197,11 +2192,7 @@ struct vkd3d_push_constant_buffer_binding
struct vkd3d_shader_phase { - enum vkd3d_shader_opcode type; - unsigned int idx; - unsigned int instance_count; uint32_t function_id; - uint32_t instance_id; size_t function_location; };
@@ -2253,10 +2244,11 @@ struct spirv_compiler struct vkd3d_push_constant_buffer_binding *push_constants; const struct vkd3d_shader_spirv_target_info *spirv_target_info;
+ bool main_block_open; bool after_declarations_section; - const struct vkd3d_shader_signature *input_signature; - const struct vkd3d_shader_signature *output_signature; - const struct vkd3d_shader_signature *patch_constant_signature; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; const struct vkd3d_shader_transform_feedback_info *xfb_info; struct vkd3d_shader_output_info { @@ -2276,9 +2268,10 @@ struct spirv_compiler unsigned int output_control_point_count; bool use_vocp;
- unsigned int shader_phase_count; - struct vkd3d_shader_phase *shader_phases; - size_t shader_phases_size; + enum vkd3d_shader_opcode phase; + bool emit_default_control_point_phase; + struct vkd3d_shader_phase control_point_phase; + struct vkd3d_shader_phase patch_constant_phase;
uint32_t current_spec_constant_id; unsigned int spec_constant_count; @@ -2290,9 +2283,19 @@ struct spirv_compiler struct vkd3d_string_buffer_cache string_buffers; };
-static bool is_control_point_phase(const struct vkd3d_shader_phase *phase) +static bool is_in_default_phase(const struct spirv_compiler *compiler) +{ + return compiler->phase == VKD3DSIH_INVALID; +} + +static bool is_in_control_point_phase(const struct spirv_compiler *compiler) +{ + return compiler->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static bool is_in_fork_or_join_phase(const struct spirv_compiler *compiler) { - return phase && phase->type == VKD3DSIH_HS_CONTROL_POINT_PHASE; + return compiler->phase == VKD3DSIH_HS_FORK_PHASE || compiler->phase == VKD3DSIH_HS_JOIN_PHASE; }
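Since instancing was flattened in ir.c, two fixed phases plus the phase opcode replace the old dynamic shader_phases array. Phase dispatch under this scheme could look like the following (hypothetical helper; the patch's real call sites are not in this excerpt):

    static struct vkd3d_shader_phase *spirv_compiler_current_phase(struct spirv_compiler *compiler)
    {
        if (is_in_control_point_phase(compiler))
            return &compiler->control_point_phase;
        if (is_in_fork_or_join_phase(compiler))
            return &compiler->patch_constant_phase;
        return NULL; /* default phase: not a hull shader, or before any phase */
    }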
static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler); @@ -2304,13 +2307,37 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil return info && info->entry_point ? info->entry_point : "main"; }
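The create/destroy hunks below also change ownership: spirv_compiler_create() now copies the three signatures out of the (no longer const) vkd3d_shader_desc and zeroes the source, and spirv_compiler_destroy() releases them with shader_signature_cleanup(). The move-and-clear idiom in miniature (toy type, not the real shader_signature):

    #include <string.h>

    struct toy_signature
    {
        void *elements;
        unsigned int element_count;
    };

    /* dst takes ownership of the allocation; src is left empty so a later
     * cleanup of src is a harmless no-op. */
    static void toy_signature_move(struct toy_signature *dst, struct toy_signature *src)
    {
        *dst = *src;
        memset(src, 0, sizeof(*src));
    }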
-struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version,
-        const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info,
+static void spirv_compiler_destroy(struct spirv_compiler *compiler)
+{
+    vkd3d_free(compiler->control_flow_info);
+
+    vkd3d_free(compiler->output_info);
+
+    vkd3d_free(compiler->push_constants);
+    vkd3d_free(compiler->descriptor_offset_ids);
+
+    vkd3d_spirv_builder_free(&compiler->spirv_builder);
+
+    rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL);
+
+    vkd3d_free(compiler->spec_constants);
+
+    vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers);
+
+    shader_signature_cleanup(&compiler->input_signature);
+    shader_signature_cleanup(&compiler->output_signature);
+    shader_signature_cleanup(&compiler->patch_constant_signature);
+
+    vkd3d_free(compiler);
+}
+
+static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version,
+        struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info,
         const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info,
         struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location)
 {
-    const struct vkd3d_shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature;
-    const struct vkd3d_shader_signature *output_signature = &shader_desc->output_signature;
+    const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature;
+    const struct shader_signature *output_signature = &shader_desc->output_signature;
     const struct vkd3d_shader_interface_info *shader_interface;
     const struct vkd3d_shader_descriptor_offset_info *offset_info;
     const struct vkd3d_shader_spirv_target_info *target_info;
@@ -2402,9 +2429,12 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *
compiler->shader_type = shader_version->type;
-    compiler->input_signature = &shader_desc->input_signature;
-    compiler->output_signature = &shader_desc->output_signature;
-    compiler->patch_constant_signature = &shader_desc->patch_constant_signature;
+    compiler->input_signature = shader_desc->input_signature;
+    compiler->output_signature = shader_desc->output_signature;
+    compiler->patch_constant_signature = shader_desc->patch_constant_signature;
+    memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature));
+    memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature));
+    memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature));
     if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO)))
     {
@@ -2437,6 +2467,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *
compiler->scan_descriptor_info = scan_descriptor_info;
+    compiler->phase = VKD3DSIH_INVALID;
+
     vkd3d_string_buffer_cache_init(&compiler->string_buffers);
     spirv_compiler_emit_initial_declarations(compiler);
@@ -2857,7 +2889,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s
 {
     unsigned int idx;
-    idx = reg->idx[1].offset != ~0u ? reg->idx[1].offset : reg->idx[0].offset;
+    idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : 0;
     switch (reg->type)
     {
         case VKD3DSPR_RESOURCE:
@@ -2887,12 +2919,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s
         case VKD3DSPR_DEPTHOUTLE:
             snprintf(buffer, buffer_size, "oDepth");
             break;
-        case VKD3DSPR_FORKINSTID:
-            snprintf(buffer, buffer_size, "vForkInstanceId");
-            break;
-        case VKD3DSPR_JOININSTID:
-            snprintf(buffer, buffer_size, "vJoinInstanceId");
-            break;
         case VKD3DSPR_GSINSTID:
             snprintf(buffer, buffer_size, "vGSInstanceID");
             break;
@@ -2965,18 +2991,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler,
 static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler,
         struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class,
-        enum vkd3d_shader_component_type component_type, unsigned int component_count, unsigned int array_length)
+        enum vkd3d_shader_component_type component_type, unsigned int component_count,
+        const unsigned int *array_lengths, unsigned int length_count)
 {
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
     uint32_t type_id, length_id, ptr_type_id;
+    unsigned int i;
-    if (!length_count)
+    if (!length_count)
         return spirv_compiler_emit_variable(compiler,
                 stream, storage_class, component_type, component_count);
     type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
-    length_id = spirv_compiler_get_constant_uint(compiler, array_length);
-    type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
+    for (i = 0; i < length_count; ++i)
+    {
+        if (!array_lengths[i])
+            continue;
+        length_id = spirv_compiler_get_constant_uint(compiler, array_lengths[i]);
+        type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
+    }
+
     ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id);
     return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0);
 }
@@ -3169,7 +3203,6 @@ struct vkd3d_shader_register_info
     unsigned int structure_stride;
     unsigned int binding_base_idx;
     bool is_aggregate;
-    bool is_dynamically_indexed;
 };
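
With the new array_lengths/length_count parameters, spirv_compiler_emit_array_variable() wraps the component type in one OpTypeArray per non-zero length, innermost dimension first, so the last length ends up outermost; zero lengths are skipped rather than emitted as zero-sized arrays. A standalone illustration of that nesting order (toy code, not the vkd3d builder API):

    #include <stdio.h>

    /* Prints the composite type produced by the nesting: each non-zero length
     * wraps the current type, so printing outermost-first means walking the
     * lengths backwards. */
    static void print_nested_array_type(const unsigned int *lengths, unsigned int count)
    {
        unsigned int i;

        printf("vec4");
        for (i = count; i; --i)
        {
            if (lengths[i - 1])
                printf("[%u]", lengths[i - 1]);
        }
        printf("\n");
    }

    int main(void)
    {
        const unsigned int lengths[] = {4, 3};

        print_nested_array_type(lengths, 2); /* vec4[3][4] */
        print_nested_array_type(lengths, 1); /* vec4[4] */
        return 0;
    }
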
 static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler,
@@ -3192,7 +3225,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compil
         register_info->structure_stride = 0;
         register_info->binding_base_idx = 0;
         register_info->is_aggregate = false;
-        register_info->is_dynamically_indexed = false;
         return true;
     }
@@ -3214,7 +3246,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compil
     register_info->structure_stride = symbol->info.reg.structure_stride;
     register_info->binding_base_idx = symbol->info.reg.binding_base_idx;
     register_info->is_aggregate = symbol->info.reg.is_aggregate;
-    register_info->is_dynamically_indexed = symbol->info.reg.is_dynamically_indexed;
     return true;
 }
@@ -3344,41 +3375,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_comp
     }
     else if (register_info->is_aggregate)
     {
-        if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_INCONTROLPOINT)
-        {
-            /* Indices for these are swapped compared to the generated SPIR-V. */
-            if (reg->idx[1].offset != ~0u)
-                indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]);
-            if (reg->idx[0].offset != ~0u)
-                indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]);
-        }
-        else
-        {
-            struct vkd3d_shader_register_index reg_idx = reg->idx[0];
-
-            if (reg->idx[1].rel_addr)
-                FIXME("Relative addressing not implemented.\n");
-
-            if (register_info->is_dynamically_indexed)
-            {
-                indexes[index_count++] = vkd3d_spirv_build_op_load(builder,
-                        vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1),
-                        register_info->member_idx, SpvMemoryAccessMaskNone);
-            }
-            else
-            {
-                reg_idx.offset = register_info->member_idx;
-                indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg_idx);
-            }
-        }
+        /* Indices for these are swapped compared to the generated SPIR-V. */
+        if (reg->idx_count > 2)
+            indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]);
+        if (reg->idx_count > 1)
+            indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]);
+        if (!index_count)
+            /* A register sysval which is an array in SPIR-V, e.g. SAMPLEMASK. */
+            indexes[index_count++] = spirv_compiler_get_constant_uint(compiler, 0);
     }
     else
     {
-        if (reg->idx[1].rel_addr || (reg->idx[1].offset == ~0u && reg->idx[0].rel_addr))
+        if (reg->idx_count && reg->idx[reg->idx_count - 1].rel_addr)
             FIXME("Relative addressing not implemented.\n");
         /* Handle arrayed registers, e.g. v[3][0]. */
-        if (reg->idx[1].offset != ~0u && !register_is_descriptor(reg))
+        if (reg->idx_count > 1 && !register_is_descriptor(reg))
             indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]);
     }
@@ -4249,35 +4261,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp
     if ((builtin = get_spirv_builtin_for_register(reg_type)))
         return builtin;
-    if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT))
+    if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT
+            && reg_type != VKD3DSPR_PATCHCONST))
         FIXME("Unhandled builtin (register type %#x, sysval %#x).\n", reg_type, sysval);
     return NULL;
 }
-static const struct vkd3d_shader_signature_element *vkd3d_find_signature_element_for_reg(
-        const struct vkd3d_shader_signature *signature, unsigned int *signature_element_index,
-        unsigned int reg_idx, DWORD write_mask)
-{
-    unsigned int signature_idx;
-
-    for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx)
-    {
-        if (signature->elements[signature_idx].register_index == reg_idx
-                && (signature->elements[signature_idx].mask & write_mask) == write_mask)
-        {
-            if (signature_element_index)
-                *signature_element_index = signature_idx;
-            return &signature->elements[signature_idx];
-        }
-    }
-
-    FIXME("Could not find shader signature element (register %u, write mask %#x).\n",
-            reg_idx, write_mask);
-    if (signature_element_index)
-        *signature_element_index = ~0u;
-    return NULL;
-}
-
 static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler)
 {
     struct vkd3d_shader_register r;
@@ -4288,6 +4277,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler
     r.type = VKD3DSPR_OUTPOINTID;
     r.idx[0].offset = ~0u;
     r.idx[1].offset = ~0u;
+    r.idx_count = 0;
     return spirv_compiler_get_register_id(compiler, &r);
 }
@@ -4302,7 +4292,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co
 }
 static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compiler,
-        uint32_t id, const struct vkd3d_shader_phase *phase, const char *suffix)
+        uint32_t id, const char *suffix)
 {
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
     const char *name;
@@ -4310,7 +4300,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compile
     if (!suffix)
         suffix = "";
-    switch (phase->type)
+    switch (compiler->phase)
     {
         case VKD3DSIH_HS_CONTROL_POINT_PHASE:
             name = "control";
@@ -4322,62 +4312,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compile
             name = "join";
             break;
         default:
-            ERR("Invalid phase type %#x.\n", phase->type);
+            ERR("Invalid phase type %#x.\n", compiler->phase);
             return;
     }
-    vkd3d_spirv_build_op_name(builder, id, "%s%u%s", name, phase->idx, suffix);
-}
-
-static void spirv_compiler_begin_shader_phase(struct spirv_compiler *compiler,
-        struct vkd3d_shader_phase *phase)
-{
-    struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-    uint32_t void_id, function_type_id;
-    unsigned int param_count;
-    uint32_t param_type_id;
-
-    if (phase->instance_count)
-    {
-        param_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1);
-        param_count = 1;
-    }
-    else
-    {
-        param_count = 0;
-    }
-
-    phase->function_id = vkd3d_spirv_alloc_id(builder);
-
-    void_id = vkd3d_spirv_get_op_type_void(builder);
-    function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, &param_type_id, param_count);
-    vkd3d_spirv_build_op_function(builder, void_id, phase->function_id,
-            SpvFunctionControlMaskNone, function_type_id);
-
-    if (phase->instance_count)
-        phase->instance_id = vkd3d_spirv_build_op_function_parameter(builder, param_type_id);
-
-    vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder));
-    phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream);
-
-    spirv_compiler_emit_shader_phase_name(compiler, phase->function_id, phase, NULL);
+    vkd3d_spirv_build_op_name(builder, id, "%s%s", name, suffix);
 }
 static const struct vkd3d_shader_phase *spirv_compiler_get_current_shader_phase(
         struct spirv_compiler *compiler)
 {
-    struct vkd3d_shader_phase *phase;
-
-    if (!compiler->shader_phase_count)
+    if (is_in_default_phase(compiler))
         return NULL;
-    phase = &compiler->shader_phases[compiler->shader_phase_count - 1];
-    if (!phase->function_id)
-        spirv_compiler_begin_shader_phase(compiler, phase);
-    return phase;
+    return is_in_control_point_phase(compiler) ? &compiler->control_point_phase : &compiler->patch_constant_phase;
 }
 static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler,
-        uint32_t id, unsigned int component_count, const struct vkd3d_shader_signature_element *signature_element)
+        uint32_t id, unsigned int component_count, const struct signature_element *signature_element)
 {
     const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info;
     const struct vkd3d_shader_transform_feedback_element *xfb_element;
@@ -4436,17 +4387,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler
     vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset);
 }
-static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler,
-        const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size)
+static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *compiler,
+        const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, const unsigned int *array_sizes,
+        unsigned int size_count)
 {
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
+    unsigned int sizes[2];
     uint32_t id;
-    array_size = max(array_size, builtin->spirv_array_size);
+    assert(size_count <= ARRAY_SIZE(sizes));
+    memcpy(sizes, array_sizes, size_count * sizeof(sizes[0]));
+    array_sizes = sizes;
+    sizes[0] = max(sizes[0], builtin->spirv_array_size);
-    id = spirv_compiler_emit_array_variable(compiler,
-            &builder->global_stream, storage_class,
-            builtin->component_type, builtin->component_count, array_size);
+    id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class,
+            builtin->component_type, builtin->component_count, array_sizes, size_count);
     vkd3d_spirv_add_iface_variable(builder, id);
     spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin);
@@ -4458,54 +4413,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_comp
     return id;
 }
-static bool needs_private_io_variable(const struct vkd3d_shader_signature *signature,
-        unsigned int reg_idx, const struct vkd3d_spirv_builtin *builtin,
-        unsigned int *component_count, unsigned int *out_write_mask)
+static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler,
+        const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size)
 {
-    unsigned int write_mask = 0;
-    bool have_sysval = false;
-    unsigned int i, count;
-
-    /* Always use private variables for arrayed builtins. These are generally
-     * scalars on the D3D side, so would need extra array indices when
-     * accessing them. It may be feasible to insert those indices at the point
-     * where the builtins are used, but it's not clear it's worth the effort. */
-    if (builtin && (builtin->spirv_array_size || builtin->fixup_pfn))
-        return true;
-
-    if (*component_count == VKD3D_VEC4_SIZE)
-        return false;
-
-    for (i = 0, count = 0; i < signature->element_count; ++i)
-    {
-        const struct vkd3d_shader_signature_element *current = &signature->elements[i];
+    return spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, &array_size, 1);
+}
-        if (current->register_index != reg_idx)
-            continue;
+static bool needs_private_io_variable(const struct vkd3d_spirv_builtin *builtin)
+{
+    return builtin && builtin->fixup_pfn;
+}
-        write_mask |= current->mask;
-        ++count;
+static unsigned int shader_signature_next_location(const struct shader_signature *signature)
+{
+    unsigned int i, max_row;
-        if (current->sysval_semantic)
-            have_sysval = true;
-    }
+    if (!signature)
+        return 0;
-    if (count == 1)
-        return false;
+    for (i = 0, max_row = 0; i < signature->element_count; ++i)
+        max_row = max(max_row, signature->elements[i].register_index + signature->elements[i].register_count);
+    return max_row;
+}
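
shader_signature_next_location() computes the first location index past every element's register range; patch constant variables are then placed at locations starting there, keeping them disjoint from the regular I/O locations. A worked standalone example with hypothetical elements:

    #include <assert.h>

    struct toy_element
    {
        unsigned int register_index, register_count;
    };

    static unsigned int toy_next_location(const struct toy_element *e, unsigned int count)
    {
        unsigned int i, max_row;

        for (i = 0, max_row = 0; i < count; ++i)
        {
            if (e[i].register_index + e[i].register_count > max_row)
                max_row = e[i].register_index + e[i].register_count;
        }
        return max_row;
    }

    int main(void)
    {
        /* Elements occupying registers 0 and 2-3: the next free location is 4. */
        const struct toy_element elements[] = {{0, 1}, {2, 2}};

        assert(toy_next_location(elements, 2) == 4);
        return 0;
    }
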
-    if (builtin || have_sysval)
-        return true;
+static unsigned int shader_register_get_io_indices(const struct vkd3d_shader_register *reg,
+        unsigned int *array_sizes)
+{
+    unsigned int i, element_idx;
-    if (!vkd3d_bitmask_is_contiguous(write_mask))
+    array_sizes[0] = 0;
+    array_sizes[1] = 0;
+    element_idx = reg->idx[0].offset;
+    for (i = 1; i < reg->idx_count; ++i)
     {
-        FIXME("Write mask %#x is non-contiguous.\n", write_mask);
-        return true;
+        array_sizes[1] = array_sizes[0];
+        array_sizes[0] = element_idx;
+        element_idx = reg->idx[i].offset;
     }
-    assert(vkd3d_write_mask_component_count(write_mask) >= *component_count);
-    *component_count = vkd3d_write_mask_component_count(write_mask);
-    *out_write_mask = write_mask;
-    return false;
+    return element_idx;
 }
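
shader_register_get_io_indices() assumes the normalised IR puts the signature element index last, with any preceding register indices becoming array dimensions (array_sizes[0] innermost). A standalone trace of that decomposition with a made-up register:

    #include <assert.h>

    struct toy_register
    {
        unsigned int idx[3];
        unsigned int idx_count;
    };

    static unsigned int toy_get_io_indices(const struct toy_register *reg, unsigned int *array_sizes)
    {
        unsigned int i, element_idx;

        array_sizes[0] = 0;
        array_sizes[1] = 0;
        element_idx = reg->idx[0];
        for (i = 1; i < reg->idx_count; ++i)
        {
            array_sizes[1] = array_sizes[0];
            array_sizes[0] = element_idx;
            element_idx = reg->idx[i];
        }
        return element_idx;
    }

    int main(void)
    {
        /* E.g. an arrayed input v[4][2]: 4 control points, signature element 2. */
        const struct toy_register reg = {{4, 2, 0}, 2};
        unsigned int sizes[2];

        assert(toy_get_io_indices(&reg, sizes) == 2);
        assert(sizes[0] == 4 && sizes[1] == 0);
        return 0;
    }
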
 static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
@@ -4513,50 +4459,35 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
         enum vkd3d_shader_interpolation_mode interpolation_mode)
 {
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-    const struct vkd3d_shader_signature_element *signature_element;
-    const struct vkd3d_shader_signature *shader_signature;
     const struct vkd3d_shader_register *reg = &dst->reg;
     unsigned int component_idx, input_component_count;
+    const struct signature_element *signature_element;
+    const struct shader_signature *shader_signature;
     enum vkd3d_shader_component_type component_type;
     uint32_t type_id, ptr_type_id, float_type_id;
     const struct vkd3d_spirv_builtin *builtin;
+    unsigned int write_mask, reg_write_mask;
     struct vkd3d_symbol *symbol = NULL;
     uint32_t val_id, input_id, var_id;
     struct vkd3d_symbol reg_symbol;
-    struct vkd3d_symbol tmp_symbol;
     SpvStorageClass storage_class;
     struct rb_entry *entry = NULL;
     bool use_private_var = false;
-    unsigned int write_mask;
-    unsigned int array_size;
-    unsigned int reg_idx;
+    unsigned int array_sizes[2];
+    unsigned int element_idx;
    uint32_t i, index;
-    assert(!reg->idx[0].rel_addr);
-    assert(!reg->idx[1].rel_addr);
-
-    if (reg->idx[1].offset != ~0u)
-    {
-        array_size = reg->idx[0].offset;
-        reg_idx = reg->idx[1].offset;
-    }
-    else
-    {
-        array_size = 0;
-        reg_idx = reg->idx[0].offset;
-    }
+    assert(!reg->idx_count || !reg->idx[0].rel_addr);
+    assert(reg->idx_count < 2 || !reg->idx[1].rel_addr);
     shader_signature = reg->type == VKD3DSPR_PATCHCONST
-            ? compiler->patch_constant_signature : compiler->input_signature;
+            ? &compiler->patch_constant_signature : &compiler->input_signature;
-    if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature,
-            NULL, reg_idx, dst->write_mask)))
-    {
-        FIXME("No signature element for shader input, ignoring shader input.\n");
-        return 0;
-    }
+    element_idx = shader_register_get_io_indices(reg, array_sizes);
+    signature_element = &shader_signature->elements[element_idx];
-    if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic)
+    if ((compiler->shader_type == VKD3D_SHADER_TYPE_HULL || compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY)
+            && !sysval && signature_element->sysval_semantic)
         sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic);
     builtin = get_spirv_builtin_for_sysval(compiler, sysval);
@@ -4576,12 +4507,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
         component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask);
     }
-    if (needs_private_io_variable(shader_signature, reg_idx, builtin, &input_component_count, &write_mask)
-            && (compiler->shader_type != VKD3D_SHADER_TYPE_HULL
-            || (reg->type != VKD3DSPR_INCONTROLPOINT && reg->type != VKD3DSPR_PATCHCONST)))
+    if (needs_private_io_variable(builtin))
+    {
         use_private_var = true;
+        reg_write_mask = write_mask;
+    }
     else
+    {
         component_idx = vkd3d_write_mask_get_component_idx(write_mask);
+        reg_write_mask = write_mask >> component_idx;
+    }
storage_class = SpvStorageClassInput;
@@ -4589,111 +4524,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
     if ((entry = rb_get(&compiler->symbol_table, &reg_symbol)))
     {
+        /* Except for vicp there should be one declaration per signature element. Sources of
+         * duplicate declarations are: a single register split into multiple declarations having
+         * different components, which should have been merged, and declarations in one phase
+         * being repeated in another (i.e. vcp/vocp), which should have been deleted. */
+        if (reg->type != VKD3DSPR_INPUT || !is_in_fork_or_join_phase(compiler))
+            FIXME("Duplicate input definition found.\n");
         symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry);
-        input_id = symbol->id;
-    }
-    else if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL
-            && (reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST))
-    {
-        /* Input/output registers from one phase can be used as inputs in
-         * subsequent phases. Specifically:
-         *
-         * - Control phase inputs are available as "vicp" in fork and join
-         *   phases.
-         * - Control phase outputs are available as "vocp" in fork and join
-         *   phases.
-         * - Fork phase patch constants are available as "vpc" in join
-         *   phases.
-         *
-         * We handle "vicp" and "vpc" here by creating aliases to the shader's
-         * global inputs and outputs. We handle "vocp" in
-         * spirv_compiler_leave_shader_phase(). */
-
-        tmp_symbol = reg_symbol;
-        if (reg->type == VKD3DSPR_PATCHCONST)
-            tmp_symbol.key.reg.type = VKD3DSPR_OUTPUT;
-        else
-            tmp_symbol.key.reg.type = VKD3DSPR_INPUT;
-
-        if ((entry = rb_get(&compiler->symbol_table, &tmp_symbol)))
-        {
-            symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry);
-            tmp_symbol = *symbol;
-            tmp_symbol.key.reg.type = reg->type;
-            spirv_compiler_put_symbol(compiler, &tmp_symbol);
-
-            input_id = symbol->id;
-        }
-        else
-        {
-            if (reg->type == VKD3DSPR_PATCHCONST)
-                ERR("Patch constant register %u was not declared in a previous phase.\n", reg_idx);
-            else
-                ERR("Input control point register %u was not declared in a previous phase.\n", reg_idx);
-        }
+        return symbol->id;
     }
-    if (!symbol || ~symbol->info.reg.dcl_mask & write_mask)
+    if (builtin)
     {
-        if (builtin)
-        {
-            input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size);
-            if (reg->type == VKD3DSPR_PATCHCONST)
-                vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
-        }
-        else
-        {
-            unsigned int location = reg_idx;
-
-            input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
-                    storage_class, component_type, input_component_count, array_size);
-            vkd3d_spirv_add_iface_variable(builder, input_id);
-            if (reg->type == VKD3DSPR_PATCHCONST)
-            {
-                vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
-                location += compiler->input_signature->element_count;
-            }
-            vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location);
-            if (component_idx)
-                vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx);
-
-            spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode);
-        }
+        input_id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2);
+        if (reg->type == VKD3DSPR_PATCHCONST)
+            vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
     }
-
-    if (!symbol)
+    else
     {
-        var_id = input_id;
-        if (use_private_var)
+        unsigned int location = signature_element->register_index;
+
+        input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
+                storage_class, component_type, input_component_count, array_sizes, 2);
+        vkd3d_spirv_add_iface_variable(builder, input_id);
+        if (reg->type == VKD3DSPR_PATCHCONST)
         {
-            storage_class = SpvStorageClassPrivate;
-            var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
-                    storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_size);
+            vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
+            location += shader_signature_next_location(&compiler->input_signature);
         }
+        vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location);
+        if (component_idx)
+            vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx);
-        vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
-                use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type,
-                use_private_var ? VKD3DSP_WRITEMASK_ALL : write_mask);
-        reg_symbol.info.reg.dcl_mask |= write_mask;
-        spirv_compiler_put_symbol(compiler, &reg_symbol);
-
-        spirv_compiler_emit_register_debug_name(builder, var_id, reg);
+        spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode);
     }
-    else
+
+    var_id = input_id;
+    if (use_private_var)
     {
-        symbol->info.reg.dcl_mask |= write_mask;
+        storage_class = SpvStorageClassPrivate;
+        var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
+                storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_sizes, 2);
     }
+    vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
+            use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type,
+            use_private_var ? VKD3DSP_WRITEMASK_ALL : reg_write_mask);
+    reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1];
+    assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]);
+    spirv_compiler_put_symbol(compiler, &reg_symbol);
+
+    spirv_compiler_emit_register_debug_name(builder, var_id, reg);
+
     if (use_private_var)
     {
         type_id = vkd3d_spirv_get_type_id(builder, component_type, input_component_count);
-        for (i = 0; i < max(array_size, 1); ++i)
+        for (i = 0; i < max(array_sizes[0], 1); ++i)
         {
             struct vkd3d_shader_register dst_reg = *reg;
             dst_reg.data_type = VKD3D_DATA_FLOAT;
             val_id = input_id;
-            if (array_size)
+            if (array_sizes[0])
             {
                 ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id);
                 index = spirv_compiler_get_constant_uint(compiler, i);
@@ -4708,7 +4600,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
                 ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id);
                 index = spirv_compiler_get_constant_uint(compiler, builtin->member_idx);
                 val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index);
-                dst_reg.idx[0].offset = reg_idx + i;
+                dst_reg.idx[0].offset = element_idx + i;
             }
             val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone);
@@ -4743,9 +4635,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler,
     uint32_t write_mask;
     uint32_t input_id;
-    assert(!reg->idx[0].rel_addr);
-    assert(!reg->idx[1].rel_addr);
-    assert(reg->idx[1].offset == ~0u);
+    assert(!reg->idx_count || !reg->idx[0].rel_addr);
+    assert(reg->idx_count < 2);
     if (!(builtin = get_spirv_builtin_for_register(reg->type)))
     {
@@ -4763,19 +4654,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler,
     write_mask = vkd3d_write_mask_from_component_count(builtin->component_count);
     vkd3d_symbol_set_register_info(&reg_symbol, input_id,
             SpvStorageClassInput, builtin->component_type, write_mask);
-    reg_symbol.info.reg.dcl_mask = write_mask;
     reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size;
     spirv_compiler_put_symbol(compiler, &reg_symbol);
     spirv_compiler_emit_register_debug_name(builder, input_id, reg);
 }
 static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_phase *phase, const struct vkd3d_shader_dst_param *dst)
+        const struct vkd3d_shader_dst_param *dst)
 {
-    struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
     const struct vkd3d_shader_register *reg = &dst->reg;
-    struct vkd3d_symbol reg_symbol;
-    uint32_t val_id;
     switch (reg->type)
     {
@@ -4787,10 +4674,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compil
         case VKD3DSPR_PRIMID:
             spirv_compiler_emit_input_register(compiler, dst);
             return;
-        case VKD3DSPR_FORKINSTID:
-        case VKD3DSPR_JOININSTID:
-            val_id = phase->instance_id;
-            break;
         case VKD3DSPR_OUTPOINTID: /* Emitted in spirv_compiler_emit_initial_declarations(). */
         case VKD3DSPR_OUTCONTROLPOINT: /* See spirv_compiler_leave_shader_phase(). */
             return;
@@ -4798,22 +4681,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compil
             FIXME("Unhandled shader phase input register %#x.\n", reg->type);
             return;
     }
-
-    vkd3d_symbol_make_register(&reg_symbol, reg);
-    vkd3d_symbol_set_register_info(&reg_symbol, val_id,
-            SpvStorageClassMax /* Intermediate value */,
-            VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0);
-    spirv_compiler_put_symbol(compiler, &reg_symbol);
-    spirv_compiler_emit_register_debug_name(builder, val_id, reg);
-}
-
-static unsigned int spirv_compiler_get_output_variable_index(
-        struct spirv_compiler *compiler, unsigned int register_idx)
-{
-    if (register_idx == ~0u) /* oDepth */
-        return ARRAY_SIZE(compiler->private_output_variable) - 1;
-    assert(register_idx < ARRAY_SIZE(compiler->private_output_variable) - 1);
-    return register_idx;
 }
 static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler,
@@ -4835,8 +4702,7 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler)
     return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending;
 }
-static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signature_element *e,
-        uint32_t *mask)
+static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask)
 {
     if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE)
     {
@@ -4847,38 +4713,10 @@ static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signa
     *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index);
 }
-static uint32_t calculate_sysval_array_mask(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_signature *signature, enum vkd3d_shader_input_sysval_semantic sysval)
-{
-    const struct vkd3d_shader_signature_element *e;
-    const struct vkd3d_spirv_builtin *sig_builtin;
-    const struct vkd3d_spirv_builtin *builtin;
-    uint32_t signature_idx, mask = 0;
-
-    if (!(builtin = get_spirv_builtin_for_sysval(compiler, sysval)))
-    {
-        FIXME("Unhandled sysval %#x.\n", sysval);
-        return 0;
-    }
-
-    for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx)
-    {
-        e = &signature->elements[signature_idx];
-
-        sig_builtin = get_spirv_builtin_for_sysval(compiler,
-                vkd3d_siv_from_sysval_indexed(e->sysval_semantic, e->semantic_index));
-
-        if (sig_builtin && sig_builtin->spirv_builtin == builtin->spirv_builtin)
-            mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * sig_builtin->member_idx);
-    }
-
-    return mask;
-}
-
 /* Emits arrayed SPIR-V built-in variables. */
 static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler)
 {
-    const struct vkd3d_shader_signature *output_signature = compiler->output_signature;
+    const struct shader_signature *output_signature = &compiler->output_signature;
     uint32_t clip_distance_mask = 0, clip_distance_id = 0;
     uint32_t cull_distance_mask = 0, cull_distance_id = 0;
     const struct vkd3d_spirv_builtin *builtin;
@@ -4886,7 +4724,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *
     for (i = 0; i < output_signature->element_count; ++i)
     {
-        const struct vkd3d_shader_signature_element *e = &output_signature->elements[i];
+        const struct signature_element *e = &output_signature->elements[i];
         switch (e->sysval_semantic)
         {
@@ -4921,7 +4759,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *
     for (i = 0; i < output_signature->element_count; ++i)
     {
-        const struct vkd3d_shader_signature_element *e = &output_signature->elements[i];
+        const struct signature_element *e = &output_signature->elements[i];
         switch (e->sysval_semantic)
         {
@@ -4953,9 +4791,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler,
     uint32_t write_mask;
     uint32_t output_id;
-    assert(!reg->idx[0].rel_addr);
-    assert(!reg->idx[1].rel_addr);
-    assert(reg->idx[1].offset == ~0u);
+    assert(!reg->idx_count || !reg->idx[0].rel_addr);
+    assert(reg->idx_count < 2);
     if (!(builtin = get_spirv_builtin_for_register(reg->type)))
     {
@@ -4969,7 +4806,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler,
     write_mask = vkd3d_write_mask_from_component_count(builtin->component_count);
     vkd3d_symbol_set_register_info(&reg_symbol, output_id,
             SpvStorageClassOutput, builtin->component_type, write_mask);
-    reg_symbol.info.reg.dcl_mask = write_mask;
     reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size;
     spirv_compiler_put_symbol(compiler, &reg_symbol);
     spirv_compiler_emit_register_execution_mode(compiler, reg);
@@ -4977,7 +4813,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler,
 }
 static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_phase *phase, const struct vkd3d_spirv_builtin *builtin)
+        const struct vkd3d_spirv_builtin *builtin)
 {
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
     uint32_t *variable_id, id;
@@ -4993,7 +4829,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c
     return *variable_id;
     id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0);
-    if (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE)
+    if (is_in_fork_or_join_phase(compiler))
         vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0);
     if (variable_id)
@@ -5005,44 +4841,34 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
         const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval)
 {
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-    const struct vkd3d_shader_signature_element *signature_element;
-    const struct vkd3d_shader_signature *shader_signature;
     const struct vkd3d_shader_register *reg = &dst->reg;
     unsigned int component_idx, output_component_count;
+    const struct signature_element *signature_element;
     enum vkd3d_shader_component_type component_type;
+    const struct shader_signature *shader_signature;
     const struct vkd3d_spirv_builtin *builtin;
-    const struct vkd3d_shader_phase *phase;
-    struct vkd3d_symbol *symbol = NULL;
+    unsigned int write_mask, reg_write_mask;
     bool use_private_variable = false;
     struct vkd3d_symbol reg_symbol;
     SpvStorageClass storage_class;
-    struct rb_entry *entry = NULL;
-    unsigned int signature_idx;
-    unsigned int write_mask;
-    unsigned int array_size;
+    unsigned int array_sizes[2];
+    unsigned int element_idx;
     bool is_patch_constant;
     uint32_t id, var_id;
-    phase = spirv_compiler_get_current_shader_phase(compiler);
-    is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE);
+    is_patch_constant = is_in_fork_or_join_phase(compiler);
-    shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature;
+    shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature;
-    array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0;
-
-    if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature,
-            &signature_idx, reg->idx[0].offset, dst->write_mask)))
-    {
-        FIXME("No signature element for shader output, ignoring shader output.\n");
-        return;
-    }
+    element_idx = shader_register_get_io_indices(reg, array_sizes);
+    signature_element = &shader_signature->elements[element_idx];
builtin = vkd3d_get_spirv_builtin(compiler, dst->reg.type, sysval);
write_mask = signature_element->mask;
-    component_idx = vkd3d_write_mask_get_component_idx(dst->write_mask);
-    output_component_count = vkd3d_write_mask_component_count(signature_element->mask);
+    component_idx = vkd3d_write_mask_get_component_idx(write_mask);
+    output_component_count = vkd3d_write_mask_component_count(write_mask);
     if (builtin)
     {
         component_type = builtin->component_type;
@@ -5058,128 +4884,103 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
     storage_class = SpvStorageClassOutput;
     if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE
-            || needs_private_io_variable(shader_signature, signature_element->register_index,
-            builtin, &output_component_count, &write_mask)
-            || is_patch_constant)
+            || (compiler->output_info[element_idx].id && compiler->output_info[element_idx].array_element_mask)
+            || needs_private_io_variable(builtin))
+    {
         use_private_variable = true;
+        reg_write_mask = write_mask;
+    }
     else
+    {
         component_idx = vkd3d_write_mask_get_component_idx(write_mask);
+        reg_write_mask = write_mask >> component_idx;
+    }
     vkd3d_symbol_make_register(&reg_symbol, reg);
-    if ((entry = rb_get(&compiler->symbol_table, &reg_symbol)))
+    if (rb_get(&compiler->symbol_table, &reg_symbol))
     {
-        symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry);
-        id = symbol->id;
+        /* See spirv_compiler_emit_input() for possible causes. */
+        FIXME("Duplicate output definition found.\n");
+        return;
     }
-    if (!symbol || ~symbol->info.reg.dcl_mask & write_mask)
+    if (compiler->output_info[element_idx].id)
     {
-        if (compiler->output_info[signature_idx].id)
-        {
-            id = compiler->output_info[signature_idx].id;
-            if (compiler->output_info[signature_idx].array_element_mask)
-                use_private_variable = true;
-        }
-        else if (builtin)
-        {
-            if (phase)
-                id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, phase, builtin);
-            else
-                id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size);
-
-            if (builtin->spirv_array_size)
-                compiler->output_info[signature_idx].array_element_mask =
-                        calculate_sysval_array_mask(compiler, shader_signature, sysval);
-
-            spirv_compiler_emit_register_execution_mode(compiler, &dst->reg);
-        }
+        id = compiler->output_info[element_idx].id;
+    }
+    else if (builtin)
+    {
+        if (spirv_compiler_get_current_shader_phase(compiler))
+            id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, builtin);
         else
-        {
-            unsigned int location = reg->idx[0].offset;
-
-            if (is_patch_constant)
-                location += compiler->output_signature->element_count;
-
-            id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
-                    storage_class, component_type, output_component_count, array_size);
-            vkd3d_spirv_add_iface_variable(builder, id);
-
-            if (is_dual_source_blending(compiler) && reg->idx[0].offset < 2)
-            {
-                vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0);
-                vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, reg->idx[0].offset);
-            }
-            else
-            {
-                vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location);
-            }
-
-            if (component_idx)
-                vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx);
-        }
+            id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2);
-        if (is_patch_constant)
-            vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0);
-
-        spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element);
-
-        compiler->output_info[signature_idx].id = id;
-        compiler->output_info[signature_idx].component_type = component_type;
+        spirv_compiler_emit_register_execution_mode(compiler, &dst->reg);
     }
-
-    if (!symbol)
+    else
     {
-        var_id = id;
-        if (use_private_variable)
-            storage_class = SpvStorageClassPrivate;
+        unsigned int location = signature_element->register_index;
+
         if (is_patch_constant)
-            var_id = compiler->hs.patch_constants_id;
-        else if (use_private_variable)
-            var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream,
-                    storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE);
+            location += shader_signature_next_location(&compiler->output_signature);
-        vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
-                use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type,
-                use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask);
-        reg_symbol.info.reg.is_aggregate = use_private_variable ? is_patch_constant : array_size;
-        if (!use_private_variable && is_control_point_phase(phase))
+        id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
+                storage_class, component_type, output_component_count, array_sizes, 2);
+        vkd3d_spirv_add_iface_variable(builder, id);
+
+        if (is_dual_source_blending(compiler) && signature_element->register_index < 2)
         {
-            reg_symbol.info.reg.member_idx = spirv_compiler_get_invocation_id(compiler);
-            reg_symbol.info.reg.is_dynamically_indexed = true;
+            vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0);
+            vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index);
        }
-        else if (is_patch_constant)
+        else
        {
-            reg_symbol.info.reg.member_idx = reg->idx[0].offset;
+            vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location);
        }
-        reg_symbol.info.reg.dcl_mask = write_mask;
-
-        spirv_compiler_put_symbol(compiler, &reg_symbol);
-        if (!is_patch_constant)
-            spirv_compiler_emit_register_debug_name(builder, var_id, reg);
+        if (component_idx)
+            vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx);
     }
-    else
+
+    if (is_patch_constant)
+        vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0);
+
+    spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element);
+
+    compiler->output_info[element_idx].id = id;
+    compiler->output_info[element_idx].component_type = component_type;
+
+    var_id = id;
+    if (use_private_variable)
     {
-        symbol->info.reg.dcl_mask |= write_mask;
-        var_id = symbol->id;
+        storage_class = SpvStorageClassPrivate;
+        var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream,
+                storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE);
     }
+    vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
+            use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type,
+            use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask);
+    reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1];
+    assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]);
+
+    spirv_compiler_put_symbol(compiler, &reg_symbol);
+
+    if (!is_patch_constant)
+        spirv_compiler_emit_register_debug_name(builder, var_id, reg);
+
     if (use_private_variable)
     {
-        unsigned int idx = spirv_compiler_get_output_variable_index(compiler, reg->idx[0].offset);
-        compiler->private_output_variable[idx] = var_id;
-        compiler->private_output_variable_write_mask[idx] |= dst->write_mask;
-        if (is_patch_constant)
-            compiler->private_output_variable_array_idx[idx] = spirv_compiler_get_constant_uint(
-                    compiler, reg->idx[0].offset);
+        compiler->private_output_variable[element_idx] = var_id;
+        compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask;
         if (!compiler->epilogue_function_id)
             compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder);
     }
 }
 static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_signature_element *e)
+        const struct signature_element *e)
 {
     enum vkd3d_shader_input_sysval_semantic sysval;
     const struct vkd3d_spirv_builtin *builtin;
@@ -5198,14 +4999,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_com
 }
 static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_signature *signature, const struct vkd3d_shader_signature_element *output,
+        const struct shader_signature *signature, const struct signature_element *output,
         const struct vkd3d_shader_output_info *output_info,
         uint32_t output_index_id, uint32_t val_id, unsigned int write_mask)
 {
     unsigned int dst_write_mask, use_mask, uninit_mask, swizzle, mask;
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
     uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id;
-    const struct vkd3d_shader_signature_element *element;
+    const struct signature_element *element;
     unsigned int i, index, array_idx;
     uint32_t output_id;
@@ -5224,6 +5025,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compi
             use_mask |= element->used_mask;
         }
     }
+    index = vkd3d_write_mask_get_component_idx(output->mask);
+    dst_write_mask >>= index;
+    use_mask >>= index;
     write_mask &= dst_write_mask;
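
The three added lines shift the element and use masks down by the element's first component, making them relative to the (possibly narrower) output variable rather than to the full four-component register. A standalone sketch of that rebasing with made-up masks:

    #include <assert.h>

    /* A signature element covering .zw has mask 0xc and starts at component 2;
     * a write to .zw therefore targets components 0-1 of the two-component
     * variable, i.e. mask 0x3 after the shift. */
    static unsigned int toy_component_idx(unsigned int mask)
    {
        unsigned int i;

        for (i = 0; i < 4; ++i)
        {
            if (mask & (1u << i))
                return i;
        }
        return 0;
    }

    int main(void)
    {
        unsigned int element_mask = 0xc, dst_write_mask = 0xc;

        dst_write_mask >>= toy_component_idx(element_mask);
        assert(dst_write_mask == 0x3);
        return 0;
    }
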
     if (!write_mask)
@@ -5294,22 +5098,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_
     uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0};
     uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id;
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-    const struct vkd3d_shader_signature *signature;
-    const struct vkd3d_shader_phase *phase;
+    const struct shader_signature *signature;
     uint32_t output_index_id = 0;
     bool is_patch_constant;
     unsigned int i, count;
-    DWORD variable_idx;
     STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_id));
     STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_type_id));
     STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_array_idx));
     STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_write_mask));
-    phase = spirv_compiler_get_current_shader_phase(compiler);
-    is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE);
+    is_patch_constant = is_in_fork_or_join_phase(compiler);
-    signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature;
+    signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature;
function_id = compiler->epilogue_function_id;
@@ -5340,7 +5141,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_
         param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone);
     }
-    if (is_control_point_phase(phase))
+    if (is_in_control_point_phase(compiler))
         output_index_id = spirv_compiler_emit_load_invocation_id(compiler);
     for (i = 0; i < signature->element_count; ++i)
@@ -5348,14 +5149,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_
         if (!compiler->output_info[i].id)
             continue;
-        variable_idx = spirv_compiler_get_output_variable_index(compiler,
-                signature->elements[i].register_index);
-        if (!param_id[variable_idx])
+        if (!param_id[i])
             continue;
         spirv_compiler_emit_store_shader_output(compiler, signature,
                 &signature->elements[i], &compiler->output_info[i], output_index_id,
-                param_id[variable_idx], compiler->private_output_variable_write_mask[variable_idx]);
+                param_id[i], compiler->private_output_variable_write_mask[i]);
     }
     vkd3d_spirv_build_op_return(&compiler->spirv_builder);
@@ -5375,28 +5174,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_comp
     dst.reg.type = VKD3DSPR_OUTPOINTID;
     dst.reg.idx[0].offset = ~0u;
     dst.reg.idx[1].offset = ~0u;
+    dst.reg.idx_count = 0;
     dst.write_mask = VKD3DSP_WRITEMASK_0;
     spirv_compiler_emit_input_register(compiler, &dst);
 }
-static void spirv_compiler_emit_hull_shader_patch_constants(struct spirv_compiler *compiler)
-{
-    const struct vkd3d_shader_signature *signature = compiler->patch_constant_signature;
-    struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-    uint32_t register_count = 0;
-    unsigned int signature_idx;
-
-    for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx)
-        register_count = max(register_count, signature->elements[signature_idx].register_index + 1);
-
-    if (!register_count)
-        return;
-
-    compiler->hs.patch_constants_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
-            SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, register_count);
-    vkd3d_spirv_build_op_name(builder, compiler->hs.patch_constants_id, "opc");
-}
-
 static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler)
 {
     const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info;
@@ -5410,7 +5192,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_comp
         case VKD3D_SHADER_TYPE_HULL:
             vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl);
             spirv_compiler_emit_hull_shader_builtins(compiler);
-            spirv_compiler_emit_hull_shader_patch_constants(compiler);
             break;
         case VKD3D_SHADER_TYPE_DOMAIN:
             vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation);
@@ -5439,8 +5220,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_comp
     if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL)
     {
         vkd3d_spirv_builder_begin_main_function(builder);
-
-        spirv_compiler_emit_shader_signature_outputs(compiler);
+        compiler->main_block_open = true;
     }
 }
@@ -5522,12 +5302,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compil
     reg.type = VKD3DSPR_IDXTEMP;
     reg.idx[0].offset = temp->register_idx;
     reg.idx[1].offset = ~0u;
+    reg.idx_count = 1;
     function_location = spirv_compiler_get_current_function_location(compiler);
     vkd3d_spirv_begin_function_stream_insertion(builder, function_location);
     id = spirv_compiler_emit_array_variable(compiler, &builder->function_stream,
-            SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, temp->register_size);
+            SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, &temp->register_size, 1);
     spirv_compiler_emit_register_debug_name(builder, id, &reg);
@@ -6097,6 +5878,7 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler,
         const struct vkd3d_shader_instruction *instruction)
 {
     const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic;
+    enum vkd3d_shader_resource_type resource_type = semantic->resource_type;
     uint32_t flags = instruction->flags;
     /* We don't distinguish between APPEND and COUNTER UAVs. */
@@ -6104,8 +5886,13 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler,
     if (flags)
         FIXME("Unhandled UAV flags %#x.\n", flags);
+    if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1)
+        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
+    else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1)
+        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
+
     spirv_compiler_emit_resource_declaration(compiler, &semantic->resource,
-            semantic->resource_type, semantic->resource_data_type[0], 0, false);
+            resource_type, semantic->resource_data_type[0], 0, false);
 }
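
The added branch declares a nominally multisample resource with a sample count of 1 as the corresponding single-sample image type, presumably so such resources bind as ordinary sampled images on the SPIR-V side. The mapping itself is trivial; a standalone sketch with toy enum values:

    #include <assert.h>

    enum toy_resource_type { TOY_2D, TOY_2DARRAY, TOY_2DMS, TOY_2DMSARRAY };

    static enum toy_resource_type toy_declared_type(enum toy_resource_type type, unsigned int sample_count)
    {
        if (sample_count != 1)
            return type;
        if (type == TOY_2DMS)
            return TOY_2D;
        if (type == TOY_2DMSARRAY)
            return TOY_2DARRAY;
        return type;
    }

    int main(void)
    {
        assert(toy_declared_type(TOY_2DMS, 1) == TOY_2D);
        assert(toy_declared_type(TOY_2DMS, 4) == TOY_2DMS);
        return 0;
    }
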
 static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler,
@@ -6185,10 +5972,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler,
         const struct vkd3d_shader_instruction *instruction)
 {
     const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst;
-    const struct vkd3d_shader_phase *phase;
-    if ((phase = spirv_compiler_get_current_shader_phase(compiler)))
-        spirv_compiler_emit_shader_phase_input(compiler, phase, dst);
+    if (spirv_compiler_get_current_shader_phase(compiler))
+        spirv_compiler_emit_shader_phase_input(compiler, dst);
     else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST)
         spirv_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE);
     else
@@ -6224,7 +6010,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler,
 {
     const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst;
-    if (vkd3d_shader_register_is_output(&dst->reg))
+    if (vkd3d_shader_register_is_output(&dst->reg)
+            || (is_in_fork_or_join_phase(compiler) && vkd3d_shader_register_is_patch_constant(&dst->reg)))
         spirv_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE);
     else
         spirv_compiler_emit_output_register(compiler, dst);
@@ -6242,64 +6029,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler,
     spirv_compiler_emit_output(compiler, dst, sysval);
 }
-static bool spirv_compiler_check_index_range(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_index_range *range)
-{
-    const struct vkd3d_shader_register *reg = &range->dst.reg;
-    struct vkd3d_shader_register_info reg_info;
-    struct vkd3d_shader_register current_reg;
-    struct vkd3d_symbol reg_symbol;
-    unsigned int i;
-    uint32_t id;
-
-    current_reg = *reg;
-    vkd3d_symbol_make_register(&reg_symbol, &current_reg);
-    if (!spirv_compiler_get_register_info(compiler, &current_reg, &reg_info))
-    {
-        ERR("Failed to get register info.\n");
-        return false;
-    }
-
-    /* FIXME: We should check if it's an array. */
-    if (!reg_info.is_aggregate)
-    {
-        FIXME("Unhandled register %#x.\n", reg->type);
-        return false;
-    }
-    id = reg_info.id;
-
-    for (i = reg->idx[0].offset; i < reg->idx[0].offset + range->register_count; ++i)
-    {
-        current_reg.idx[0].offset = i;
-        vkd3d_symbol_make_register(&reg_symbol, &current_reg);
-
-        if (range->dst.write_mask != reg_info.write_mask
-                || vkd3d_write_mask_component_count(reg_info.write_mask) != 1)
-        {
-            FIXME("Unhandled index range write mask %#x (%#x).\n",
-                    range->dst.write_mask, reg_info.write_mask);
-            return false;
-        }
-
-        if (reg_info.id != id)
-        {
-            FIXME("Unhandled index range %#x, %u.\n", reg->type, i);
-            return false;
-        }
-    }
-
-    return true;
-}
-
-static void spirv_compiler_emit_dcl_index_range(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_instruction *instruction)
-{
-    const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range;
-
-    if (!spirv_compiler_check_index_range(compiler, range))
-        FIXME("Ignoring dcl_index_range %#x %u.\n", range->dst.reg.type, range->register_count);
-}
-
 static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler,
         const struct vkd3d_shader_instruction *instruction)
 {
@@ -6495,157 +6224,83 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler
             SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size));
 }
-static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler,
-        const struct vkd3d_shader_phase *phase)
+static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler);
+
+static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler)
 {
-    const struct vkd3d_shader_signature *signature = compiler->output_signature;
+    const struct shader_signature *signature = &compiler->output_signature;
     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-    struct vkd3d_symbol reg_symbol, *symbol;
-    struct vkd3d_shader_register reg;
-    struct rb_entry *entry;
-    unsigned int i;
+
+    if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase)
+        spirv_compiler_emit_default_control_point_phase(compiler);
vkd3d_spirv_build_op_function_end(builder);
     compiler->temp_id = 0;
     compiler->temp_count = 0;
-    /*
-     * vocp inputs in fork and join shader phases are outputs of the control
-     * point phase. Reinsert symbols for vocp registers while leaving the
-     * control point phase.
-     */
-    if (is_control_point_phase(phase))
+    if (is_in_control_point_phase(compiler))
     {
         if (compiler->epilogue_function_id)
         {
-            spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, phase, "_epilogue");
+            spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, "_epilogue");
             spirv_compiler_emit_shader_epilogue_function(compiler);
         }
-        memset(&reg, 0, sizeof(reg));
-        reg.idx[1].offset = ~0u;
-
         /* Fork and join phases share output registers (patch constants).
          * Control point phase has separate output registers. */
         memset(compiler->output_info, 0, signature->element_count * sizeof(*compiler->output_info));
         memset(compiler->private_output_variable, 0, sizeof(compiler->private_output_variable));
         memset(compiler->private_output_variable_array_idx, 0, sizeof(compiler->private_output_variable_array_idx));
         memset(compiler->private_output_variable_write_mask, 0, sizeof(compiler->private_output_variable_write_mask));
-
-        for (i = 0; i < signature->element_count; ++i)
-        {
-            const struct vkd3d_shader_signature_element *e = &signature->elements[i];
-
-            reg.type = VKD3DSPR_OUTPUT;
-            reg.idx[0].offset = e->register_index;
-            vkd3d_symbol_make_register(&reg_symbol, &reg);
-            if ((entry = rb_get(&compiler->symbol_table, &reg_symbol)))
-            {
-                rb_remove(&compiler->symbol_table, entry);
-
-                symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry);
-
-                reg.type = VKD3DSPR_OUTCONTROLPOINT;
-                reg.idx[1].offset = reg.idx[0].offset;
-                reg.idx[0].offset = compiler->output_control_point_count;
-                vkd3d_symbol_make_register(symbol, &reg);
-                symbol->info.reg.is_aggregate = false;
-
-                if (rb_put(&compiler->symbol_table, symbol, entry) == -1)
-                {
-                    ERR("Failed to insert vocp symbol entry (%s).\n", debug_vkd3d_symbol(symbol));
-                    vkd3d_symbol_free(entry, NULL);
-                }
-            }
-        }
-    }
-
-    if (phase->instance_count)
-    {
-        memset(&reg, 0, sizeof(reg));
-        reg.type = phase->type == VKD3DSIH_HS_FORK_PHASE ? VKD3DSPR_FORKINSTID : VKD3DSPR_JOININSTID;
-        reg.idx[0].offset = ~0u;
-        reg.idx[1].offset = ~0u;
-        vkd3d_symbol_make_register(&reg_symbol, &reg);
-        if ((entry = rb_get(&compiler->symbol_table, &reg_symbol)))
-        {
-            rb_remove(&compiler->symbol_table, entry);
-            vkd3d_symbol_free(entry, NULL);
-        }
     }
 }
static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - const struct vkd3d_shader_phase *previous_phase; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t function_id, void_id, function_type_id; struct vkd3d_shader_phase *phase;
- if ((previous_phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_leave_shader_phase(compiler, previous_phase); + assert(compiler->phase != instruction->handler_idx);
- if (!vkd3d_array_reserve((void **)&compiler->shader_phases, &compiler->shader_phases_size, - compiler->shader_phase_count + 1, sizeof(*compiler->shader_phases))) - return; - phase = &compiler->shader_phases[compiler->shader_phase_count]; + if (!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler);
- phase->type = instruction->handler_idx; - phase->idx = compiler->shader_phase_count; - phase->instance_count = 0; - phase->function_id = 0; - phase->instance_id = 0; - phase->function_location = 0; + function_id = vkd3d_spirv_alloc_id(builder);
- ++compiler->shader_phase_count; -} - -static int spirv_compiler_emit_shader_phase_instance_count(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - struct vkd3d_shader_phase *phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; - - if (!compiler->shader_phase_count - || (phase->type != VKD3DSIH_HS_FORK_PHASE && phase->type != VKD3DSIH_HS_JOIN_PHASE) - || phase->function_id) - { - WARN("Unexpected dcl_hs_{fork,join}_phase_instance_count instruction.\n"); - return VKD3D_ERROR_INVALID_SHADER; - } - - phase->instance_count = instruction->declaration.count; - - spirv_compiler_begin_shader_phase(compiler, phase); - - return VKD3D_OK; -} + void_id = vkd3d_spirv_get_op_type_void(builder); + function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, NULL, 0); + vkd3d_spirv_build_op_function(builder, void_id, function_id, + SpvFunctionControlMaskNone, function_type_id);
-static const struct vkd3d_shader_phase *spirv_compiler_get_control_point_phase( - struct spirv_compiler *compiler) -{ - const struct vkd3d_shader_phase *phase; + vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder));
- if (compiler->shader_phase_count < 1) - return NULL; + compiler->phase = instruction->handler_idx; + spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL);
- phase = &compiler->shader_phases[0]; - if (is_control_point_phase(phase)) - return phase; + phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + ? &compiler->control_point_phase : &compiler->patch_constant_phase; + phase->function_id = function_id; + phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream);
- return NULL; + if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + compiler->emit_default_control_point_phase = instruction->flags; }
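Since a hull shader has at most one control point phase and one patch constant phase, the open-ended phase array is replaced with two fixed slots plus the current handler index. The is_in_default_phase()/is_in_control_point_phase() helpers used here are defined elsewhere in spirv.c; presumably they reduce to comparisons on that index, along these lines (a sketch under that assumption, not the actual definitions):

    #include <stdbool.h>

    /* Assumes enum vkd3d_shader_opcode and the VKD3DSIH_* constants from
     * vkd3d_shader_private.h; compiler->phase is assumed to start out as
     * VKD3DSIH_INVALID. */
    static bool is_in_default_phase_sketch(enum vkd3d_shader_opcode phase)
    {
        return phase == VKD3DSIH_INVALID;
    }

    static bool is_in_control_point_phase_sketch(enum vkd3d_shader_opcode phase)
    {
        return phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
    }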
static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) { - const struct vkd3d_shader_signature *output_signature = compiler->output_signature; - const struct vkd3d_shader_signature *input_signature = compiler->input_signature; + const struct shader_signature *output_signature = &compiler->output_signature; + const struct shader_signature *input_signature = &compiler->input_signature; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; enum vkd3d_shader_component_type component_type; - uint32_t input_id, output_id, dst_id, src_id; struct vkd3d_shader_src_param invocation; struct vkd3d_shader_register input_reg; uint32_t type_id, output_ptr_type_id; + uint32_t input_id, output_id, dst_id; unsigned int component_count; + unsigned int array_sizes[2]; uint32_t invocation_id; unsigned int i;
@@ -6657,6 +6312,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile invocation.reg.idx[0].offset = ~0u; invocation.reg.idx[1].offset = ~0u; invocation.reg.idx[2].offset = ~0u; + invocation.reg.idx_count = 0; invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE;
memset(&input_reg, 0, sizeof(input_reg)); @@ -6664,37 +6320,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile input_reg.data_type = VKD3D_DATA_FLOAT; input_reg.idx[0].rel_addr = &invocation; input_reg.idx[2].offset = ~0u; + input_reg.idx_count = 2; input_id = spirv_compiler_get_register_id(compiler, &input_reg);
assert(input_signature->element_count == output_signature->element_count); for (i = 0; i < output_signature->element_count; ++i) { - const struct vkd3d_shader_signature_element *output = &output_signature->elements[i]; - const struct vkd3d_shader_signature_element *input = &input_signature->elements[i]; + const struct signature_element *output = &output_signature->elements[i]; + const struct signature_element *input = &input_signature->elements[i];
assert(input->mask == output->mask); assert(input->component_type == output->component_type);
- input_reg.idx[1].offset = input->register_index; + input_reg.idx[1].offset = i; input_id = spirv_compiler_get_register_id(compiler, &input_reg); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - src_id = vkd3d_spirv_build_op_load(builder, type_id, input_id, SpvMemoryAccessMaskNone);
component_type = output->component_type; component_count = vkd3d_write_mask_component_count(output->mask); - output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassOutput, component_type, component_count, compiler->output_control_point_count); + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + if ((array_sizes[0] = (input->register_count > 1) ? input->register_count : 0)) + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, spirv_compiler_get_constant_uint(compiler, + array_sizes[0])); + + array_sizes[1] = compiler->output_control_point_count; + output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, SpvStorageClassOutput, + component_type, component_count, array_sizes, 2); vkd3d_spirv_add_iface_variable(builder, output_id); vkd3d_spirv_build_op_decorate1(builder, output_id, SpvDecorationLocation, output->register_index); vkd3d_spirv_build_op_name(builder, output_id, "vocp%u", output->register_index);
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_id, invocation_id);
- spirv_compiler_emit_store(compiler, dst_id, output->mask, - component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_ALL, src_id); + vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); } + + vkd3d_spirv_build_op_return(builder); }
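The rewritten default control point phase copies each signature element with a single OpCopyMemory, indexed by the invocation ID, instead of a per-component load/store pair. Conceptually (a standalone sketch, not the emitted SPIR-V):

    #include <string.h>

    /* Invocation i forwards its own input control point to output control
     * point i unchanged; one whole-element copy per signature element. */
    static void copy_control_point_sketch(float *dst, const float *src,
            size_t component_count)
    {
        memcpy(dst, src, component_count * sizeof(*dst));
    }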
static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, @@ -6723,95 +6384,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); }
-static void spirv_compiler_emit_hull_shader_input_initialisation(struct spirv_compiler *compiler) -{ - uint32_t type_id, length_id, register_index_id, src_array_id, dst_array_id, vicp_id, tmp_id; - const struct vkd3d_shader_signature *signature = compiler->input_signature; - uint32_t src_type_id, dst_type_id, src_id, dst_id, point_index_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature_element *element; - enum vkd3d_shader_input_sysval_semantic sysval; - const struct vkd3d_spirv_builtin *builtin; - struct vkd3d_symbol *symbol, symbol_key; - unsigned int register_count, i, j; - struct vkd3d_shader_register r; - struct rb_entry *entry; - uint32_t indices[2]; - - for (i = 0, register_count = 0; i < signature->element_count; ++i) - { - register_count = max(register_count, signature->elements[i].register_index + 1); - } - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); - - memset(&r, 0, sizeof(r)); - r.type = VKD3DSPR_INPUT; - r.idx[0].offset = 0; - r.idx[1].offset = ~0u; - vkd3d_symbol_make_register(&symbol_key, &r); - - for (i = 0; i < signature->element_count; ++i) - { - element = &signature->elements[i]; - - symbol_key.key.reg.idx = element->register_index; - entry = rb_get(&compiler->symbol_table, &symbol_key); - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - - vicp_id = symbol->id; - register_index_id = spirv_compiler_get_constant_uint(compiler, element->register_index); - dst_array_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, type_id, vicp_id, register_index_id); - - if (element->sysval_semantic) - { - sysval = vkd3d_siv_from_sysval(element->sysval_semantic); - builtin = get_spirv_builtin_for_sysval(compiler, sysval); - src_array_id = spirv_compiler_emit_builtin_variable(compiler, builtin, - SpvStorageClassInput, compiler->input_control_point_count); - - if (builtin->component_count == 4) - { - vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); - } - else - { - tmp_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, builtin->component_count); - src_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, tmp_id); - dst_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, tmp_id); - - for (j = 0; j < compiler->input_control_point_count; ++j) - { - point_index_id = spirv_compiler_get_constant_uint(compiler, j); - src_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, - src_type_id, src_array_id, point_index_id); - - indices[0] = point_index_id; - indices[1] = spirv_compiler_get_constant_uint(compiler, 0); - dst_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, - dst_type_id, dst_array_id, indices, 2); - - vkd3d_spirv_build_op_copy_memory(builder, dst_id, src_id, SpvMemoryAccessMaskNone); - } - } - } - else - { - src_array_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassInput, VKD3D_SHADER_COMPONENT_FLOAT, 4, compiler->input_control_point_count); - vkd3d_spirv_add_iface_variable(builder, src_array_id); - vkd3d_spirv_build_op_decorate1(builder, src_array_id, SpvDecorationLocation, element->register_index); - vkd3d_spirv_build_op_name(builder, src_array_id, "v%u", 
element->register_index); - - vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); - } - symbol->info.reg.dcl_mask |= element->mask; - } -} - static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -6854,46 +6426,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_phase *control_point_phase, *phase; - uint32_t phase_instance_id; - unsigned int i, j; uint32_t void_id;
vkd3d_spirv_builder_begin_main_function(builder);
- spirv_compiler_emit_hull_shader_input_initialisation(compiler); - void_id = vkd3d_spirv_get_op_type_void(builder);
- if ((control_point_phase = spirv_compiler_get_control_point_phase(compiler))) - vkd3d_spirv_build_op_function_call(builder, void_id, control_point_phase->function_id, NULL, 0); - else - spirv_compiler_emit_default_control_point_phase(compiler); + vkd3d_spirv_build_op_function_call(builder, void_id, compiler->control_point_phase.function_id, NULL, 0);
if (compiler->use_vocp) spirv_compiler_emit_hull_shader_barrier(compiler);
- for (i = 0; i < compiler->shader_phase_count; ++i) - { - phase = &compiler->shader_phases[i]; - if (is_control_point_phase(phase)) - continue; - - if (phase->instance_count) - { - for (j = 0; j < phase->instance_count; ++j) - { - phase_instance_id = spirv_compiler_get_constant_uint(compiler, j); - vkd3d_spirv_build_op_function_call(builder, - void_id, phase->function_id, &phase_instance_id, 1); - } - } - else - { - vkd3d_spirv_build_op_function_call(builder, void_id, phase->function_id, NULL, 0); - } - } - + /* TODO: only call the patch constant function for invocation 0. The simplest way + * is to avoid use of private variables there; otherwise we would need a separate + * patch constant epilogue, also called only from invocation 0. */ + vkd3d_spirv_build_op_function_call(builder, void_id, compiler->patch_constant_phase.function_id, NULL, 0); spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); vkd3d_spirv_build_op_function_end(builder); @@ -7575,10 +7122,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co static void spirv_compiler_emit_return(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - const struct vkd3d_shader_phase *phase = spirv_compiler_get_current_shader_phase(compiler); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (!phase || is_control_point_phase(phase))) + if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) + || is_in_control_point_phase(compiler))) spirv_compiler_emit_shader_epilogue_invocation(compiler);
vkd3d_spirv_build_op_return(builder); @@ -7972,12 +7519,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c
if (cf_info) cf_info->inside_block = false; + else + compiler->main_block_open = false; break;
case VKD3DSIH_RETP: spirv_compiler_emit_retc(compiler, instruction); break;
+ case VKD3DSIH_DISCARD: case VKD3DSIH_TEXKILL: spirv_compiler_emit_kill(compiler, instruction); break; @@ -8256,7 +7806,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, image_operands[image_operand_count++] = spirv_compiler_emit_texel_offset(compiler, instruction, image.resource_type_info); } - if (multisample) + if (multisample && image.resource_type_info->ms) { operands_mask |= SpvImageOperandsSampleMask; image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, @@ -9521,58 +9071,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); }
-static void spirv_compiler_emit_hull_shader_inputs(struct spirv_compiler *compiler) -{ - const struct vkd3d_shader_signature *signature = compiler->input_signature; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, length_id, vicp_id, vicp_type_id; - unsigned int register_count, register_idx, i; - struct vkd3d_shader_register r; - struct vkd3d_symbol symbol; - struct rb_entry *entry; - - for (i = 0, register_count = 0; i < signature->element_count; ++i) - { - register_count = max(register_count, signature->elements[i].register_index + 1); - } - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - length_id = spirv_compiler_get_constant_uint(compiler, register_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - vicp_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); - - vicp_id = vkd3d_spirv_build_op_variable(builder, - &builder->global_stream, vicp_type_id, SpvStorageClassPrivate, 0); - vkd3d_spirv_build_op_name(builder, vicp_id, "vicp"); - - memset(&r, 0, sizeof(r)); - r.type = VKD3DSPR_INPUT; - r.idx[0].offset = 0; - r.idx[1].offset = ~0u; - vkd3d_symbol_make_register(&symbol, &r); - - for (i = 0; i < signature->element_count; ++i) - { - register_idx = signature->elements[i].register_index; - - symbol.key.reg.idx = register_idx; - if ((entry = rb_get(&compiler->symbol_table, &symbol))) - { - struct vkd3d_symbol *s = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - s->info.reg.dcl_mask |= signature->elements[i].mask; - continue; - } - - vkd3d_symbol_set_register_info(&symbol, vicp_id, SpvStorageClassPrivate, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); - symbol.info.reg.dcl_mask = signature->elements[i].mask; - symbol.info.reg.is_aggregate = true; - spirv_compiler_put_symbol(compiler, &symbol); - } -} - /* This function is called after declarations are processed. */ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) { @@ -9581,8 +9079,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->xfb_info && compiler->xfb_info->element_count && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) spirv_compiler_emit_point_size(compiler); - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_hull_shader_inputs(compiler); }
static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) @@ -9660,9 +9156,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_SIV: spirv_compiler_emit_dcl_output_siv(compiler, instruction); break; - case VKD3DSIH_DCL_INDEX_RANGE: - spirv_compiler_emit_dcl_index_range(compiler, instruction); - break; case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; @@ -9699,10 +9192,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_THREAD_GROUP: spirv_compiler_emit_dcl_thread_group(compiler, instruction); break; - case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - ret = spirv_compiler_emit_shader_phase_instance_count(compiler, instruction); - break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -9826,6 +9315,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CONTINUE: case VKD3DSIH_CONTINUEP: case VKD3DSIH_DEFAULT: + case VKD3DSIH_DISCARD: case VKD3DSIH_ELSE: case VKD3DSIH_ENDIF: case VKD3DSIH_ENDLOOP: @@ -9947,28 +9437,55 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, return ret; }
-int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *spirv) { - const struct vkd3d_shader_instruction_array *instructions = &parser->instructions; const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_phase *phase; + struct vkd3d_shader_instruction_array instructions; enum vkd3d_result result = VKD3D_OK; unsigned int i;
compiler->location.column = 0; - for (i = 0; i < instructions->count; ++i) + compiler->location.line = 1; + + instructions = parser->instructions; + memset(&parser->instructions, 0, sizeof(parser->instructions)); + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL + && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) + { + result = instruction_array_normalise_hull_shader_control_point_io(&instructions, + &compiler->input_signature); + } + if (result >= 0) + result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, + &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); + + if (result >= 0 && TRACE_ON()) + vkd3d_shader_trace(&instructions, &parser->shader_version); + + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); + + for (i = 0; i < instructions.count && result >= 0; ++i) { compiler->location.line = i + 1; - if ((result = spirv_compiler_handle_instruction(compiler, &instructions->elements[i])) < 0) - return result; + result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); }
- if ((phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_leave_shader_phase(compiler, phase); + shader_instruction_array_destroy(&instructions); + + if (result < 0) + return result; + + if (compiler->main_block_open) + vkd3d_spirv_build_op_return(builder); + + if (!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler); else vkd3d_spirv_build_op_function_end(builder);
@@ -10023,23 +9540,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, return VKD3D_OK; }
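Note the ownership transfer at the top of spirv_compiler_generate_spirv(): the instruction array is moved out of the parser and parser->instructions is zeroed, so the normalisation passes can grow or free it without the parser's cleanup freeing the same storage. The idiom in isolation, with a hypothetical stand-in type:

    #include <string.h>

    struct instruction_array_sketch /* stand-in for vkd3d_shader_instruction_array */
    {
        void *elements;
        size_t count, capacity;
    };

    /* Move the array out of src; src is left empty, so destroying both the
     * moved-to and moved-from arrays cannot double-free the elements. */
    static struct instruction_array_sketch array_move(struct instruction_array_sketch *src)
    {
        struct instruction_array_sketch dst = *src;

        memset(src, 0, sizeof(*src));
        return dst;
    }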
-void spirv_compiler_destroy(struct spirv_compiler *compiler) +int spirv_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { - vkd3d_free(compiler->control_flow_info); - - vkd3d_free(compiler->output_info); - - vkd3d_free(compiler->push_constants); - vkd3d_free(compiler->descriptor_offset_ids); - - vkd3d_spirv_builder_free(&compiler->spirv_builder); - - rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); + struct spirv_compiler *spirv_compiler; + int ret;
- vkd3d_free(compiler->shader_phases); - vkd3d_free(compiler->spec_constants); + if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, + compile_info, scan_descriptor_info, message_context, &parser->location))) + { + ERR("Failed to create SPIR-V compiler.\n"); + return VKD3D_ERROR; + }
- vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out);
- vkd3d_free(compiler); + spirv_compiler_destroy(spirv_compiler); + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c new file mode 100644 index 00000000000..d066b13ee4e --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -0,0 +1,5234 @@ +/* + * TPF (Direct3D shader models 4 and 5 bytecode) support + * + * Copyright 2008-2009 Henri Verbeet for CodeWeavers + * Copyright 2010 Rico Schüller + * Copyright 2017 Józef Kucia for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" + +#define SM4_MAX_SRC_COUNT 6 +#define SM4_MAX_DST_COUNT 2 + +STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); + +#define VKD3D_SM4_PS 0x0000u +#define VKD3D_SM4_VS 0x0001u +#define VKD3D_SM4_GS 0x0002u +#define VKD3D_SM5_HS 0x0003u +#define VKD3D_SM5_DS 0x0004u +#define VKD3D_SM5_CS 0x0005u +#define VKD3D_SM4_LIB 0xfff0u + +#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) + +#define VKD3D_SM4_MODIFIER_MASK 0x3fu + +#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) + +#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 +#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) +#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 +#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) +#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 +#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 +#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 +#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) + +#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 +#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) + +#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 +#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) + +#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 +#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) + +#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 +#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) + +#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 +#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << 
VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) + +#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 +#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) + +#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 +#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) + +#define VKD3D_SM5_PRECISE_SHIFT 19 +#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) + +#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 +#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) + +#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 +#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu + +#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 +#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) + +#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 +#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) + +#define VKD3D_SM5_TESSELLATOR_SHIFT 11 +#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) + +#define VKD3D_SM4_OPCODE_MASK 0xff + +#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) + +#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu + +#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 +#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + +#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 +#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) + +#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 +#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) + +#define VKD3D_SM4_ADDRESSING_SHIFT2 28 +#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) + +#define VKD3D_SM4_ADDRESSING_SHIFT1 25 +#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) + +#define VKD3D_SM4_ADDRESSING_SHIFT0 22 +#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) + +#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 +#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) + +#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 +#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) + +#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 +#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) + +#define VKD3D_SM4_DIMENSION_SHIFT 0 +#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) + +#define VKD3D_SM4_WRITEMASK_SHIFT 4 +#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) + +#define VKD3D_SM4_SWIZZLE_SHIFT 4 +#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) + +#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 +#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 + +#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 + +#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) + +#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) + +/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ +#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 + +enum vkd3d_sm4_opcode +{ + VKD3D_SM4_OP_ADD = 0x00, + VKD3D_SM4_OP_AND = 0x01, + VKD3D_SM4_OP_BREAK = 0x02, + VKD3D_SM4_OP_BREAKC = 0x03, + VKD3D_SM4_OP_CASE = 0x06, + VKD3D_SM4_OP_CONTINUE = 0x07, + VKD3D_SM4_OP_CONTINUEC = 0x08, + VKD3D_SM4_OP_CUT = 0x09, + VKD3D_SM4_OP_DEFAULT = 0x0a, + VKD3D_SM4_OP_DERIV_RTX = 0x0b, + VKD3D_SM4_OP_DERIV_RTY = 0x0c, + VKD3D_SM4_OP_DISCARD = 0x0d, + VKD3D_SM4_OP_DIV = 0x0e, + VKD3D_SM4_OP_DP2 = 0x0f, + VKD3D_SM4_OP_DP3 = 0x10, + VKD3D_SM4_OP_DP4 = 0x11, + VKD3D_SM4_OP_ELSE = 0x12, + VKD3D_SM4_OP_EMIT = 0x13, + VKD3D_SM4_OP_ENDIF = 0x15, + VKD3D_SM4_OP_ENDLOOP = 0x16, + VKD3D_SM4_OP_ENDSWITCH = 0x17, + VKD3D_SM4_OP_EQ = 0x18, + VKD3D_SM4_OP_EXP = 0x19, + VKD3D_SM4_OP_FRC = 0x1a, + VKD3D_SM4_OP_FTOI = 0x1b, + VKD3D_SM4_OP_FTOU = 0x1c, + VKD3D_SM4_OP_GE = 0x1d, + VKD3D_SM4_OP_IADD = 0x1e, + VKD3D_SM4_OP_IF = 0x1f, + VKD3D_SM4_OP_IEQ = 0x20, + VKD3D_SM4_OP_IGE = 0x21, + VKD3D_SM4_OP_ILT = 0x22, + VKD3D_SM4_OP_IMAD = 0x23, + VKD3D_SM4_OP_IMAX = 0x24, + VKD3D_SM4_OP_IMIN = 0x25, + VKD3D_SM4_OP_IMUL = 0x26, + VKD3D_SM4_OP_INE = 0x27, + VKD3D_SM4_OP_INEG = 0x28, + VKD3D_SM4_OP_ISHL = 0x29, + VKD3D_SM4_OP_ISHR = 0x2a, + VKD3D_SM4_OP_ITOF = 0x2b, + VKD3D_SM4_OP_LABEL = 0x2c, + VKD3D_SM4_OP_LD = 0x2d, + VKD3D_SM4_OP_LD2DMS = 0x2e, + VKD3D_SM4_OP_LOG = 0x2f, + VKD3D_SM4_OP_LOOP = 0x30, + VKD3D_SM4_OP_LT = 0x31, + VKD3D_SM4_OP_MAD = 0x32, + VKD3D_SM4_OP_MIN = 0x33, + VKD3D_SM4_OP_MAX = 0x34, + VKD3D_SM4_OP_SHADER_DATA = 0x35, + VKD3D_SM4_OP_MOV = 0x36, + VKD3D_SM4_OP_MOVC = 0x37, + VKD3D_SM4_OP_MUL = 0x38, + VKD3D_SM4_OP_NE = 0x39, + VKD3D_SM4_OP_NOP = 0x3a, + VKD3D_SM4_OP_NOT = 0x3b, + VKD3D_SM4_OP_OR = 0x3c, + VKD3D_SM4_OP_RESINFO = 0x3d, + VKD3D_SM4_OP_RET = 0x3e, + VKD3D_SM4_OP_RETC = 0x3f, + VKD3D_SM4_OP_ROUND_NE = 0x40, + VKD3D_SM4_OP_ROUND_NI = 0x41, + VKD3D_SM4_OP_ROUND_PI = 0x42, + VKD3D_SM4_OP_ROUND_Z = 0x43, + VKD3D_SM4_OP_RSQ = 0x44, + VKD3D_SM4_OP_SAMPLE = 0x45, + VKD3D_SM4_OP_SAMPLE_C = 0x46, + VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, + VKD3D_SM4_OP_SAMPLE_LOD = 0x48, + VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, + VKD3D_SM4_OP_SAMPLE_B = 0x4a, + VKD3D_SM4_OP_SQRT = 0x4b, + VKD3D_SM4_OP_SWITCH = 0x4c, + VKD3D_SM4_OP_SINCOS = 0x4d, + VKD3D_SM4_OP_UDIV = 0x4e, + VKD3D_SM4_OP_ULT = 0x4f, + VKD3D_SM4_OP_UGE = 0x50, + VKD3D_SM4_OP_UMUL = 0x51, + VKD3D_SM4_OP_UMAX = 0x53, + VKD3D_SM4_OP_UMIN = 0x54, + VKD3D_SM4_OP_USHR = 0x55, + VKD3D_SM4_OP_UTOF = 0x56, + VKD3D_SM4_OP_XOR = 0x57, + VKD3D_SM4_OP_DCL_RESOURCE = 0x58, + VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, + VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, + VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, + VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, + VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, + VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, + VKD3D_SM4_OP_DCL_INPUT = 0x5f, + VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, + VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, + VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, + VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, + VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, + VKD3D_SM4_OP_DCL_OUTPUT = 0x65, + VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, + VKD3D_SM4_OP_DCL_TEMPS = 0x68, + VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, + VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, + VKD3D_SM4_OP_LOD = 0x6c, + VKD3D_SM4_OP_GATHER4 = 0x6d, + VKD3D_SM4_OP_SAMPLE_POS = 0x6e, + VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, + VKD3D_SM5_OP_HS_DECLS = 0x71, + VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, + VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, + VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, + VKD3D_SM5_OP_EMIT_STREAM = 0x75, + VKD3D_SM5_OP_CUT_STREAM = 0x76, + VKD3D_SM5_OP_FCALL = 
0x78, + VKD3D_SM5_OP_BUFINFO = 0x79, + VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, + VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, + VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, + VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, + VKD3D_SM5_OP_GATHER4_C = 0x7e, + VKD3D_SM5_OP_GATHER4_PO = 0x7f, + VKD3D_SM5_OP_GATHER4_PO_C = 0x80, + VKD3D_SM5_OP_RCP = 0x81, + VKD3D_SM5_OP_F32TOF16 = 0x82, + VKD3D_SM5_OP_F16TOF32 = 0x83, + VKD3D_SM5_OP_COUNTBITS = 0x86, + VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, + VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, + VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, + VKD3D_SM5_OP_UBFE = 0x8a, + VKD3D_SM5_OP_IBFE = 0x8b, + VKD3D_SM5_OP_BFI = 0x8c, + VKD3D_SM5_OP_BFREV = 0x8d, + VKD3D_SM5_OP_SWAPC = 0x8e, + VKD3D_SM5_OP_DCL_STREAM = 0x8f, + VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, + VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, + VKD3D_SM5_OP_DCL_INTERFACE = 0x92, + VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, + VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, + VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, + VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, + VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, + VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, + VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, + VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, + VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, + VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, + VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, + VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, + VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, + VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, + VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, + VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, + VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, + VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, + VKD3D_SM5_OP_LD_RAW = 0xa5, + VKD3D_SM5_OP_STORE_RAW = 0xa6, + VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, + VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, + VKD3D_SM5_OP_ATOMIC_AND = 0xa9, + VKD3D_SM5_OP_ATOMIC_OR = 0xaa, + VKD3D_SM5_OP_ATOMIC_XOR = 0xab, + VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, + VKD3D_SM5_OP_ATOMIC_IADD = 0xad, + VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, + VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, + VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, + VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, + VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, + VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, + VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, + VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, + VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, + VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, + VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, + VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, + VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, + VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, + VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, + VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, + VKD3D_SM5_OP_SYNC = 0xbe, + VKD3D_SM5_OP_DADD = 0xbf, + VKD3D_SM5_OP_DMAX = 0xc0, + VKD3D_SM5_OP_DMIN = 0xc1, + VKD3D_SM5_OP_DMUL = 0xc2, + VKD3D_SM5_OP_DEQ = 0xc3, + VKD3D_SM5_OP_DGE = 0xc4, + VKD3D_SM5_OP_DLT = 0xc5, + VKD3D_SM5_OP_DNE = 0xc6, + VKD3D_SM5_OP_DMOV = 0xc7, + VKD3D_SM5_OP_DMOVC = 0xc8, + VKD3D_SM5_OP_DTOF = 0xc9, + VKD3D_SM5_OP_FTOD = 0xca, + VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, + VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, + VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, + VKD3D_SM5_OP_DDIV = 0xd2, + VKD3D_SM5_OP_DFMA = 0xd3, + VKD3D_SM5_OP_DRCP = 0xd4, + VKD3D_SM5_OP_MSAD = 0xd5, + VKD3D_SM5_OP_DTOI = 0xd6, + VKD3D_SM5_OP_DTOU = 0xd7, + VKD3D_SM5_OP_ITOD = 0xd8, + VKD3D_SM5_OP_UTOD = 0xd9, + VKD3D_SM5_OP_GATHER4_S = 0xdb, + VKD3D_SM5_OP_GATHER4_C_S = 0xdc, + VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, + VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, + VKD3D_SM5_OP_LD_S = 0xdf, + VKD3D_SM5_OP_LD2DMS_S = 0xe0, + VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, + VKD3D_SM5_OP_LD_RAW_S = 0xe2, + 
VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, + VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, + VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, + VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, + VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, + VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, + VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, + VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, +}; + +enum vkd3d_sm4_instruction_modifier +{ + VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, + VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, + VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, +}; + +enum vkd3d_sm4_register_type +{ + VKD3D_SM4_RT_TEMP = 0x00, + VKD3D_SM4_RT_INPUT = 0x01, + VKD3D_SM4_RT_OUTPUT = 0x02, + VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, + VKD3D_SM4_RT_IMMCONST = 0x04, + VKD3D_SM4_RT_IMMCONST64 = 0x05, + VKD3D_SM4_RT_SAMPLER = 0x06, + VKD3D_SM4_RT_RESOURCE = 0x07, + VKD3D_SM4_RT_CONSTBUFFER = 0x08, + VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, + VKD3D_SM4_RT_PRIMID = 0x0b, + VKD3D_SM4_RT_DEPTHOUT = 0x0c, + VKD3D_SM4_RT_NULL = 0x0d, + VKD3D_SM4_RT_RASTERIZER = 0x0e, + VKD3D_SM4_RT_OMASK = 0x0f, + VKD3D_SM5_RT_STREAM = 0x10, + VKD3D_SM5_RT_FUNCTION_BODY = 0x11, + VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, + VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, + VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, + VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, + VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, + VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, + VKD3D_SM5_RT_UAV = 0x1e, + VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, + VKD3D_SM5_RT_THREAD_ID = 0x20, + VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, + VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, + VKD3D_SM5_RT_COVERAGE = 0x23, + VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, + VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, +}; + +enum vkd3d_sm4_extended_operand_type +{ + VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, + VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, +}; + +enum vkd3d_sm4_register_modifier +{ + VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, + VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, + VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, + VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, +}; + +enum vkd3d_sm4_register_precision +{ + VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, + VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, + VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, +}; + +enum vkd3d_sm4_output_primitive_type +{ + VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, + VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, + VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, +}; + +enum vkd3d_sm4_input_primitive_type +{ + VKD3D_SM4_INPUT_PT_POINT = 0x01, + VKD3D_SM4_INPUT_PT_LINE = 0x02, + VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, + VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, + VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, + VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, + VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, + VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, + VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, + VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, + VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, + VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, + VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, + VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, + VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, + VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, + VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, + VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, + VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, + VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, + VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, + VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, + VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, + VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, + 
VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, + VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, + VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, + VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, + VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, + VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, + VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, + VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, + VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, + VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, + VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, + VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, + VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, +}; + +enum vkd3d_sm4_swizzle_type +{ + VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, +}; + +enum vkd3d_sm4_dimension +{ + VKD3D_SM4_DIMENSION_NONE = 0x0, + VKD3D_SM4_DIMENSION_SCALAR = 0x1, + VKD3D_SM4_DIMENSION_VEC4 = 0x2, +}; + +enum vkd3d_sm4_resource_type +{ + VKD3D_SM4_RESOURCE_BUFFER = 0x1, + VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, + VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, + VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, + VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, + VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, + VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, + VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, + VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, + VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, + VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, + VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, +}; + +enum vkd3d_sm4_data_type +{ + VKD3D_SM4_DATA_UNORM = 0x1, + VKD3D_SM4_DATA_SNORM = 0x2, + VKD3D_SM4_DATA_INT = 0x3, + VKD3D_SM4_DATA_UINT = 0x4, + VKD3D_SM4_DATA_FLOAT = 0x5, + VKD3D_SM4_DATA_MIXED = 0x6, + VKD3D_SM4_DATA_DOUBLE = 0x7, + VKD3D_SM4_DATA_CONTINUED = 0x8, + VKD3D_SM4_DATA_UNUSED = 0x9, +}; + +enum vkd3d_sm4_sampler_mode +{ + VKD3D_SM4_SAMPLER_DEFAULT = 0x0, + VKD3D_SM4_SAMPLER_COMPARISON = 0x1, +}; + +enum vkd3d_sm4_shader_data_type +{ + VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, + VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, +}; + +struct sm4_index_range +{ + unsigned int index; + unsigned int count; + unsigned int mask; +}; + +struct sm4_index_range_array +{ + unsigned int count; + struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; +}; + +struct vkd3d_shader_sm4_parser +{ + const uint32_t *start, *end, *ptr; + + unsigned int output_map[MAX_REG_OUTPUT]; + + enum vkd3d_shader_opcode phase; + bool has_control_point_phase; + unsigned int input_register_masks[MAX_REG_OUTPUT]; + unsigned int output_register_masks[MAX_REG_OUTPUT]; + unsigned int patch_constant_register_masks[MAX_REG_OUTPUT]; + + struct sm4_index_range_array input_index_ranges; + struct sm4_index_range_array output_index_ranges; + struct sm4_index_range_array patch_constant_index_ranges; + + struct vkd3d_shader_parser p; +}; + +struct vkd3d_sm4_opcode_info +{ + enum vkd3d_sm4_opcode opcode; + enum vkd3d_shader_opcode handler_idx; + char dst_info[SM4_MAX_DST_COUNT]; + char src_info[SM4_MAX_SRC_COUNT]; + void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); +}; + +static const enum vkd3d_primitive_type output_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, +}; + +static const enum vkd3d_primitive_type input_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, + /* 
VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, + /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, + /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, +}; + +static const enum vkd3d_shader_resource_type resource_type_table[] = +{ + /* 0 */ VKD3D_SHADER_RESOURCE_NONE, + /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, + /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, + /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, + /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +}; + +static const enum vkd3d_data_type data_type_table[] = +{ + /* 0 */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, + /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, + /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, + /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, + /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, + /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, + /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, + /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, +}; + +static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); +} + +static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) +{ + const struct vkd3d_shader_version *version = &sm4->p.shader_version; + + return version->major >= 5 && version->minor >= 1; +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); + +static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, + const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) +{ + *register_space = 0; + + if (!shader_is_sm_5_1(priv)) + return true; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + + *register_space = *(*ptr)++; + return true; +} + +static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, + (struct vkd3d_shader_src_param *)&ins->src[0]); + ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? 
+ VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; +} + +static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_immediate_constant_buffer *icb; + enum vkd3d_sm4_shader_data_type type; + unsigned int icb_size; + + type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; + if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) + { + FIXME("Ignoring shader data type %#x.\n", type); + ins->handler_idx = VKD3DSIH_NOP; + return; + } + + ++tokens; + icb_size = token_count - 1; + if (icb_size % 4) + { + FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + + if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) + { + ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + icb->vec4_count = icb_size / 4; + memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); + shader_instruction_array_add_icb(&priv->p.instructions, icb); + ins->declaration.icb = icb; +} + +static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) +{ + range->first = reg->idx[1].offset; + range->last = reg->idx[shader_is_sm_5_1(sm4) ? 2 : 1].offset; + if (range->last < range->first) + { + FIXME("Invalid register range [%u:%u].\n", range->first, range->last); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, + "Last register %u must not be less than first register %u in range.", range->last, range->first); + } +} + +static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + enum vkd3d_sm4_resource_type resource_type; + const uint32_t *end = &tokens[token_count]; + enum vkd3d_sm4_data_type data_type; + enum vkd3d_data_type reg_data_type; + DWORD components; + unsigned int i; + + resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; + if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) + { + FIXME("Unhandled resource type %#x.\n", resource_type); + semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + else + { + semantic->resource_type = resource_type_table[resource_type]; + } + + if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + { + semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) + >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; + shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); + + components = *tokens++; + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + semantic->resource_data_type[i] = data_type_table[data_type]; + } + } + + if (reg_data_type == VKD3D_DATA_UAV) + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + + shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); +} + +static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); + if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) + ins->flags |= VKD3DSI_INDEXED_DYNAMIC; + + ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; + ins->declaration.cb.range.space = 0; + + if (shader_is_sm_5_1(priv)) + { + if (tokens >= end) + { + FIXME("Invalid ptr %p >= end %p.\n", tokens, end); + return; + } + + ins->declaration.cb.size = *tokens++; + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); + } +} + +static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + const uint32_t *end = &tokens[token_count]; + + ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; + if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) + FIXME("Unhandled sampler mode %#x.\n", ins->flags); + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); +} + +static bool sm4_parser_is_in_fork_or_join_phase(const struct vkd3d_shader_sm4_parser *sm4) +{ + return sm4->phase == VKD3DSIH_HS_FORK_PHASE || sm4->phase == VKD3DSIH_HS_JOIN_PHASE; +} + +static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; + unsigned int i, register_idx, register_count, write_mask; + enum vkd3d_shader_register_type type; + struct sm4_index_range_array *ranges; + unsigned int *io_masks; + + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, + &index_range->dst); + index_range->register_count = *tokens; + + register_idx = index_range->dst.reg.idx[index_range->dst.reg.idx_count - 1].offset; + register_count = index_range->register_count; + write_mask = index_range->dst.write_mask; + + if 
(vkd3d_write_mask_component_count(write_mask) != 1) + { + WARN("Unhandled write mask %#x.\n", write_mask); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK, + "Index range mask %#x is not scalar.", write_mask); + } + + switch ((type = index_range->dst.reg.type)) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + io_masks = priv->input_register_masks; + ranges = &priv->input_index_ranges; + break; + case VKD3DSPR_OUTPUT: + if (sm4_parser_is_in_fork_or_join_phase(priv)) + { + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; + } + else + { + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; + } + break; + case VKD3DSPR_COLOROUT: + case VKD3DSPR_OUTCONTROLPOINT: + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; + break; + case VKD3DSPR_PATCHCONST: + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; + break; + + default: + WARN("Unhandled register type %#x.\n", type); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Invalid register type %#x for index range base %u, count %u, mask %#x.", + type, register_idx, register_count, write_mask); + return; + } + + for (i = 0; i < ranges->count; ++i) + { + struct sm4_index_range r = ranges->ranges[i]; + + if (!(r.mask & write_mask)) + continue; + /* Ranges with the same base but different lengths are not an issue. */ + if (register_idx == r.index) + continue; + + if ((r.index <= register_idx && register_idx - r.index < r.count) + || (register_idx < r.index && r.index - register_idx < register_count)) + { + WARN("Detected index range collision for base %u, count %u, mask %#x.\n", + register_idx, register_count, write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Register index range base %u, count %u, mask %#x collides with a previous declaration.", + register_idx, register_count, write_mask); + return; + } + } + ranges->ranges[ranges->count].index = register_idx; + ranges->ranges[ranges->count].count = register_count; + ranges->ranges[ranges->count++].mask = write_mask; + + for (i = 0; i < register_count; ++i) + { + if ((io_masks[register_idx + i] & write_mask) != write_mask) + { + WARN("No matching declaration for index range base %u, count %u, mask %#x.\n", + register_idx, register_count, write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Input/output registers matching index range base %u, count %u, mask %#x were not declared.", + register_idx, register_count, write_mask); + return; + } + } +} + +static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_output_primitive_type primitive_type; + + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + else + ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled output primitive type %#x.\n", primitive_type); +} + +static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t 
opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_input_primitive_type primitive_type; + + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) + { + ins->declaration.primitive_type.type = VKD3D_PT_PATCH; + ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; + } + else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) + { + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + } + else + { + ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; + } + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled input primitive type %#x.\n", primitive_type); +} + +static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = *tokens; +} + +static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} + +static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} + +static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} + +static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} + +static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.indexable_temp.register_idx = *tokens++; + ins->declaration.indexable_temp.register_size = *tokens++; + ins->declaration.indexable_temp.component_count = *tokens; +} + +static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & 
VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; +} + +static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; + src_params[0].reg.u.fp_body_idx = *tokens++; + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); +} + +static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens; +} + +static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens++; + FIXME("Ignoring set of function bodies (count %u).\n", *tokens); +} + +static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.fp.index = *tokens++; + ins->declaration.fp.body_count = *tokens++; + ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; + ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; + FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); +} + +static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) + >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} + +static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.max_tessellation_factor = *(float *)tokens; +} + +static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct 
vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.thread_group_size.x = *tokens++; + ins->declaration.thread_group_size.y = *tokens++; + ins->declaration.thread_group_size.z = *tokens++; +} + +static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + resource->byte_stride = *tokens++; + if (resource->byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); + ins->declaration.tgsm_raw.byte_count = *tokens; + if (ins->declaration.tgsm_raw.byte_count % 4) + FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); +} + +static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.tgsm_structured.reg); + ins->declaration.tgsm_structured.byte_stride = *tokens++; + ins->declaration.tgsm_structured.structure_count = *tokens; + if (ins->declaration.tgsm_structured.byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); +} + +static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + resource->byte_stride = *tokens++; + if 
(resource->byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); + shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +} + +static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; +} + +/* + * d -> VKD3D_DATA_DOUBLE + * f -> VKD3D_DATA_FLOAT + * i -> VKD3D_DATA_INT + * u -> VKD3D_DATA_UINT + * O -> VKD3D_DATA_OPAQUE + * R -> VKD3D_DATA_RESOURCE + * S -> VKD3D_DATA_SAMPLER + * U -> VKD3D_DATA_UAV + */ +static const struct vkd3d_sm4_opcode_info opcode_table[] = +{ + {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, + {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, + {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, + {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, + {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, + {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, + {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, + {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, + {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, + {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, + {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, + {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, + {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, + {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, + {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, + {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, + {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, + {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, + {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, + {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, + {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, + {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, + {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, + {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, + {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, + {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, + {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, + {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, + {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, + {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, + {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, + {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, + {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, + 
{VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, + {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, + {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, + {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, + {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, + {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, + {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, + {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, + {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, + {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", + shader_sm4_read_shader_data}, + {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, + {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, + {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, + {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, + {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, + {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, + {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, + {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, + {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, + {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, + {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, + {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, + {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, + {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, + {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, + {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, + {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, + {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, + {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, + {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, + {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, + {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, + {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, + {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, + {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", + shader_sm4_read_dcl_constant_buffer}, + {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", + shader_sm4_read_dcl_sampler}, + {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", + shader_sm4_read_dcl_index_range}, + {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", + shader_sm4_read_dcl_output_topology}, + {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", + shader_sm4_read_dcl_input_primitive}, + {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", + shader_sm4_read_dcl_input_ps}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, 
VKD3DSIH_DCL_INPUT_PS_SIV, "", "", + shader_sm4_read_dcl_input_ps_siv}, + {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", + shader_sm4_read_dcl_indexable_temp}, + {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", + shader_sm4_read_dcl_global_flags}, + {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, + {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, + {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, + {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, + {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, + {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, + {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, + {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", + shader_sm5_read_fcall}, + {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, + {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, + {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, + {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, + {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, + {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, + {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, + {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, + {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, + {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, + {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, + {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, + {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, + {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", + shader_sm5_read_dcl_function_body}, + {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", + shader_sm5_read_dcl_function_table}, + {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", + shader_sm5_read_dcl_interface}, + {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", + shader_sm5_read_dcl_tessellator_domain}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", + shader_sm5_read_dcl_tessellator_partitioning}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", + shader_sm5_read_dcl_tessellator_output_primitive}, + {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, 
VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", + shader_sm5_read_dcl_hs_max_tessfactor}, + {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", + shader_sm5_read_dcl_thread_group}, + {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", + shader_sm5_read_dcl_uav_raw}, + {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", + shader_sm5_read_dcl_uav_structured}, + {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", + shader_sm5_read_dcl_tgsm_raw}, + {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", + shader_sm5_read_dcl_tgsm_structured}, + {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", + shader_sm5_read_dcl_resource_raw}, + {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", + shader_sm5_read_dcl_resource_structured}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, + {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, + {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, + {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, + {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, + {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", + shader_sm5_read_sync}, + {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, + {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, + {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, + {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, + {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, + {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, + {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, + {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, + {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, + 
{VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, + {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, + {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, + {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, + {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, + {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, + {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, + {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, + {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, + {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, + {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, + {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, + {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, + {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, + {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, + {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, + {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, + {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, + {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, + {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, + {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, + {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, + {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, + {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, +}; + +static const enum vkd3d_shader_register_type register_type_table[] = +{ + /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, + /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, + /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, + /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, + /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, + /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, + /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, + /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, + /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, + /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, + /* UNKNOWN */ ~0u, + /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, + /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, + /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, + /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, + /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, + /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, + /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, + /* UNKNOWN */ ~0u, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, + /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, + /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, + /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, + /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, + /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, + /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, + /* UNKNOWN */ ~0u, + /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, + /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, + /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, + /* 
VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, + /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, + /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, + /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, + /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, + /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, + /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, + /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, + /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, +}; + +static const enum vkd3d_shader_register_precision register_precision_table[] = +{ + /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, + /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, + /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, +}; + +static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) +{ + unsigned int i; + + for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) + { + if (opcode == opcode_table[i].opcode) return &opcode_table[i]; + } + + return NULL; +} + +static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) +{ + switch (sm4->p.shader_version.type) + { + case VKD3D_SHADER_TYPE_PIXEL: + if (reg->type == VKD3DSPR_OUTPUT) + { + unsigned int reg_idx = reg->idx[0].offset; + + if (reg_idx >= ARRAY_SIZE(sm4->output_map)) + { + /* Validated later */ + break; + } + + reg->type = VKD3DSPR_COLOROUT; + reg->idx[0].offset = sm4->output_map[reg_idx]; + } + break; + + default: + break; + } +} + +static enum vkd3d_data_type map_data_type(char t) +{ + switch (t) + { + case 'd': + return VKD3D_DATA_DOUBLE; + case 'f': + return VKD3D_DATA_FLOAT; + case 'i': + return VKD3D_DATA_INT; + case 'u': + return VKD3D_DATA_UINT; + case 'O': + return VKD3D_DATA_OPAQUE; + case 'R': + return VKD3D_DATA_RESOURCE; + case 'S': + return VKD3D_DATA_SAMPLER; + case 'U': + return VKD3D_DATA_UAV; + default: + ERR("Invalid data type '%c'.\n", t); + return VKD3D_DATA_FLOAT; + } +} + +static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) +{ + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + + shader_instruction_array_destroy(&parser->instructions); + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm4); +} + +static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) +{ + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { + struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); + + if (!(reg_idx->rel_addr = rel_addr)) + { + ERR("Failed to get src param for relative addressing.\n"); + return false; + } + + if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) + reg_idx->offset = *(*ptr)++; + else + reg_idx->offset = 0; + shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); + } + else + { + reg_idx->rel_addr = NULL; + reg_idx->offset = *(*ptr)++; + } + + return true; +} + +static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) +{ + switch (register_type) + { + case VKD3D_SM4_RT_SAMPLER: + case VKD3D_SM4_RT_RESOURCE: + 
+        case VKD3D_SM4_RT_CONSTBUFFER:
+        case VKD3D_SM5_RT_UAV:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end,
+        enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier)
+{
+    enum vkd3d_sm4_register_precision precision;
+    enum vkd3d_sm4_register_type register_type;
+    enum vkd3d_sm4_extended_operand_type type;
+    enum vkd3d_sm4_register_modifier m;
+    uint32_t token, order, extended;
+
+    if (*ptr >= end)
+    {
+        WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+        return false;
+    }
+    token = *(*ptr)++;
+
+    register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT;
+    if (register_type >= ARRAY_SIZE(register_type_table)
+            || register_type_table[register_type] == VKD3DSPR_INVALID)
+    {
+        FIXME("Unhandled register type %#x.\n", register_type);
+        param->type = VKD3DSPR_TEMP;
+    }
+    else
+    {
+        param->type = register_type_table[register_type];
+    }
+    param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
+    param->non_uniform = false;
+    param->data_type = data_type;
+
+    *modifier = VKD3DSPSM_NONE;
+    if (token & VKD3D_SM4_EXTENDED_OPERAND)
+    {
+        if (*ptr >= end)
+        {
+            WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+            return false;
+        }
+        extended = *(*ptr)++;
+
+        if (extended & VKD3D_SM4_EXTENDED_OPERAND)
+        {
+            FIXME("Skipping second-order extended operand.\n");
+            *ptr += *ptr < end;
+        }
+
+        type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK;
+        if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER)
+        {
+            m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT;
+            switch (m)
+            {
+                case VKD3D_SM4_REGISTER_MODIFIER_NEGATE:
+                    *modifier = VKD3DSPSM_NEG;
+                    break;
+
+                case VKD3D_SM4_REGISTER_MODIFIER_ABS:
+                    *modifier = VKD3DSPSM_ABS;
+                    break;
+
+                case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE:
+                    *modifier = VKD3DSPSM_ABSNEG;
+                    break;
+
+                default:
+                    FIXME("Unhandled register modifier %#x.\n", m);
+                    /* fall-through */
+                case VKD3D_SM4_REGISTER_MODIFIER_NONE:
+                    break;
+            }
+
+            precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT;
+            if (precision >= ARRAY_SIZE(register_precision_table)
+                    || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID)
+            {
+                FIXME("Unhandled register precision %#x.\n", precision);
+                param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID;
+            }
+            else
+            {
+                param->precision = register_precision_table[precision];
+            }
+
+            if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK)
+                param->non_uniform = true;
+
+            extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK
+                    | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK
+                    | VKD3D_SM4_EXTENDED_OPERAND);
+            if (extended)
+                FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended);
+        }
+        else if (type)
+        {
+            FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type);
+        }
+    }
+
+    order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT;
+
+    if (order < 1)
+    {
+        param->idx[0].offset = ~0u;
+        param->idx[0].rel_addr = NULL;
+    }
+    else
+    {
+        DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0;
+        if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[0])))
+        {
+            ERR("Failed to read register index.\n");
+            return false;
+        }
+    }
+
+    if (order < 2)
+    {
+        param->idx[1].offset = ~0u;
+        param->idx[1].rel_addr = NULL;
+    }
+    else
+    {
+        DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1;
+        if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[1])))
+        {
+            ERR("Failed to read register index.\n");
+            return false;
+        }
+    }
+
+    if (order < 3)
+    {
+        param->idx[2].offset = ~0u;
+        param->idx[2].rel_addr = NULL;
+    }
+    else
+    {
+        DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2;
+        if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[2])))
+        {
+            ERR("Failed to read register index.\n");
+            return false;
+        }
+    }
+
+    if (order > 3)
+    {
+        WARN("Unhandled order %u.\n", order);
+        return false;
+    }
+
+    param->idx_count = order;
+
+    if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64)
+    {
+        enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT;
+        unsigned int dword_count;
+
+        switch (dimension)
+        {
+            case VKD3D_SM4_DIMENSION_SCALAR:
+                param->immconst_type = VKD3D_IMMCONST_SCALAR;
+                dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64);
+                if (end - *ptr < dword_count)
+                {
+                    WARN("Invalid ptr %p, end %p.\n", *ptr, end);
+                    return false;
+                }
+                memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD));
+                *ptr += dword_count;
+                break;
+
+            case VKD3D_SM4_DIMENSION_VEC4:
+                param->immconst_type = VKD3D_IMMCONST_VEC4;
+                if (end - *ptr < VKD3D_VEC4_SIZE)
+                {
+                    WARN("Invalid ptr %p, end %p.\n", *ptr, end);
+                    return false;
+                }
+                memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD));
+                *ptr += 4;
+                break;
+
+            default:
+                FIXME("Unhandled dimension %#x.\n", dimension);
+                break;
+        }
+    }
+    else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type))
+    {
+        /* SM5.1 places a symbol identifier in idx[0] and moves
+         * other values up one slot. Normalize to SM5.1. */
+        param->idx[2] = param->idx[1];
+        param->idx[1] = param->idx[0];
+        ++param->idx_count;
+    }
+
+    map_register(priv, param);
+
+    return true;
+}
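+
+/* A sketch of the normalization above, with illustrative values rather than
+ * anything read from a real shader: an SM4 operand "cb2[5]" arrives with
+ * idx_count == 2, idx[0] == 2 (the descriptor ID) and idx[1] == 5 (the
+ * offset). The shift re-creates SM5.1's three-index layout, in which idx[0]
+ * holds the symbol identifier:
+ *
+ *     before: idx = { 2, 5, - }, idx_count = 2
+ *     after:  idx = { 2, 2, 5 }, idx_count = 3
+ *
+ * roughly what an SM5.1 shader would encode as "cb2[2][5]", so the rest of
+ * the parser only ever has to deal with the 5.1 form. */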
+
+static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg)
+{
+    switch (reg->type)
+    {
+        case VKD3DSPR_COVERAGE:
+        case VKD3DSPR_DEPTHOUT:
+        case VKD3DSPR_DEPTHOUTGE:
+        case VKD3DSPR_DEPTHOUTLE:
+        case VKD3DSPR_GSINSTID:
+        case VKD3DSPR_LOCALTHREADINDEX:
+        case VKD3DSPR_OUTPOINTID:
+        case VKD3DSPR_PRIMID:
+        case VKD3DSPR_SAMPLEMASK:
+        case VKD3DSPR_OUTSTENCILREF:
+            return true;
+        default:
+            return false;
+    }
+}
+
+static uint32_t swizzle_from_sm4(uint32_t s)
+{
+    return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3);
+}
+
+static bool register_is_input_output(const struct vkd3d_shader_register *reg)
+{
+    switch (reg->type)
+    {
+        case VKD3DSPR_INPUT:
+        case VKD3DSPR_OUTPUT:
+        case VKD3DSPR_COLOROUT:
+        case VKD3DSPR_INCONTROLPOINT:
+        case VKD3DSPR_OUTCONTROLPOINT:
+        case VKD3DSPR_PATCHCONST:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+static bool register_is_control_point_input(const struct vkd3d_shader_register *reg,
+        const struct vkd3d_shader_sm4_parser *priv)
+{
+    return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT
+            || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE
+            || priv->p.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY));
+}
+
+static unsigned int mask_from_swizzle(unsigned int swizzle)
+{
+    return (1u << vkd3d_swizzle_get_component(swizzle, 0))
+            | (1u << vkd3d_swizzle_get_component(swizzle, 1))
+            | (1u << vkd3d_swizzle_get_component(swizzle, 2))
+            | (1u << vkd3d_swizzle_get_component(swizzle, 3));
+}
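+
+/* A worked example for the two helpers above, using illustrative values: an
+ * SM4 swizzle field of 0x1b has the 2-bit components 00 01 10 11, so
+ * swizzle_from_sm4(0x1b) selects components 3, 2, 1, 0, i.e. ".wzyx".
+ * mask_from_swizzle() then sets one bit per referenced component: ".wzyx"
+ * touches all four components and yields 0xf, while a broadcast ".xxxx"
+ * would yield 0x1. That mask is what the validation below compares against
+ * the masks collected from the signature declarations. */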
+
+static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_parser *priv,
+        const struct vkd3d_shader_register *reg, unsigned int mask)
+{
+    unsigned int idx_count = 1 + register_is_control_point_input(reg, priv);
+    const unsigned int *masks;
+    unsigned int register_idx;
+
+    if (reg->idx_count != idx_count)
+    {
+        vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT,
+                "Invalid index count %u for register type %#x; expected count %u.",
+                reg->idx_count, reg->type, idx_count);
+        return false;
+    }
+
+    switch (reg->type)
+    {
+        case VKD3DSPR_INPUT:
+        case VKD3DSPR_INCONTROLPOINT:
+            masks = priv->input_register_masks;
+            break;
+        case VKD3DSPR_OUTPUT:
+            masks = sm4_parser_is_in_fork_or_join_phase(priv) ? priv->patch_constant_register_masks
+                    : priv->output_register_masks;
+            break;
+        case VKD3DSPR_COLOROUT:
+        case VKD3DSPR_OUTCONTROLPOINT:
+            masks = priv->output_register_masks;
+            break;
+        case VKD3DSPR_PATCHCONST:
+            masks = priv->patch_constant_register_masks;
+            break;
+
+        default:
+            vkd3d_unreachable();
+    }
+
+    register_idx = reg->idx[reg->idx_count - 1].offset;
+    /* The signature element registers have already been checked against MAX_REG_OUTPUT. */
+    if (register_idx >= MAX_REG_OUTPUT || (masks[register_idx] & mask) != mask)
+    {
+        WARN("Failed to find signature element for register type %#x, index %u and mask %#x.\n",
+                reg->type, register_idx, mask);
+        vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER,
+                "Could not find signature element matching register type %#x, index %u and mask %#x.",
+                reg->type, register_idx, mask);
+        return false;
+    }
+
+    return true;
+}
+
+static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr,
+        const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param)
+{
+    DWORD token;
+
+    if (*ptr >= end)
+    {
+        WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+        return false;
+    }
+    token = **ptr;
+
+    if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers))
+    {
+        ERR("Failed to read parameter.\n");
+        return false;
+    }
+
+    if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64)
+    {
+        src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+    }
+    else
+    {
+        enum vkd3d_sm4_swizzle_type swizzle_type =
+                (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT;
+
+        switch (swizzle_type)
+        {
+            case VKD3D_SM4_SWIZZLE_NONE:
+                if (shader_sm4_is_scalar_register(&src_param->reg))
+                    src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+                else
+                    src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+                break;
+
+            case VKD3D_SM4_SWIZZLE_SCALAR:
+                src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT;
+                src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101;
+                break;
+
+            case VKD3D_SM4_SWIZZLE_VEC4:
+                src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT);
+                break;
+
+            default:
+                FIXME("Unhandled swizzle type %#x.\n", swizzle_type);
+                break;
+        }
+    }
+
+    if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv,
+            &src_param->reg, mask_from_swizzle(src_param->swizzle)))
+        return false;
+
+    return true;
+}
+
+static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr,
+        const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param)
+{
+    enum vkd3d_shader_src_modifier modifier;
+    DWORD token;
+
+    if (*ptr >= end)
+    {
+        WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+        return false;
+    }
+    token = **ptr;
+
+    if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier))
+    {
+        ERR("Failed to read parameter.\n");
+        return false;
+    }
+
+    if (modifier != VKD3DSPSM_NONE)
+    {
+        ERR("Invalid source modifier %#x on destination register.\n", modifier);
+        return false;
+    }
+
+    dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT;
+    if (data_type == VKD3D_DATA_DOUBLE)
+        dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask);
+    /* Scalar registers are declared with no write mask in shader bytecode. 
*/ + if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + dst_param->modifiers = 0; + dst_param->shift = 0; + + if (register_is_input_output(&dst_param->reg) && !shader_sm4_validate_input_output_register(priv, + &dst_param->reg, dst_param->write_mask)) + return false; + + return true; +} + +static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +{ + enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; + + switch (modifier_type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + { + static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER + | VKD3D_SM4_MODIFIER_MASK + | VKD3D_SM4_AOFFIMMI_U_MASK + | VKD3D_SM4_AOFFIMMI_V_MASK + | VKD3D_SM4_AOFFIMMI_W_MASK; + + /* Bit fields are used for sign extension. */ + struct + { + int u : 4; + int v : 4; + int w : 4; + } aoffimmi; + + if (modifier & ~recognized_bits) + FIXME("Unhandled instruction modifier %#x.\n", modifier); + + aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; + aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; + aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; + ins->texel_offset.u = aoffimmi.u; + ins->texel_offset.v = aoffimmi.v; + ins->texel_offset.w = aoffimmi.w; + break; + } + + case VKD3D_SM5_MODIFIER_DATA_TYPE: + { + DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; + unsigned int i; + + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + ins->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + ins->resource_data_type[i] = data_type_table[data_type]; + } + } + break; + } + + case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: + { + enum vkd3d_sm4_resource_type resource_type + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; + + if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) + ins->raw = true; + else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) + ins->structured = true; + + if (resource_type < ARRAY_SIZE(resource_type_table)) + ins->resource_type = resource_type_table[resource_type]; + else + { + FIXME("Unhandled resource type %#x.\n", resource_type); + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + + ins->resource_stride + = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; + break; + } + + default: + FIXME("Unhandled instruction modifier %#x.\n", modifier); + } +} + +static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_sm4_opcode_info *opcode_info; + uint32_t opcode_token, opcode, previous_token; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; + const uint32_t **ptr = &sm4->ptr; + unsigned int i, len; + size_t remaining; + const uint32_t *p; + DWORD precise; + + if (*ptr >= sm4->end) + { + WARN("End of byte-code, failed to read opcode.\n"); + goto fail; + } + remaining = sm4->end - *ptr; + + ++sm4->p.location.line; + + opcode_token = *(*ptr)++; + opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + + len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> 
VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + if (!len) + { + if (remaining < 2) + { + WARN("End of byte-code, failed to read length token.\n"); + goto fail; + } + len = **ptr; + } + if (!len || remaining < len) + { + WARN("Read invalid length %u (remaining %zu).\n", len, remaining); + goto fail; + } + --len; + + if (!(opcode_info = get_opcode_info(opcode))) + { + FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); + ins->handler_idx = VKD3DSIH_INVALID; + *ptr += len; + return; + } + + ins->handler_idx = opcode_info->handler_idx; + if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE + || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + sm4->phase = ins->handler_idx; + sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; + ins->flags = 0; + ins->coissue = false; + ins->raw = false; + ins->structured = false; + ins->predicate = NULL; + ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); + ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); + ins->src = src_params = shader_parser_get_src_params(&sm4->p, ins->src_count); + if (!src_params && ins->src_count) + { + ERR("Failed to allocate src parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; + ins->resource_stride = 0; + ins->resource_data_type[0] = VKD3D_DATA_FLOAT; + ins->resource_data_type[1] = VKD3D_DATA_FLOAT; + ins->resource_data_type[2] = VKD3D_DATA_FLOAT; + ins->resource_data_type[3] = VKD3D_DATA_FLOAT; + memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + + p = *ptr; + *ptr += len; + + if (opcode_info->read_opcode_func) + { + ins->dst = NULL; + ins->dst_count = 0; + opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); + } + else + { + enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; + + previous_token = opcode_token; + while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) + shader_sm4_read_instruction_modifier(previous_token = *p++, ins); + + ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) + { + ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; + instruction_dst_modifier = VKD3DSPDM_SATURATE; + } + precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; + ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; + + ins->dst = dst_params = shader_parser_get_dst_params(&sm4->p, ins->dst_count); + if (!dst_params && ins->dst_count) + { + ERR("Failed to allocate dst parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + for (i = 0; i < ins->dst_count; ++i) + { + if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), + &dst_params[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + dst_params[i].modifiers |= instruction_dst_modifier; + } + + for (i = 0; i < ins->src_count; ++i) + { + if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), + &src_params[i]))) + { + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + } + } + + return; + +fail: + *ptr = sm4->end; + ins->handler_idx = VKD3DSIH_INVALID; + return; +} + +static const struct vkd3d_shader_parser_ops 
shader_sm4_parser_ops = +{ + .parser_destroy = shader_sm4_destroy, +}; + +static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, + size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_version version; + uint32_t version_token, token_count; + unsigned int i; + + if (byte_code_size / sizeof(*byte_code) < 2) + { + WARN("Invalid byte code size %lu.\n", (long)byte_code_size); + return false; + } + + version_token = byte_code[0]; + TRACE("Version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) + { + WARN("Invalid token count %u.\n", token_count); + return false; + } + + sm4->start = &byte_code[2]; + sm4->end = &byte_code[token_count]; + + switch (version_token >> 16) + { + case VKD3D_SM4_PS: + version.type = VKD3D_SHADER_TYPE_PIXEL; + break; + + case VKD3D_SM4_VS: + version.type = VKD3D_SHADER_TYPE_VERTEX; + break; + + case VKD3D_SM4_GS: + version.type = VKD3D_SHADER_TYPE_GEOMETRY; + break; + + case VKD3D_SM5_HS: + version.type = VKD3D_SHADER_TYPE_HULL; + break; + + case VKD3D_SM5_DS: + version.type = VKD3D_SHADER_TYPE_DOMAIN; + break; + + case VKD3D_SM5_CS: + version.type = VKD3D_SHADER_TYPE_COMPUTE; + break; + + default: + FIXME("Unrecognised shader type %#x.\n", version_token >> 16); + } + version.major = VKD3D_SM4_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + /* Estimate instruction count to avoid reallocation in most shaders. */ + if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, + token_count / 7u + 20)) + return false; + sm4->ptr = sm4->start; + + memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); + for (i = 0; i < output_signature->element_count; ++i) + { + struct signature_element *e = &output_signature->elements[i]; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL + && ascii_strcasecmp(e->semantic_name, "SV_Target")) + continue; + if (e->register_index >= ARRAY_SIZE(sm4->output_map)) + { + WARN("Invalid output index %u.\n", e->register_index); + continue; + } + + sm4->output_map[e->register_index] = e->semantic_index; + } + + return true; +} + +static bool shader_sm4_parser_validate_signature(struct vkd3d_shader_sm4_parser *sm4, + const struct shader_signature *signature, unsigned int *masks, const char *name) +{ + unsigned int i, register_idx, register_count, mask; + + for (i = 0; i < signature->element_count; ++i) + { + register_idx = signature->elements[i].register_index; + register_count = signature->elements[i].register_count; + if (register_idx != ~0u && (register_idx >= MAX_REG_OUTPUT || MAX_REG_OUTPUT - register_idx < register_count)) + { + WARN("%s signature element %u unhandled register index %u, count %u.\n", + name, i, register_idx, register_count); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS, + "%s signature element %u register index %u, count %u exceeds maximum index of %u.", name, + i, register_idx, register_count, MAX_REG_OUTPUT - 1); + return false; + } + + if (!vkd3d_bitmask_is_contiguous(mask = signature->elements[i].mask)) + { + WARN("%s signature element %u mask %#x is not contiguous.\n", name, i, mask); + vkd3d_shader_parser_warning(&sm4->p, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS, + "%s signature element %u mask %#x is not 
contiguous.", name, i, mask); + } + + if (register_idx != ~0u) + masks[register_idx] |= mask; + } + + return true; +} + +static int index_range_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct sm4_index_range)); +} + +static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_sm4_parser *sm4) +{ + if (!sm4->input_index_ranges.count || !sm4->output_index_ranges.count) + return; + + if (sm4->input_index_ranges.count == sm4->output_index_ranges.count) + { + qsort(sm4->input_index_ranges.ranges, sm4->input_index_ranges.count, sizeof(sm4->input_index_ranges.ranges[0]), + index_range_compare); + qsort(sm4->output_index_ranges.ranges, sm4->output_index_ranges.count, sizeof(sm4->output_index_ranges.ranges[0]), + index_range_compare); + if (!memcmp(sm4->input_index_ranges.ranges, sm4->output_index_ranges.ranges, + sm4->input_index_ranges.count * sizeof(sm4->input_index_ranges.ranges[0]))) + return; + } + + /* This is very unlikely to occur and would complicate the default control point phase implementation. */ + WARN("Default phase index ranges are not identical.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Default control point phase input and output index range declarations are not identical."); + return; +} + +int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_instruction_array *instructions; + struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_sm4_parser *sm4; + int ret; + + if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_desc = &sm4->p.shader_desc; + if ((ret = shader_extract_from_dxbc(&compile_info->source, + message_context, compile_info->source_name, shader_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm4); + return ret; + } + + if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, &shader_desc->output_signature, message_context)) + { + WARN("Failed to initialise shader parser.\n"); + free_shader_desc(shader_desc); + vkd3d_free(sm4); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + + if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, + sm4->input_register_masks, "Input") + || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, + sm4->output_register_masks, "Output") + || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, + sm4->patch_constant_register_masks, "Patch constant")) + { + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_INVALID_SHADER; + } + + instructions = &sm4->p.instructions; + while (sm4->ptr != sm4->end) + { + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ERR("Failed to allocate instructions.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ins = &instructions->elements[instructions->count]; + shader_sm4_read_instruction(sm4, ins); + + if (ins->handler_idx == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + shader_sm4_destroy(&sm4->p); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ++instructions->count; + } + if 
(sm4->p.shader_version.type == VKD3D_SHADER_TYPE_HULL && !sm4->has_control_point_phase && !sm4->p.failed) + shader_sm4_validate_default_phase_index_ranges(sm4); + + *parser = &sm4->p; + + return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + +static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); + +static bool type_is_integer(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return true; + + default: + return false; + } +} + +bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + enum vkd3d_sm4_swizzle_type swizzle_type; + enum vkd3d_sm4_register_type type; + bool has_idx; + } + register_table[] = + { + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, + + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, + + /* Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. */ + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type) + { + *type = register_table[i].type; + if (swizzle_type) + *swizzle_type = register_table[i].swizzle_type; + *has_idx = register_table[i].has_idx; + return true; + } + } + + return false; +} + +bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3D_NAME *usage) +{ + unsigned int i; + + static const struct + { + const char *name; + bool output; + enum vkd3d_shader_type shader_type; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, + + {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, + {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, + + {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, + 
{"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + + {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, + {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, + + {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, + }; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if (!ascii_strcasecmp(semantic->name, semantics[i].name) + && output == semantics[i].output + && ctx->profile->type == semantics[i].shader_type + && !ascii_strncasecmp(semantic->name, "sv_", 3)) + { + *usage = semantics[i].usage; + return true; + } + } + + if (!ascii_strncasecmp(semantic->name, "sv_", 3)) + return false; + + *usage = D3D_NAME_UNDEFINED; + return true; +} + +static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) +{ + /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN + * sections to be aligned. Without this, the sections themselves will be + * aligned, but their reported sizes won't. */ + size_t size = bytecode_align(buffer); + + dxbc_writer_add_section(dxbc, tag, buffer->data, size); +} + +static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + struct vkd3d_string_buffer *string; + const struct hlsl_ir_var *var; + size_t count_position; + unsigned int i; + bool ret; + + count_position = put_u32(&buffer, 0); + put_u32(&buffer, 8); /* unknown */ + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; + enum vkd3d_sm4_register_type type; + uint32_t usage_idx, reg_idx; + D3D_NAME usage; + bool has_idx; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + + ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + assert(ret); + if (usage == ~0u) + continue; + usage_idx = var->semantic.index; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) + { + reg_idx = has_idx ? var->semantic.index : ~0u; + } + else + { + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); + type = VKD3D_SM4_RT_INPUT; + reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; + } + + use_mask = width; /* FIXME: accurately report use mask */ + if (output) + use_mask = 0xf ^ use_mask; + + /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ + if (usage >= 64) + usage = 0; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, usage_idx); + put_u32(&buffer, usage); + switch (var->data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); + break; + + case HLSL_TYPE_INT: + put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); + break; + + default: + if ((string = hlsl_type_to_string(ctx, var->data_type))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid data type %s for semantic variable %s.", string->buffer, var->name); + hlsl_release_string_buffer(ctx, string); + put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); + } + put_u32(&buffer, reg_idx); + put_u32(&buffer, vkd3d_make_u16(width, use_mask)); + } + + i = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + const char *semantic = var->semantic.name; + size_t string_offset; + D3D_NAME usage; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + + hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + if (usage == ~0u) + continue; + + if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) + string_offset = put_string(&buffer, "SV_Target"); + else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) + string_offset = put_string(&buffer, "SV_Depth"); + else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) + string_offset = put_string(&buffer, "SV_Position"); + else + string_offset = put_string(&buffer, semantic); + set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); + } + + set_u32(&buffer, count_position, i); + + add_section(dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); +} + +static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return sm4_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3D_SVC_MATRIX_COLUMNS; + else + return D3D_SVC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3D_SVC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3D_SVC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3D_SVC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3D_SVC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->class); + vkd3d_unreachable(); + } +} + +static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3D_SVT_BOOL; + case HLSL_TYPE_DOUBLE: + return D3D_SVT_DOUBLE; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3D_SVT_FLOAT; + case HLSL_TYPE_INT: + return D3D_SVT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3D_SVT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SVT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SVT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3D_SVT_SAMPLER; + default: + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_STRING: + return D3D_SVT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SVT_TEXTURE2D; + case HLSL_SAMPLER_DIM_2DMS: + return D3D_SVT_TEXTURE2DMS; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SVT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3D_SVT_TEXTURE; + default: + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_UINT: + return D3D_SVT_UINT; + case HLSL_TYPE_VERTEXSHADER: + return D3D_SVT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3D_SVT_VOID; + default: + vkd3d_unreachable(); + } +} + +static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + const char *name = array_type->name ? 
array_type->name : "<unnamed>"; + const struct hlsl_profile_info *profile = ctx->profile; + unsigned int field_count = 0, array_size = 0; + size_t fields_offset = 0, name_offset = 0; + size_t i; + + if (type->bytecode_offset) + return; + + if (profile->major_version >= 5) + name_offset = put_string(buffer, name); + + if (type->class == HLSL_CLASS_ARRAY) + array_size = hlsl_get_multiarray_size(type); + + if (array_type->class == HLSL_CLASS_STRUCT) + { + field_count = array_type->e.record.field_count; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm4_type(ctx, buffer, field->type); + } + + fields_offset = bytecode_align(buffer); + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + put_u32(buffer, field->name_bytecode_offset); + put_u32(buffer, field->type->bytecode_offset); + put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); + + if (profile->major_version >= 5) + { + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, name_offset); + } +} + +static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_type(type->e.array.type); + + switch (type->base_type) + { + case HLSL_TYPE_SAMPLER: + return D3D_SIT_SAMPLER; + case HLSL_TYPE_TEXTURE: + return D3D_SIT_TEXTURE; + case HLSL_TYPE_UAV: + return D3D_SIT_UAV_RWTYPED; + default: + vkd3d_unreachable(); + } +} + +static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_format(type->e.array.type); + + switch (type->e.resource_format->base_type) + { + case HLSL_TYPE_DOUBLE: + return D3D_RETURN_TYPE_DOUBLE; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3D_RETURN_TYPE_FLOAT; + + case HLSL_TYPE_INT: + return D3D_RETURN_TYPE_SINT; + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + return D3D_RETURN_TYPE_UINT; + + default: + vkd3d_unreachable(); + } +} + +static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return sm4_rdef_resource_dimension(type->e.array.type); + + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SRV_DIMENSION_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SRV_DIMENSION_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SRV_DIMENSION_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SRV_DIMENSION_TEXTURECUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return D3D_SRV_DIMENSION_TEXTURE1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return D3D_SRV_DIMENSION_TEXTURE2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return D3D_SRV_DIMENSION_BUFFER; + default: + vkd3d_unreachable(); + } +} + +static int 
sm4_compare_extern_resources(const void *a, const void *b) +{ + const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; + const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; + enum hlsl_regset aa_regset, bb_regset; + + aa_regset = hlsl_type_get_regset(aa->data_type); + bb_regset = hlsl_type_get_regset(bb->data_type); + + if (aa_regset != bb_regset) + return aa_regset - bb_regset; + + return aa->regs[aa_regset].id - bb->regs[bb_regset].id; +} + +static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +{ + const struct hlsl_ir_var **extern_resources = NULL; + const struct hlsl_ir_var *var; + enum hlsl_regset regset; + size_t capacity = 0; + + *count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + *count = 0; + return NULL; + } + + extern_resources[*count] = var; + ++*count; + } + + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); + return extern_resources; +} + +static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +{ + unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + + static const uint16_t target_types[] = + { + 0xffff, /* PIXEL */ + 0xfffe, /* VERTEX */ + 0x4753, /* GEOMETRY */ + 0x4853, /* HULL */ + 0x4453, /* DOMAIN */ + 0x4353, /* COMPUTE */ + }; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + resource_count += extern_resources_count; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + { + ++cbuffer_count; + ++resource_count; + } + } + + put_u32(&buffer, cbuffer_count); + cbuffer_position = put_u32(&buffer, 0); + put_u32(&buffer, resource_count); + resource_position = put_u32(&buffer, 0); + put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), + target_types[profile->type])); + put_u32(&buffer, 0); /* FIXME: compilation flags */ + creator_position = put_u32(&buffer, 0); + + if (profile->major_version >= 5) + { + put_u32(&buffer, TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ + put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ + put_u32(&buffer, 0); /* unknown; possibly a null terminator */ + } + + /* Bound resources. 
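+ * Each binding is written as eight uint32_t fields: name offset (patched in
+ * later), input type, return type, dimension, sample count, bind point,
+ * bind count and flags; the name patch-up loops below rely on this
+ * eight-field stride.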
*/ + + resources_offset = bytecode_align(&buffer); + set_u32(&buffer, resource_position, resources_offset); + + for (i = 0; i < extern_resources_count; ++i) + { + enum hlsl_regset regset; + uint32_t flags = 0; + + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type); + + if (var->reg_reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, sm4_resource_type(var->data_type)); + if (regset == HLSL_REGSET_SAMPLERS) + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); + put_u32(&buffer, 0); + } + else + { + unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + + put_u32(&buffer, sm4_resource_format(var->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } + put_u32(&buffer, var->regs[regset].id); + put_u32(&buffer, var->regs[regset].bind_count); + put_u32(&buffer, flags); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + uint32_t flags = 0; + + if (!cbuffer->reg.allocated) + continue; + + if (cbuffer->reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + put_u32(&buffer, 0); /* return type */ + put_u32(&buffer, 0); /* dimension */ + put_u32(&buffer, 0); /* multisample count */ + put_u32(&buffer, cbuffer->reg.id); /* bind point */ + put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, flags); /* flags */ + } + + for (i = 0; i < extern_resources_count; ++i) + { + var = extern_resources[i]; + + string_offset = put_string(&buffer, var->name); + set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); + } + + /* Buffers. */ + + cbuffers_offset = bytecode_align(&buffer); + set_u32(&buffer, cbuffer_position, cbuffers_offset); + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + unsigned int var_count = 0; + + if (!cbuffer->reg.allocated) + continue; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + ++var_count; + } + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var_count); + put_u32(&buffer, 0); /* variable offset */ + put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); + put_u32(&buffer, 0); /* FIXME: flags */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + size_t vars_start = bytecode_align(&buffer); + + if (!cbuffer->reg.allocated) + continue; + + set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + uint32_t flags = 0; + + if (var->last_read) + flags |= D3D_SVF_USED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var->buffer_offset * sizeof(float)); + put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); + put_u32(&buffer, flags); + put_u32(&buffer, 0); /* type */ + put_u32(&buffer, 0); /* FIXME: default value */ + + if (profile->major_version >= 5) + { + put_u32(&buffer, 0); /* texture start */ + put_u32(&buffer, 0); /* texture count */ + put_u32(&buffer, 0); /* sampler start */ + put_u32(&buffer, 0); /* sampler count */ + } + } + } + + j = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6); + size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); + size_t string_offset = put_string(&buffer, var->name); + + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + ++j; + } + } + } + + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + + add_section(dxbc, TAG_RDEF, &buffer); + + vkd3d_free(extern_resources); +} + +static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return VKD3D_SM4_RESOURCE_TEXTURE_1D; + case HLSL_SAMPLER_DIM_2D: + return VKD3D_SM4_RESOURCE_TEXTURE_2D; + case HLSL_SAMPLER_DIM_3D: + return VKD3D_SM4_RESOURCE_TEXTURE_3D; + case HLSL_SAMPLER_DIM_CUBE: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return VKD3D_SM4_RESOURCE_BUFFER; + default: + vkd3d_unreachable(); + } +} + +struct sm4_instruction_modifier +{ + enum vkd3d_sm4_instruction_modifier type; + + union + { + struct + { + int u, v, w; + } aoffimmi; + } u; +}; + +static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) +{ + uint32_t word = 0; + + word |= VKD3D_SM4_MODIFIER_MASK & imod->type; + + switch (imod->type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); + assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); + assert(-8 <= imod->u.aoffimmi.w && 
imod->u.aoffimmi.w <= 7);
+            word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT;
+            word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT;
+            word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT;
+            break;
+
+        default:
+            vkd3d_unreachable();
+    }
+
+    return word;
+}
+
+struct sm4_register
+{
+    enum vkd3d_sm4_register_type type;
+    uint32_t idx[2];
+    unsigned int idx_count;
+    enum vkd3d_sm4_dimension dim;
+    uint32_t immconst_uint[4];
+    unsigned int mod;
+};
+
+struct sm4_instruction
+{
+    enum vkd3d_sm4_opcode opcode;
+
+    struct sm4_instruction_modifier modifiers[1];
+    unsigned int modifier_count;
+
+    struct sm4_dst_register
+    {
+        struct sm4_register reg;
+        unsigned int writemask;
+    } dsts[2];
+    unsigned int dst_count;
+
+    struct sm4_src_register
+    {
+        struct sm4_register reg;
+        enum vkd3d_sm4_swizzle_type swizzle_type;
+        unsigned int swizzle;
+    } srcs[5];
+    unsigned int src_count;
+
+    unsigned int byte_stride;
+
+    uint32_t idx[3];
+    unsigned int idx_count;
+};
+
+static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg,
+        unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type,
+        const struct hlsl_deref *deref, const struct hlsl_type *data_type)
+{
+    const struct hlsl_ir_var *var = deref->var;
+
+    if (var->is_uniform)
+    {
+        enum hlsl_regset regset = hlsl_type_get_regset(data_type);
+
+        if (regset == HLSL_REGSET_TEXTURES)
+        {
+            reg->type = VKD3D_SM4_RT_RESOURCE;
+            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+            if (swizzle_type)
+                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
+            reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id;
+            reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref);
+            assert(deref->offset_regset == HLSL_REGSET_TEXTURES);
+            reg->idx_count = 1;
+            *writemask = VKD3DSP_WRITEMASK_ALL;
+        }
+        else if (regset == HLSL_REGSET_UAVS)
+        {
+            reg->type = VKD3D_SM5_RT_UAV;
+            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+            if (swizzle_type)
+                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
+            reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id;
+            reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref);
+            assert(deref->offset_regset == HLSL_REGSET_UAVS);
+            reg->idx_count = 1;
+            *writemask = VKD3DSP_WRITEMASK_ALL;
+        }
+        else if (regset == HLSL_REGSET_SAMPLERS)
+        {
+            reg->type = VKD3D_SM4_RT_SAMPLER;
+            reg->dim = VKD3D_SM4_DIMENSION_NONE;
+            if (swizzle_type)
+                *swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
+            reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id;
+            reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref);
+            assert(deref->offset_regset == HLSL_REGSET_SAMPLERS);
+            reg->idx_count = 1;
+            *writemask = VKD3DSP_WRITEMASK_ALL;
+        }
+        else
+        {
+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
+
+            assert(data_type->class <= HLSL_CLASS_VECTOR);
+            reg->type = VKD3D_SM4_RT_CONSTBUFFER;
+            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+            if (swizzle_type)
+                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
+            reg->idx[0] = var->buffer->reg.id;
+            reg->idx[1] = offset / 4;
+            reg->idx_count = 2;
+            *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
+        }
+    }
+    else if (var->is_input_semantic)
+    {
+        bool has_idx;
+
+        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, swizzle_type, &has_idx))
+        {
+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
+
+            if (has_idx)
+            {
+                reg->idx[0] = var->semantic.index + offset / 4;
+                reg->idx_count = 1;
+            }
+
+            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
+        }
+        else
+        {
+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+
+            assert(hlsl_reg.allocated);
+            reg->type = VKD3D_SM4_RT_INPUT;
+            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+            if (swizzle_type)
+                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
+            reg->idx[0] = hlsl_reg.id;
+            reg->idx_count = 1;
+            *writemask = hlsl_reg.writemask;
+        }
+    }
+    else if (var->is_output_semantic)
+    {
+        bool has_idx;
+
+        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, swizzle_type, &has_idx))
+        {
+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
+
+            if (has_idx)
+            {
+                reg->idx[0] = var->semantic.index + offset / 4;
+                reg->idx_count = 1;
+            }
+
+            if (reg->type == VKD3D_SM4_RT_DEPTHOUT)
+                reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
+            else
+                reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
+        }
+        else
+        {
+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+
+            assert(hlsl_reg.allocated);
+            reg->type = VKD3D_SM4_RT_OUTPUT;
+            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+            reg->idx[0] = hlsl_reg.id;
+            reg->idx_count = 1;
+            *writemask = hlsl_reg.writemask;
+        }
+    }
+    else
+    {
+        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+
+        assert(hlsl_reg.allocated);
+        reg->type = VKD3D_SM4_RT_TEMP;
+        reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+        if (swizzle_type)
+            *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
+        reg->idx[0] = hlsl_reg.id;
+        reg->idx_count = 1;
+        *writemask = hlsl_reg.writemask;
+    }
+}
+
+static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src,
+        const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask)
+{
+    unsigned int writemask;
+
+    sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type);
+    if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4)
+        src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
+}
+
+static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask,
+        enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr)
+{
+    assert(instr->reg.allocated);
+    reg->type = VKD3D_SM4_RT_TEMP;
+    reg->dim = VKD3D_SM4_DIMENSION_VEC4;
+    *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
+    reg->idx[0] = instr->reg.id;
+    reg->idx_count = 1;
+    *writemask = instr->reg.writemask;
+}
+
+static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr)
+{
+    enum vkd3d_sm4_swizzle_type swizzle_type;
+
+    sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr);
+}
+
+static void sm4_src_from_constant_value(struct sm4_src_register *src,
+        const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask)
+{
+    src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
+    src->reg.type = VKD3D_SM4_RT_IMMCONST;
+    if (width == 1)
+    {
+        src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
+        src->reg.immconst_uint[0] = value->u[0].u;
+    }
+    else
+    {
+        unsigned int i, j = 0;
+
+        src->reg.dim = VKD3D_SM4_DIMENSION_VEC4;
+        for (i = 0; i < 4; ++i)
+        {
+            if (map_writemask & (1u << i))
+                src->reg.immconst_uint[i] = value->u[j++].u;
+        }
+    }
+}
+
+static void sm4_src_from_node(struct sm4_src_register *src,
+        const struct hlsl_ir_node *instr, unsigned int map_writemask)
+{
+    unsigned int writemask;
+
+    if (instr->type == HLSL_IR_CONSTANT)
+    {
+        struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
+
+        sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask);
+        return;
+    }
+
+    sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr);
+    if (src->swizzle_type ==
VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + +static uint32_t sm4_encode_register(const struct sm4_register *reg) +{ + return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) + | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) + | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); +} + +static uint32_t sm4_register_order(const struct sm4_register *reg) +{ + uint32_t order = 1; + if (reg->type == VKD3D_SM4_RT_IMMCONST) + order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; + order += reg->idx_count; + if (reg->mod) + ++order; + return order; +} + +static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int size = 1, i, j; + + size += instr->modifier_count; + for (i = 0; i < instr->dst_count; ++i) + size += sm4_register_order(&instr->dsts[i].reg); + for (i = 0; i < instr->src_count; ++i) + size += sm4_register_order(&instr->srcs[i].reg); + size += instr->idx_count; + if (instr->byte_stride) + ++size; + + token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + + if (instr->modifier_count > 0) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + + for (i = 0; i < instr->modifier_count; ++i) + { + token = sm4_encode_instruction_modifier(&instr->modifiers[i]); + if (instr->modifier_count > i + 1) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + } + + for (i = 0; i < instr->dst_count; ++i) + { + token = sm4_encode_register(&instr->dsts[i].reg); + if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) + token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; + put_u32(buffer, token); + + for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) + put_u32(buffer, instr->dsts[i].reg.idx[j]); + } + + for (i = 0; i < instr->src_count; ++i) + { + token = sm4_encode_register(&instr->srcs[i].reg); + token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; + if (instr->srcs[i].reg.mod) + token |= VKD3D_SM4_EXTENDED_OPERAND; + put_u32(buffer, token); + + if (instr->srcs[i].reg.mod) + put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); + + for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) + put_u32(buffer, instr->srcs[i].reg.idx[j]); + + if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); + if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); + } + } + } + + if (instr->byte_stride) + put_u32(buffer, instr->byte_stride); + + for (j = 0; j < instr->idx_count; ++j) + put_u32(buffer, instr->idx[j]); +} + +static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + const struct hlsl_ir_node *texel_offset) +{ + struct sm4_instruction_modifier modif; + struct hlsl_ir_constant *offset; + + if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) + return false; + offset = hlsl_ir_constant(texel_offset); + + modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; + modif.u.aoffimmi.u = offset->value.u[0].i; + modif.u.aoffimmi.v = 0; + modif.u.aoffimmi.w = 0; + if (offset->node.data_type->dimx > 1) + modif.u.aoffimmi.v = offset->value.u[1].i; + if (offset->node.data_type->dimx > 2) + modif.u.aoffimmi.w = 
offset->value.u[2].i; + if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 + || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 + || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) + return false; + + instr->modifiers[instr->modifier_count++] = modif; + return true; +} + +static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) +{ + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, + .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, + .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, + .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), + .src_count = 1, + }; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + + if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; + + for (i = 0; i < count; ++i) + { + if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + + instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm4_instruction(buffer, &instr); + } +} + +static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool uav) +{ + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; + + component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + + for (i = 0; i < count; ++i) + { + if (!var->objects_usage[regset][i].used) + continue; + + instr = (struct sm4_instruction) + { + .dsts[0].reg.type = uav ? 
VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + + .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx_count = 1, + }; + + if (uav) + { + switch (var->data_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; + instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; + break; + default: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; + break; + } + } + else + { + instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; + } + instr.opcode |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); + + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + write_sm4_instruction(buffer, &instr); + } +} + +static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + const struct hlsl_profile_info *profile = ctx->profile; + const bool output = var->is_output_semantic; + D3D_NAME usage; + bool has_idx; + + struct sm4_instruction instr = + { + .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .dst_count = 1, + }; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + { + if (has_idx) + { + instr.dsts[0].reg.idx[0] = var->semantic.index; + instr.dsts[0].reg.idx_count = 1; + } + else + { + instr.dsts[0].reg.idx_count = 0; + } + instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; + } + else + { + instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; + instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + + if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) + instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + + hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + if (usage == ~0u) + usage = D3D_NAME_UNDEFINED; + + if (var->is_input_semantic) + { + switch (usage) + { + case D3D_NAME_UNDEFINED: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; + break; + + case D3D_NAME_INSTANCE_ID: + case D3D_NAME_PRIMITIVE_ID: + case D3D_NAME_VERTEX_ID: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; + break; + + default: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; + break; + } + + if (profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; + + if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) + mode = VKD3DSIM_CONSTANT; + + instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + } + } + else + { + if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; + else + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; + } + + switch (usage) + { + case D3D_NAME_COVERAGE: + case D3D_NAME_DEPTH: + case D3D_NAME_DEPTH_GREATER_EQUAL: + case D3D_NAME_DEPTH_LESS_EQUAL: + case D3D_NAME_TARGET: + case D3D_NAME_UNDEFINED: + break; + + default: + instr.idx_count = 1; + instr.idx[0] = usage; + break; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_TEMPS, + + .idx = {temp_count}, + .idx_count = 1, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, + + .idx = {thread_count[0], thread_count[1], thread_count[2]}, + .idx_count = 3, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_RET, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); + instr.srcs[0].reg.mod = src_mod; + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +/* dp# 
instructions don't map the swizzle. */ +static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) +{ + bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + if (uav) + instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; + else + instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } + } + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + if (!uav) + { + /* Mipmap level is in the last component in the IR, but needs to be in the W + * component in the instruction. 
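+ * For example, for a 2D texture the IR stores (u, v, mip); ld wants the
+ * level in .w, so the map below keeps components 0, 1 and 3, swizzling the
+ * last IR component into .w.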
*/ + unsigned int dim_count = hlsl_sampler_dim_count(dim); + + if (dim_count == 1) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; + if (dim_count == 2) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; + } + + sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); + + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + + instr.src_count = 2; + + if (multisampled) + { + if (sample_index->type == HLSL_IR_CONSTANT) + { + struct sm4_register *reg = &instr.srcs[2].reg; + struct hlsl_ir_constant *index; + + index = hlsl_ir_constant(sample_index); + + memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); + instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + reg->type = VKD3D_SM4_RT_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = index->value.u[0].u; + } + else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) + { + hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } + else + { + sm4_src_from_node(&instr.srcs[2], sample_index, 0); + } + + ++instr.src_count; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + instr.opcode = VKD3D_SM4_OP_SAMPLE; + break; + + case HLSL_RESOURCE_SAMPLE_CMP: + instr.opcode = VKD3D_SM4_OP_SAMPLE_C; + break; + + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; + break; + + case HLSL_RESOURCE_SAMPLE_LOD: + instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + instr.opcode = VKD3D_SM4_OP_SAMPLE_B; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; + break; + + default: + vkd3d_unreachable(); + } + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } + } + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 3; + + if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD + || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) + { + sm4_src_from_node(&instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); + ++instr.src_count; + } + else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) + { + sm4_src_from_node(&instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); + instr.src_count += 2; + } + else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP + || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) + { 
+ sm4_src_from_node(&instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); + ++instr.src_count; + } + + write_sm4_instruction(buffer, &instr); +} + +static bool type_is_float(const struct hlsl_type *type) +{ + return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; +} + +static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, + const struct hlsl_ir_node *arg, uint32_t mask) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_AND; + + sm4_dst_from_node(&instr.dsts[0], &expr->node); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); + instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; + instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + instr.srcs[1].reg.immconst_uint[0] = mask; + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_cast(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + static const union + { + uint32_t u; + float f; + } one = { .f = 1.0 }; + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = arg1->data_type; + + /* Narrowing casts were already lowered. */ + assert(src_type->dimx == dst_type->dimx); + + switch (dst_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_INT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_UINT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); + break; + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. 
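+ * The HLSL lowering passes are expected to have replaced them with
+ * comparisons against zero, so reaching this point indicates a frontend bug.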
*/ + default: + vkd3d_unreachable(); + } +} + +static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_expr(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; + const struct hlsl_type *dst_type = expr->node.data_type; + struct vkd3d_string_buffer *dst_type_string; + + assert(expr->node.reg.allocated); + + if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) + return; + + switch (expr->op) + { + case HLSL_OP1_ABS: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_BIT_NOT: + assert(type_is_integer(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_CAST: + write_sm4_cast(ctx, buffer, expr); + break; + + case HLSL_OP1_COS: + assert(type_is_float(dst_type)); + write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + + case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + + case HLSL_OP1_EXP2: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FLOOR: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FRACT: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOGIC_NOT: + assert(dst_type->base_type == HLSL_TYPE_BOOL); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_REINTERPRET: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_OP1_ROUND: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, 
&expr->node, arg1, 0);
+            break;
+
+        case HLSL_OP1_RSQ:
+            assert(type_is_float(dst_type));
+            write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0);
+            break;
+
+        case HLSL_OP1_SAT:
+            assert(type_is_float(dst_type));
+            write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV
+                    | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT),
+                    &expr->node, arg1, 0);
+            break;
+
+        case HLSL_OP1_SIN:
+            assert(type_is_float(dst_type));
+            write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1);
+            break;
+
+        case HLSL_OP1_SQRT:
+            assert(type_is_float(dst_type));
+            write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0);
+            break;
+
+        case HLSL_OP1_TRUNC:
+            assert(type_is_float(dst_type));
+            write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0);
+            break;
+
+        case HLSL_OP2_ADD:
+            switch (dst_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_INT:
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer);
+            }
+            break;
+
+        case HLSL_OP2_BIT_AND:
+            assert(type_is_integer(dst_type));
+            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
+            break;
+
+        case HLSL_OP2_BIT_OR:
+            assert(type_is_integer(dst_type));
+            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
+            break;
+
+        case HLSL_OP2_BIT_XOR:
+            assert(type_is_integer(dst_type));
+            write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2);
+            break;
+
+        case HLSL_OP2_DIV:
+            switch (dst_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer);
+            }
+            break;
+
+        case HLSL_OP2_DOT:
+            switch (dst_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    switch (arg1->data_type->dimx)
+                    {
+                        case 4:
+                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2);
+                            break;
+
+                        case 3:
+                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2);
+                            break;
+
+                        case 2:
+                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2);
+                            break;
+
+                        case 1:
+                        default:
+                            vkd3d_unreachable();
+                    }
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer);
+            }
+            break;
+
+        case HLSL_OP2_EQUAL:
+        {
+            const struct hlsl_type *src_type = arg1->data_type;
+
+            assert(dst_type->base_type == HLSL_TYPE_BOOL);
+
+            switch (src_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_BOOL:
+                case HLSL_TYPE_INT:
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
+                            debug_hlsl_type(ctx, src_type));
+                    break;
+            }
+            break;
+        }
+
+        case HLSL_OP2_GEQUAL:
+        {
+            const struct hlsl_type *src_type = arg1->data_type;
+
+            assert(dst_type->base_type == HLSL_TYPE_BOOL);
+
+            switch (src_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_INT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_BOOL:
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
+                            debug_hlsl_type(ctx, src_type));
+                    break;
+            }
+            break;
+        }
+
+        case HLSL_OP2_LESS:
+        {
+            const struct hlsl_type *src_type = arg1->data_type;
+
+            assert(dst_type->base_type == HLSL_TYPE_BOOL);
+
+            switch (src_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_INT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_BOOL:
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
+                            debug_hlsl_type(ctx, src_type));
+                    break;
+            }
+            break;
+        }
+
+        case HLSL_OP2_LOGIC_AND:
+            assert(dst_type->base_type == HLSL_TYPE_BOOL);
+            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
+            break;
+
+        case HLSL_OP2_LOGIC_OR:
+            assert(dst_type->base_type == HLSL_TYPE_BOOL);
+            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
+            break;
+
+        case HLSL_OP2_LSHIFT:
+            assert(type_is_integer(dst_type));
+            assert(dst_type->base_type != HLSL_TYPE_BOOL);
+            write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2);
+            break;
+
+        case HLSL_OP2_MAX:
+            switch (dst_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_INT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer);
+            }
+            break;
+
+        case HLSL_OP2_MIN:
+            switch (dst_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_INT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer);
+            }
+            break;
+
+        case HLSL_OP2_MOD:
+            switch (dst_type->base_type)
+            {
+                case HLSL_TYPE_UINT:
+                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2);
+                    break;
+
+                default:
+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer);
+            }
+            break;
+
+        case HLSL_OP2_MUL:
+            switch (dst_type->base_type)
+            {
+                case HLSL_TYPE_FLOAT:
+                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2);
+                    break;
+
+                case HLSL_TYPE_INT:
+                case HLSL_TYPE_UINT:
+                    /* Using IMUL instead of UMUL because we're taking the low
+                     * bits, and the native compiler generates IMUL.
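+                     * For 32-bit operands the low 32 bits of a signed and an
+                     * unsigned multiply are identical; only the high half,
+                     * written to the null destination in slot 0 here, differs.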
*/ + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_NEQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between "%s" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_RSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + } + + hlsl_release_string_buffer(ctx, dst_type_string); +} + +static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, + .src_count = 1, + }; + + assert(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->else_block); + } + + instr.opcode = VKD3D_SM4_OP_ENDIF; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_jump(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) +{ + struct sm4_instruction instr = {0}; + + switch (jump->type) + { + case HLSL_IR_JUMP_BREAK: + instr.opcode = VKD3D_SM4_OP_BREAK; + break; + + case HLSL_IR_JUMP_DISCARD: + { + struct sm4_register *reg = &instr.srcs[0].reg; + + instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); + instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.src_count = 1; + reg->type = VKD3D_SM4_RT_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = ~0u; + + break; + } + + case HLSL_IR_JUMP_RETURN: + vkd3d_unreachable(); + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + return; + } + + write_sm4_instruction(buffer, &instr); +} + +/* Does this variable's data come directly from the API user, rather than being + * temporary or from a previous shader stage? + * I.e. is it a uniform or VS input? 
*/ +static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +{ + if (var->is_uniform) + return true; + + return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; +} + +static void write_sm4_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) +{ + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + + sm4_dst_from_node(&instr.dsts[0], &load->node); + instr.dst_count = 1; + + assert(type->class <= HLSL_CLASS_LAST_NUMERIC); + if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) + { + struct hlsl_constant_value value; + + /* Uniform bools can be specified as anything, but internal bools always + * have 0 for false and ~0 for true. Normalize that here. */ + + instr.opcode = VKD3D_SM4_OP_MOVC; + + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + + memset(&value, 0xff, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); + memset(&value, 0, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].writemask); + instr.src_count = 3; + } + else + { + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + instr.src_count = 1; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_loop(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_LOOP, + }; + + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &loop->body); + + instr.opcode = VKD3D_SM4_OP_ENDLOOP; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +{ + struct sm4_src_register *src; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + + instr.opcode = VKD3D_SM4_OP_GATHER4; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + if (ctx->profile->major_version < 5) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return; + } + instr.opcode = VKD3D_SM5_OP_GATHER4_PO; + sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); + } + } + + sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + + src = &instr.srcs[instr.src_count++]; + sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; + src->swizzle = swizzle; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_resource_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = 
load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *coords = load->coords.node; + + if (!hlsl_type_is_resource(resource_type)) + { + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + return; + } + + if (load->sampler.var) + { + const struct hlsl_type *sampler_type = load->sampler.var->data_type; + + if (!hlsl_type_is_resource(sampler_type)) + { + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + return; + } + + if (!load->sampler.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return; + } + } + + if (!load->resource.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + return; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, + coords, sample_index, texel_offset, load->sampling_dim); + break; + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: + if (!load->sampler.var) + { + hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); + return; + } + write_sm4_sample(ctx, buffer, load); + break; + + case HLSL_RESOURCE_GATHER_RED: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_GREEN: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_BLUE: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_ALPHA: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + break; + } +} + +static void write_sm4_resource_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) +{ + const struct hlsl_type *resource_type = store->resource.var->data_type; + + if (!hlsl_type_is_resource(resource_type)) + { + hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); + return; + } + + if (!store->resource.var->is_uniform) + { + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + return; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); + return; + } + + write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); +} + +static void write_sm4_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) +{ + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + + 
sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_swizzle(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +{ + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_dst_from_node(&instr.dsts[0], &swizzle->node); + instr.dst_count = 1; + + sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); + instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), + swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { + hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); + break; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy."); + break; + } + + assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + + if (!instr->reg.allocated) + { + assert(instr->type == HLSL_IR_CONSTANT); + continue; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + case HLSL_IR_CONSTANT: + vkd3d_unreachable(); + + case HLSL_IR_EXPR: + write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: + write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: + write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: + write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: + write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_LOOP: + write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_STORE: + write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: + write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} + +static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) +{ + const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; + unsigned int extern_resources_count, i; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + size_t token_count_position; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { + VKD3D_SM4_PS, + VKD3D_SM4_VS, + VKD3D_SM4_GS, + VKD3D_SM5_HS, + VKD3D_SM5_DS, + VKD3D_SM5_CS, + 0, /* EFFECT */ + 0, /* TEXTURE */ + VKD3D_SM4_LIB, + }; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); + token_count_position = put_u32(&buffer, 0); + + LIST_FOR_EACH_ENTRY(cbuffer, 
&ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + write_sm4_dcl_constant_buffer(&buffer, cbuffer); + } + + for (i = 0; i < extern_resources_count; ++i) + { + enum hlsl_regset regset; + + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type); + + if (regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, var); + else if (regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, var, false); + else if (regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, var, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) + write_sm4_dcl_semantic(ctx, &buffer, var); + } + + if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) + write_sm4_dcl_thread_group(&buffer, ctx->thread_count); + + if (ctx->temp_count) + write_sm4_dcl_temps(&buffer, ctx->temp_count); + + write_sm4_block(ctx, &buffer, &entry_func->body); + + write_sm4_ret(&buffer); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + + add_section(dxbc, TAG_SHDR, &buffer); + + vkd3d_free(extern_resources); +} + +int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct dxbc_writer dxbc; + size_t i; + int ret; + + dxbc_writer_init(&dxbc); + + write_sm4_signature(ctx, &dxbc, false); + write_sm4_signature(ctx, &dxbc, true); + write_sm4_rdef(ctx, &dxbc); + write_sm4_shdr(ctx, entry_func, &dxbc); + + if (!(ret = ctx->result)) + ret = dxbc_writer_write(&dxbc, out); + for (i = 0; i < dxbc.section_count; ++i) + vkd3d_shader_free_shader_code(&dxbc.sections[i].data); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index c9c15f01155..b8f36df07f1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -338,22 +338,35 @@ void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const stru va_end(args); }
+size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) +{ + size_t aligned_size = align(buffer->size, 4); + + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, aligned_size, 1)) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return aligned_size; + } + + memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size); + buffer->size = aligned_size; + return aligned_size; +} + size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) { - size_t aligned_size = align(size, 4); - size_t offset = buffer->size; + size_t offset = bytecode_align(buffer);
if (buffer->status) return offset;
- if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + aligned_size, 1)) + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + size, 1)) { buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; return offset; } memcpy(buffer->data + offset, bytes, size); - memset(buffer->data + offset + size, 0xab, aligned_size - size); - buffer->size = offset + aligned_size; + buffer->size = offset + size; return offset; }
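The hunk above moves the 4-byte alignment out of bytecode_put_bytes() and into the new bytecode_align(), so a write now aligns first and appends second, leaving the buffer tail unpadded until the next aligned operation. A minimal standalone sketch of that pattern, using toy types rather than the real vkd3d_bytecode_buffer API (allocation failures are ignored here, where the real code sets buffer->status):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct toy_buffer
{
    uint8_t *data;
    size_t size, capacity;
};

/* Pad with 0xab up to the next 4-byte offset; mirrors bytecode_align(). */
static size_t toy_align(struct toy_buffer *b)
{
    size_t aligned = (b->size + 3) & ~(size_t)3;

    if (aligned > b->capacity)
    {
        b->data = realloc(b->data, aligned);
        b->capacity = aligned;
    }
    memset(b->data + b->size, 0xab, aligned - b->size);
    b->size = aligned;
    return aligned;
}

/* Align first, then append; the buffer may end unaligned until the next write. */
static size_t toy_put_bytes(struct toy_buffer *b, const void *bytes, size_t size)
{
    size_t offset = toy_align(b);

    if (offset + size > b->capacity)
    {
        b->data = realloc(b->data, offset + size);
        b->capacity = offset + size;
    }
    memcpy(b->data + offset, bytes, size);
    b->size = offset + size;
    return offset;
}

The visible difference is that buffer->size now reports the exact number of bytes written; the 0xab padding only appears once another aligned write, or an explicit bytecode_align() call, follows.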
@@ -1070,7 +1083,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info
if (TRACE_ON()) { - vkd3d_shader_trace(parser); + vkd3d_shader_trace(&parser->instructions, &parser->shader_version); }
for (i = 0; i < parser->instructions.count; ++i) @@ -1167,75 +1180,73 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char return ret; }
-static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, +static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; + struct vkd3d_glsl_generator *glsl_generator; struct vkd3d_shader_compile_info scan_info; - struct spirv_compiler *spirv_compiler; - struct vkd3d_shader_parser *parser; int ret;
+ vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + scan_info = *compile_info; scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; scan_descriptor_info.next = scan_info.next; scan_info.next = &scan_descriptor_info;
- if ((ret = scan_dxbc(&scan_info, message_context)) < 0) + if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) return ret;
- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) + switch (compile_info->target_type) { - WARN("Failed to initialise shader parser.\n"); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } + case VKD3D_SHADER_TARGET_D3D_ASM: + ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); + break;
- vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + case VKD3D_SHADER_TARGET_GLSL: + if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, + message_context, &parser->location))) + { + ERR("Failed to create GLSL generator.\n"); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return VKD3D_ERROR; + }
- if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) - { - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); - vkd3d_shader_parser_destroy(parser); - return ret; - } + ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); + vkd3d_glsl_generator_destroy(glsl_generator); + break;
- if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) - { - struct vkd3d_glsl_generator *glsl_generator; + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + case VKD3D_SHADER_TARGET_SPIRV_TEXT: + ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); + break;
- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, - message_context, &parser->location))) - { - ERR("Failed to create GLSL generator.\n"); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return VKD3D_ERROR; - } + default: + /* Validation should prevent us from reaching this. */ + assert(0); + }
- ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; +}
- vkd3d_glsl_generator_destroy(glsl_generator); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } +static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret;
- if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, - compile_info, &scan_descriptor_info, message_context, &parser->location))) + if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) { - ERR("Failed to create DXBC compiler.\n"); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return VKD3D_ERROR; + WARN("Failed to initialise shader parser.\n"); + return ret; }
- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); + ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context);
- spirv_compiler_destroy(spirv_compiler); vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); return ret; }
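With the parser created up front, compile_dxbc_tpf() reduces to create/compile/destroy, and all target dispatch lives in vkd3d_shader_parser_compile(). Nothing changes for API users; for reference, a typical DXBC-to-SPIR-V call through the public entry point looks roughly like this (illustrative helper; error handling kept minimal):

#include <stdio.h>
#include <vkd3d_shader.h>

static int compile_dxbc_to_spirv(const void *dxbc, size_t dxbc_size, struct vkd3d_shader_code *spirv)
{
    struct vkd3d_shader_compile_info info = {0};
    char *messages;
    int ret;

    info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
    info.source.code = dxbc;
    info.source.size = dxbc_size;
    info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF;
    info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY;
    info.log_level = VKD3D_SHADER_LOG_INFO;

    ret = vkd3d_shader_compile(&info, spirv, &messages);
    if (messages)
        fprintf(stderr, "%s", messages);
    vkd3d_shader_free_messages(messages);
    return ret;
}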
@@ -1270,7 +1281,7 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_
if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) { - ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); + ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); vkd3d_shader_parser_destroy(parser); return ret; } @@ -1388,10 +1399,54 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu desc->version = 0; }
+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, + const struct shader_signature *src) +{ + unsigned int i; + + signature->element_count = src->element_count; + if (!src->elements) + { + assert(!signature->element_count); + signature->elements = NULL; + return true; + } + + if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) + return false; + + for (i = 0; i < signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *d = &signature->elements[i]; + struct signature_element *e = &src->elements[i]; + + d->semantic_name = e->semantic_name; + d->semantic_index = e->semantic_index; + d->stream_index = e->stream_index; + d->sysval_semantic = e->sysval_semantic; + d->component_type = e->component_type; + d->register_index = e->register_index; + if (e->register_count > 1) + FIXME("Arrayed elements are not supported yet.\n"); + d->mask = e->mask; + d->used_mask = e->used_mask; + d->min_precision = e->min_precision; + } + + return true; +} + +void shader_signature_cleanup(struct shader_signature *signature) +{ + vkd3d_free(signature->elements); + signature->elements = NULL; +} + int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_signature *signature, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_signature shader_signature; int ret;
TRACE("dxbc {%p, %zu}, signature %p, messages %p.\n", dxbc->code, dxbc->size, signature, messages); @@ -1400,13 +1455,17 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, *messages = NULL; vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO);
- ret = shader_parse_input_signature(dxbc, &message_context, signature); + ret = shader_parse_input_signature(dxbc, &message_context, &shader_signature); vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) ret = VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_message_context_cleanup(&message_context);
+ if (!vkd3d_shader_signature_from_shader_signature(signature, &shader_signature)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + + shader_signature_cleanup(&shader_signature); return ret; }
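This introduces a split between the internal struct shader_signature and the public struct vkd3d_shader_signature: parsing now fills the internal form, and vkd3d_shader_signature_from_shader_signature() copies it out element by element, with arrayed elements (register_count > 1) still flagged as a FIXME. Consumers of the public API are unaffected; a sketch of typical usage (hypothetical helper, minimal error handling):

#include <stdio.h>
#include <vkd3d_shader.h>

static void dump_input_signature(const struct vkd3d_shader_code *dxbc)
{
    struct vkd3d_shader_signature signature;
    char *messages;
    unsigned int i;

    if (vkd3d_shader_parse_input_signature(dxbc, &signature, &messages) < 0)
    {
        if (messages)
            fprintf(stderr, "%s", messages);
        vkd3d_shader_free_messages(messages);
        return;
    }
    vkd3d_shader_free_messages(messages);

    for (i = 0; i < signature.element_count; ++i)
        printf("%s%u -> register %u\n", signature.elements[i].semantic_name,
                signature.elements[i].semantic_index, signature.elements[i].register_index);

    vkd3d_shader_free_shader_signature(&signature);
}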
@@ -1642,6 +1701,84 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins return true; }
+static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, + unsigned int count); + +static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, + struct vkd3d_shader_instruction_array *instructions) +{ + unsigned int i; + + for (i = 0; i < reg->idx_count; ++i) + { + if (!reg->idx[i].rel_addr) + continue; + + if (!(reg->idx[i].rel_addr = shader_instruction_array_clone_src_params(instructions, reg->idx[i].rel_addr, 1))) + return false; + } + + return true; +} + +static struct vkd3d_shader_dst_param *shader_instruction_array_clone_dst_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_dst_param *params, + unsigned int count) +{ + struct vkd3d_shader_dst_param *dst_params; + unsigned int i; + + if (!(dst_params = shader_dst_param_allocator_get(&instructions->dst_params, count))) + return NULL; + + memcpy(dst_params, params, count * sizeof(*params)); + for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&dst_params[i].reg, instructions)) + return NULL; + } + + return dst_params; +} + +static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, + unsigned int count) +{ + struct vkd3d_shader_src_param *src_params; + unsigned int i; + + if (!(src_params = shader_src_param_allocator_get(&instructions->src_params, count))) + return NULL; + + memcpy(src_params, params, count * sizeof(*params)); + for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&src_params[i].reg, instructions)) + return NULL; + } + + return src_params; +} + +/* NOTE: Immediate constant buffers are not cloned, so the source must not be destroyed while the + * destination is in use. This seems like a reasonable requirement given how this is currently used. */ +bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int src) +{ + struct vkd3d_shader_instruction *ins = &instructions->elements[dst]; + + *ins = instructions->elements[src]; + + if (ins->dst_count && ins->dst && !(ins->dst = shader_instruction_array_clone_dst_params(instructions, + ins->dst, ins->dst_count))) + return false; + + return !ins->src_count || !!(ins->src = shader_instruction_array_clone_src_params(instructions, + ins->src, ins->src_count)); +} + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions) { unsigned int i; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 79be999adf9..d77c5393940 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -74,6 +74,13 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF = 1000, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY = 1002, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT = 1003, + VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, + VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, + VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, + + VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301,
VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, @@ -125,6 +132,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -223,6 +231,7 @@ enum vkd3d_shader_opcode VKD3DSIH_DEQ, VKD3DSIH_DFMA, VKD3DSIH_DGE, + VKD3DSIH_DISCARD, VKD3DSIH_DIV, VKD3DSIH_DLT, VKD3DSIH_DMAX, @@ -675,6 +684,7 @@ struct vkd3d_shader_register bool non_uniform; enum vkd3d_data_type data_type; struct vkd3d_shader_register_index idx[3]; + unsigned int idx_count; enum vkd3d_immconst_type immconst_type; union { @@ -774,13 +784,36 @@ enum vkd3d_shader_input_sysval_semantic VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, };
+struct signature_element +{ + unsigned int sort_index; + const char *semantic_name; + unsigned int semantic_index; + unsigned int stream_index; + enum vkd3d_shader_sysval_semantic sysval_semantic; + enum vkd3d_shader_component_type component_type; + unsigned int register_index; + unsigned int register_count; + unsigned int mask; + unsigned int used_mask; + enum vkd3d_shader_minimum_precision min_precision; +}; + +struct shader_signature +{ + struct signature_element *elements; + unsigned int element_count; +}; + +void shader_signature_cleanup(struct shader_signature *signature); + struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; - struct vkd3d_shader_signature input_signature; - struct vkd3d_shader_signature output_signature; - struct vkd3d_shader_signature patch_constant_signature; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; };
struct vkd3d_shader_register_semantic @@ -927,6 +960,11 @@ static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_reg return reg->type == VKD3DSPR_OUTPUT || reg->type == VKD3DSPR_COLOROUT; }
+static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_PATCHCONST; +} + struct vkd3d_shader_location { const char *source_name; @@ -981,6 +1019,8 @@ bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instru bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, struct vkd3d_shader_immediate_constant_buffer *icb); +bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int src); void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions);
struct vkd3d_shader_parser @@ -991,10 +1031,8 @@ struct vkd3d_shader_parser
struct vkd3d_shader_desc shader_desc; struct vkd3d_shader_version shader_version; - const uint32_t *ptr; const struct vkd3d_shader_parser_ops *ops; struct vkd3d_shader_instruction_array instructions; - size_t instruction_idx; };
struct vkd3d_shader_parser_ops @@ -1028,7 +1066,8 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse parser->ops->parser_destroy(parser); }
-void vkd3d_shader_trace(struct vkd3d_shader_parser *parser); +void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version);
const char *shader_get_type_prefix(enum vkd3d_shader_type type);
@@ -1044,8 +1083,9 @@ struct vkd3d_string_buffer_cache size_t count, max_count, capacity; };
-enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out); +enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out); void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); @@ -1067,6 +1107,8 @@ struct vkd3d_bytecode_buffer int status; };
+/* Align to the next 4-byte offset, and return that offset. */ +size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer); size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value);
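set_u32() exists to support the reserve-and-patch idiom seen in write_sm4_shdr() above: emit a placeholder token, write the body, then patch the placeholder once the final token count is known. The same idiom in miniature, on a plain array instead of a vkd3d_bytecode_buffer (token values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t tokens[8];
    size_t count = 0, len_position;

    tokens[count++] = 0x00010040;           /* version token (illustrative value) */
    len_position = count++;                 /* reserve the length token */
    tokens[count++] = 0x01000000;           /* ...instruction tokens... */
    tokens[count++] = 0x0100003e;

    tokens[len_position] = (uint32_t)count; /* patch it afterwards */

    printf("wrote %zu tokens; length token = %u\n", count, tokens[len_position]);
    return 0;
}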
@@ -1128,8 +1170,10 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi
void free_shader_desc(struct vkd3d_shader_desc *desc);
+int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature); + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature);
struct vkd3d_glsl_generator;
@@ -1141,16 +1185,10 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator);
#define SPIRV_MAX_SRC_COUNT 6
-struct spirv_compiler; - -struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +int spirv_compile(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); -int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *spirv); -void spirv_compiler_destroy(struct spirv_compiler *compiler); + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]);
@@ -1202,6 +1240,14 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( } }
+enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index); + +static inline enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) +{ + return vkd3d_siv_from_sysval_indexed(sysval, 0); +} + static inline unsigned int vkd3d_write_mask_get_component_idx(DWORD write_mask) { unsigned int i; @@ -1323,4 +1369,11 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code);
+enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); +enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); +enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); + #endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 6eddcfa2d14..32439eec7eb 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -1437,7 +1437,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( pool_desc.pNext = NULL; pool_desc.flags = 0; pool_desc.maxSets = 512; - pool_desc.poolSizeCount = ARRAY_SIZE(device->vk_pool_sizes); + pool_desc.poolSizeCount = device->vk_pool_count; pool_desc.pPoolSizes = device->vk_pool_sizes; if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) { @@ -2463,6 +2463,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets));
+ list->descriptor_heap_count = 0; + ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); }
@@ -2720,28 +2722,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des unsigned int index, bool use_array) { uint32_t descriptor_range_magic = range->descriptor_magic; - const struct vkd3d_view *view = descriptor->s.u.view_info.view; + union d3d12_desc_object u = descriptor->s.u; uint32_t vk_binding = range->binding; + VkDescriptorType vk_descriptor_type; uint32_t set = range->set;
- if (descriptor->s.magic != descriptor_range_magic) + if (!u.header || u.header->magic != descriptor_range_magic) return false;
+ vk_descriptor_type = u.header->vk_descriptor_type; + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; vk_descriptor_write->dstSet = vk_descriptor_sets[set]; vk_descriptor_write->dstBinding = use_array ? vk_binding : vk_binding + index; vk_descriptor_write->dstArrayElement = use_array ? index : 0; vk_descriptor_write->descriptorCount = 1; - vk_descriptor_write->descriptorType = descriptor->s.vk_descriptor_type; + vk_descriptor_write->descriptorType = vk_descriptor_type; vk_descriptor_write->pImageInfo = NULL; vk_descriptor_write->pBufferInfo = NULL; vk_descriptor_write->pTexelBufferView = NULL;
- switch (descriptor->s.magic) + switch (u.header->magic) { case VKD3D_DESCRIPTOR_MAGIC_CBV: - vk_descriptor_write->pBufferInfo = &descriptor->s.u.vk_cbv_info; + vk_descriptor_write->pBufferInfo = &u.cb_desc->vk_cbv_info; break;
case VKD3D_DESCRIPTOR_MAGIC_SRV: @@ -2752,8 +2757,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des * in pairs in one set. */ if (range->descriptor_count == UINT_MAX) { - if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; vk_descriptor_write->dstBinding = 0; @@ -2763,21 +2768,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des { if (!use_array) vk_descriptor_write->dstBinding = vk_binding + 2 * index; - if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) ++vk_descriptor_write->dstBinding; }
- if (descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - || descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { - vk_descriptor_write->pTexelBufferView = &view->u.vk_buffer_view; + vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; } else { vk_image_info->sampler = VK_NULL_HANDLE; - vk_image_info->imageView = view->u.vk_image_view; - vk_image_info->imageLayout = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_SRV + vk_image_info->imageView = u.view->v.u.vk_image_view; + vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
vk_descriptor_write->pImageInfo = vk_image_info; @@ -2785,7 +2790,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break;
case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: - vk_image_info->sampler = view->u.vk_sampler; + vk_image_info->sampler = u.view->v.u.vk_sampler; vk_image_info->imageView = VK_NULL_HANDLE; vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -2793,7 +2798,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break;
default: - ERR("Invalid descriptor %#x.\n", descriptor->s.magic); + ERR("Invalid descriptor %#x.\n", u.header->magic); return false; }
@@ -2847,6 +2852,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list for (j = 0; j < descriptor_count; ++j, ++descriptor) { unsigned int register_idx = range->base_register_idx + j; + union d3d12_desc_object u = descriptor->s.u; + VkBufferView vk_counter_view; + + vk_counter_view = (u.header && u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + ? u.view->v.vk_counter_view : VK_NULL_HANDLE;
/* Track UAV counters. */ if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) @@ -2856,8 +2866,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list if (state->uav_counters.bindings[k].register_space == range->register_space && state->uav_counters.bindings[k].register_index == register_idx) { - VkBufferView vk_counter_view = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV - ? descriptor->s.u.view_info.view->vk_counter_view : VK_NULL_HANDLE; if (bindings->vk_uav_counter_views[k] != vk_counter_view) bindings->uav_counters_dirty = true; bindings->vk_uav_counter_views[k] = vk_counter_view; @@ -2867,7 +2875,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list }
/* Not all descriptors are necessarily populated if the range is unbounded. */ - if (descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) + if (!u.header) continue;
if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, @@ -3153,6 +3161,30 @@ static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_lis } }
+static bool contains_heap(struct d3d12_descriptor_heap **heap_array, unsigned int count, + const struct d3d12_descriptor_heap *query) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + if (heap_array[i] == query) + return true; + return false; +} + +static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) +{ + struct d3d12_device *device = list->device; + unsigned int i; + + for (i = 0; i < list->descriptor_heap_count; ++i) + { + vkd3d_mutex_lock(&list->descriptor_heaps[i]->vk_sets_mutex); + d3d12_desc_flush_vk_heap_updates_locked(list->descriptor_heaps[i], device); + vkd3d_mutex_unlock(&list->descriptor_heaps[i]->vk_sets_mutex); + } +} + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) { @@ -3177,10 +3209,18 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l bindings->sampler_heap_id = heap->serial_id; }
- /* These sets can be shared across multiple command lists, and therefore binding must - * be synchronised. On an experimental branch in which caching of Vk descriptor writes - * greatly increased the chance of multiple threads arriving here at the same time, - * GRID 2019 crashed without the mutex lock. */ + if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) + { + if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) + { + /* Descriptors can be written after binding. */ + FIXME("Flushing descriptor updates while list %p is not closed.\n", list); + command_list_flush_vk_heap_updates(list); + list->descriptor_heap_count = 0; + } + list->descriptor_heaps[list->descriptor_heap_count++] = heap; + } + vkd3d_mutex_lock(&heap->vk_sets_mutex);
for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) @@ -3963,10 +4003,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo vk_viewports[i].minDepth = viewports[i].MinDepth; vk_viewports[i].maxDepth = viewports[i].MaxDepth;
- if (!vk_viewports[i].width || !vk_viewports[i].height) + if (vk_viewports[i].width <= 0.0f) { - FIXME_ONCE("Invalid viewport %u, ignoring RSSetViewports().\n", i); - return; + /* Vulkan does not support width <= 0 */ + FIXME_ONCE("Setting invalid viewport %u to zero height.\n", i); + vk_viewports[i].width = 1.0f; + vk_viewports[i].height = 0.0f; } }
@@ -4481,11 +4523,20 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, root_parameter = root_signature_get_root_descriptor(root_signature, index); assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV);
- resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); - buffer_info.buffer = resource->u.vk_buffer; - buffer_info.offset = gpu_address - resource->gpu_address; - buffer_info.range = resource->desc.Width - buffer_info.offset; - buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); + if (gpu_address) + { + resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); + buffer_info.buffer = resource->u.vk_buffer; + buffer_info.offset = gpu_address - resource->gpu_address; + buffer_info.range = resource->desc.Width - buffer_info.offset; + buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); + } + else + { + buffer_info.buffer = list->device->null_resources.vk_buffer; + buffer_info.offset = 0; + buffer_info.range = VK_WHOLE_SIZE; + }
if (vk_info->KHR_push_descriptor) { @@ -4547,13 +4598,13 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV);
/* FIXME: Re-use buffer views. */ - if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) + if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) { ERR("Failed to create buffer view.\n"); return; }
- if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + if (vk_buffer_view && !(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) { ERR("Failed to add buffer view.\n"); VK_CALL(vkDestroyBufferView(vk_device, vk_buffer_view, NULL)); @@ -4644,6 +4695,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics WARN("Ignoring NULL index buffer view.\n"); return; } + if (!view->BufferLocation) + { + WARN("Ignoring index buffer location 0.\n"); + return; + }
vk_procs = &list->device->vk_procs;
@@ -4844,7 +4900,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi WARN("Failed to add view.\n"); }
- list->rtvs[i] = view->u.vk_image_view; + list->rtvs[i] = view->v.u.vk_image_view; list->fb_width = max(list->fb_width, rtv_desc->width); list->fb_height = max(list->fb_height, rtv_desc->height); list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); @@ -4868,7 +4924,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi list->dsv = VK_NULL_HANDLE; }
- list->dsv = view->u.vk_image_view; + list->dsv = view->v.u.vk_image_view; list->fb_width = max(list->fb_width, dsv_desc->width); list->fb_height = max(list->fb_height, dsv_desc->height); list->fb_layer_count = max(list->fb_layer_count, dsv_desc->layer_count); @@ -4960,7 +5016,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, fb_desc.flags = 0; fb_desc.renderPass = vk_render_pass; fb_desc.attachmentCount = 1; - fb_desc.pAttachments = &view->u.vk_image_view; + fb_desc.pAttachments = &view->v.u.vk_image_view; fb_desc.width = width; fb_desc.height = height; fb_desc.layers = layer_count; @@ -5163,13 +5219,14 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea }
static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, + struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, unsigned int rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int i, miplevel_idx, layer_count; struct vkd3d_uav_clear_pipeline pipeline; struct vkd3d_uav_clear_args clear_args; + const struct vkd3d_resource_view *view; VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; @@ -5181,8 +5238,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, d3d12_command_list_invalidate_bindings(list, list->state); d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE);
- if (!d3d12_command_allocator_add_view(list->allocator, view)) + if (!d3d12_command_allocator_add_view(list->allocator, descriptor)) WARN("Failed to add view.\n"); + view = &descriptor->v;
clear_args.colour = *clear_colour;
@@ -5295,10 +5353,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; - struct vkd3d_view *view, *uint_view = NULL; struct vkd3d_texture_view_desc view_desc; const struct vkd3d_format *uint_format; + const struct vkd3d_resource_view *view; struct d3d12_resource *resource_impl; VkClearColorValue colour;
@@ -5306,7 +5365,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);
resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; + if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; + view = &descriptor->v; memcpy(colour.uint32, values, sizeof(colour.uint32));
if (view->format->type != VKD3D_FORMAT_TYPE_UINT) @@ -5320,8 +5381,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID
if (d3d12_resource_is_buffer(resource_impl)) { - if (!vkd3d_create_buffer_view(device, resource_impl->u.vk_buffer, uint_format, - view->info.buffer.offset, view->info.buffer.size, &uint_view)) + if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, + uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) { ERR("Failed to create buffer view.\n"); return; @@ -5337,16 +5398,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID view_desc.layer_idx = view->info.texture.layer_idx; view_desc.layer_count = view->info.texture.layer_count;
- if (!vkd3d_create_texture_view(device, resource_impl->u.vk_image, &view_desc, &uint_view)) + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, + &uint_view)) { ERR("Failed to create image view.\n"); return; } } - view = uint_view; + descriptor = uint_view; }
- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects);
if (uint_view) vkd3d_view_decref(uint_view, device); @@ -5365,7 +5427,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects);
resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; + if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; memcpy(colour.float32, values, sizeof(colour.float32));
d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); @@ -5906,6 +5969,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
list->update_descriptors = device->use_vk_heaps ? d3d12_command_list_update_heap_descriptors : d3d12_command_list_update_descriptors; + list->descriptor_heap_count = 0;
if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) { @@ -6199,6 +6263,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm return; }
+ command_list_flush_vk_heap_updates(cmd_list); + buffers[i] = cmd_list->vk_command_buffer; }
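Taken together, the command.c changes give each list a small fixed-size array of descriptor heaps whose buffered Vulkan descriptor writes still need flushing, and flush them either when the array overflows at bind time or when the list is executed. The bookkeeping reduces to roughly this shape (toy types; in vkd3d the flush applies the pending writes under each heap's vk_sets_mutex):

#include <stddef.h>

#define MAX_TRACKED_HEAPS 4

struct toy_list
{
    const void *heaps[MAX_TRACKED_HEAPS];
    size_t heap_count;
};

static void flush_heap_updates(struct toy_list *list)
{
    /* In vkd3d this walks list->descriptor_heaps[] and applies the buffered
     * descriptor writes for each heap, then the caller resets the count. */
    list->heap_count = 0;
}

static void track_heap(struct toy_list *list, const void *heap)
{
    size_t i;

    for (i = 0; i < list->heap_count; ++i)
    {
        if (list->heaps[i] == heap)
            return; /* Already tracked; nothing to do. */
    }
    if (list->heap_count == MAX_TRACKED_HEAPS)
        flush_heap_updates(list); /* Descriptors can be written after binding. */
    list->heaps[list->heap_count++] = heap;
}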
diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 39a5ca013c7..4263dcf4184 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -19,6 +19,8 @@ #include "vkd3d_private.h" #include "vkd3d_version.h"
+#define VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE 256u + struct vkd3d_struct { enum vkd3d_structure_type type; @@ -2393,9 +2395,23 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); }
-static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, - const struct vkd3d_device_descriptor_limits *limits) +static void device_init_descriptor_pool_sizes(struct d3d12_device *device) { + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; + + if (device->use_vk_heaps) + { + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, + VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; + device->vk_pool_count = 2; + return; + } + + assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); @@ -2412,8 +2428,27 @@ static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + device->vk_pool_count = 6; };
+static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) +{ + cache->head = NULL; + cache->size = size; +} + +static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) +{ + union d3d12_desc_object u; + void *next; + + for (u.object = cache->head; u.object; u.object = next) + { + next = u.header->next; + vkd3d_free(u.object); + } +} + /* ID3D12Device */ static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) { @@ -2454,7 +2489,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) { struct d3d12_device *device = impl_from_ID3D12Device(iface); ULONG refcount = InterlockedDecrement(&device->refcount); - size_t i;
TRACE("%p decreasing refcount to %u.\n", device, refcount);
@@ -2474,8 +2508,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_destroy(&device->desc_mutex[i]); + vkd3d_desc_object_cache_cleanup(&device->view_desc_cache); + vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3368,132 +3402,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) -{ - enum vkd3d_vk_descriptor_set_index set; - for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set) - { - if (!infos[set].count) - continue; - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) -{ - struct d3d12_desc_copy_location *location; - enum vkd3d_vk_descriptor_set_index set; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) - { - /* Source must be unlocked first, and therefore can't be used as a null source. */ - static const struct d3d12_desc null = {0}; - vkd3d_mutex_unlock(mutex); - d3d12_desc_write_atomic(dst, &null, device); - return; - } - - set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->s.vk_descriptor_type); - location = &locations[set][infos[set].count++]; - - location->src.s = src->s; - - if (location->src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(location->src.s.u.view_info.view); - - vkd3d_mutex_unlock(mutex); - - infos[set].uav_counter |= (location->src.s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) - && !!location->src.s.u.view_info.view->vk_counter_view; - location->dst = dst; - - if (infos[set].count == ARRAY_SIZE(locations[0])) - { - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -/* Some games, e.g. Control, copy a large number of descriptors per frame, so the - * speed of this function is critical. */ -static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes) -{ - struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - /* The locations array is relatively large, and often mostly empty. Keeping these - * values together in a separate array will likely result in fewer cache misses. 
*/ - struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT]; - struct d3d12_descriptor_heap *descriptor_heap = NULL; - const struct d3d12_desc *src, *heap_base, *heap_end; - unsigned int dst_range_size, src_range_size; - struct d3d12_desc *dst; - - descriptor_heap = d3d12_desc_get_descriptor_heap(d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0])); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - - memset(infos, 0, sizeof(infos)); - dst_range_idx = dst_idx = 0; - src_range_idx = src_idx = 0; - while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) - { - dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; - src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; - - dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); - src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - - if (dst < heap_base || dst >= heap_end) - { - flush_desc_writes(locations, infos, descriptor_heap, device); - descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - } - - for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) - { - /* We don't need to lock either descriptor for the identity check. The descriptor - * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a - * race condition in the calling app. It is unnecessary to protect this test as it's - * the app's race condition, not ours. */ - if (dst[dst_idx].s.magic == src[src_idx].s.magic && (dst[dst_idx].s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && dst[dst_idx].s.u.view_info.written_serial_id == src[src_idx].s.u.view_info.view->serial_id) - continue; - d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); - } - - if (dst_idx >= dst_range_size) - { - ++dst_range_idx; - dst_idx = 0; - } - if (src_idx >= src_range_size) - { - ++src_range_idx; - src_idx = 0; - } - } - - flush_desc_writes(locations, infos, descriptor_heap, device); -} - -#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 - static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, @@ -3525,15 +3433,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, if (!dst_descriptor_range_count) return;
- if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes - && dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT))) - { - d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets, - dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets, - src_descriptor_range_sizes); - return; - } - dst_range_idx = dst_idx = 0; src_range_idx = src_idx = 0; while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) @@ -3544,8 +3443,12 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]);
- while (dst_idx < dst_range_size && src_idx < src_range_size) - d3d12_desc_copy(&dst[dst_idx++], &src[src_idx++], device); + for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) + { + if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) + continue; + d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); + }
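With a descriptor now reduced to a single refcounted object pointer, pointer equality implies identical contents, so self-copies can be skipped before touching any refcount. A minimal standalone sketch of the pattern, using hypothetical stand-ins for d3d12_desc_get_object_ref() and vkd3d_view_decref() (the sketch is not thread safe; the real code uses atomic operations):

    #include <stdlib.h>

    struct object_header { unsigned int refcount; };
    struct desc { struct object_header *object; };

    static void object_incref(struct object_header *o) { if (o) ++o->refcount; }
    static void object_decref(struct object_header *o) { if (o && !--o->refcount) free(o); }

    static void desc_copy(struct desc *dst, const struct desc *src)
    {
        struct object_header *old = dst->object;

        if (old == src->object)
            return; /* Identical payloads; skip the refcount churn. */

        object_incref(src->object);
        dst->object = src->object;
        object_decref(old);
    }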
if (dst_idx >= dst_range_size) { @@ -3570,17 +3473,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, descriptor_heap_type);
- if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT) - { - struct d3d12_device *device = impl_from_ID3D12Device(iface); - if (device->use_vk_heaps) - { - d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset, - &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count); - return; - } - } - d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); } @@ -4080,7 +3972,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, { const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - size_t i;
device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; @@ -4123,10 +4014,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, device->blocked_queue_count = 0; vkd3d_mutex_init(&device->blocked_queues_mutex);
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_init(&device->desc_mutex[i]); + vkd3d_desc_object_cache_init(&device->view_desc_cache, sizeof(struct vkd3d_view)); + vkd3d_desc_object_cache_init(&device->cbuffer_desc_cache, sizeof(struct vkd3d_cbuffer_desc));
- vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); + device_init_descriptor_pool_sizes(device);
if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent); diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index 8c050cfeb32..ea7b6859cc1 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -326,6 +326,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
vkd3d_private_store_destroy(&heap->private_store);
+ if (heap->map_ptr) + VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); + VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL));
vkd3d_mutex_destroy(&heap->mutex); @@ -346,12 +349,19 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_Release(ID3D12Heap *iface)
TRACE("%p decreasing refcount to %u.\n", heap, refcount);
- if (!refcount) + /* A heap must not be destroyed until all contained resources are destroyed. */ + if (!refcount && !heap->resource_count) d3d12_heap_destroy(heap);
return refcount; }
+static void d3d12_heap_resource_destroyed(struct d3d12_heap *heap) +{ + if (!InterlockedDecrement(&heap->resource_count) && (!heap->refcount || heap->is_private)) + d3d12_heap_destroy(heap); +} + static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(ID3D12Heap *iface, REFGUID guid, UINT *data_size, void *data) { @@ -437,97 +447,6 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) return impl_from_ID3D12Heap(iface); }
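The heap is now destroyed by whichever of the last Release() and the last contained-resource destruction happens second; private heaps, which are never released directly, are torn down by their single resource. From the application's point of view this matches D3D12 lifetime rules, e.g. (creation calls elided; the ordering is the point):

    ID3D12Heap *heap;       /* created with CreateHeap() */
    ID3D12Resource *buffer; /* placed resource created inside 'heap' */

    /* Legal in D3D12: the underlying VkDeviceMemory must survive this call,
     * since 'buffer' still lives in it; destruction is deferred until the
     * resource_count introduced above reaches zero. */
    ID3D12Heap_Release(heap);

    /* The final resource release now also destroys the heap. */
    ID3D12Resource_Release(buffer);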
-static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, - struct d3d12_resource *resource, void **data) -{ - struct d3d12_device *device = heap->device; - HRESULT hr = S_OK; - VkResult vr; - - vkd3d_mutex_lock(&heap->mutex); - - assert(!resource->map_count || heap->map_ptr); - - if (!resource->map_count) - { - if (!heap->map_ptr) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("Mapping heap %p.\n", heap); - - assert(!heap->map_count); - - if ((vr = VK_CALL(vkMapMemory(device->vk_device, heap->vk_memory, - 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) - { - WARN("Failed to map device memory, vr %d.\n", vr); - heap->map_ptr = NULL; - } - - hr = hresult_from_vk_result(vr); - } - - if (heap->map_ptr) - ++heap->map_count; - } - - if (hr == S_OK) - { - assert(heap->map_ptr); - if (data) - *data = (BYTE *)heap->map_ptr + offset; - ++resource->map_count; - } - else - { - assert(!heap->map_ptr); - if (data) - *data = NULL; - } - - vkd3d_mutex_unlock(&heap->mutex); - - return hr; -} - -static void d3d12_heap_unmap(struct d3d12_heap *heap, struct d3d12_resource *resource) -{ - struct d3d12_device *device = heap->device; - - vkd3d_mutex_lock(&heap->mutex); - - if (!resource->map_count) - { - WARN("Resource %p is not mapped.\n", resource); - goto done; - } - - --resource->map_count; - if (resource->map_count) - goto done; - - if (!heap->map_count) - { - ERR("Heap %p is not mapped.\n", heap); - goto done; - } - - --heap->map_count; - if (!heap->map_count) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("Unmapping heap %p, ptr %p.\n", heap, heap->map_ptr); - - VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); - heap->map_ptr = NULL; - } - -done: - vkd3d_mutex_unlock(&heap->mutex); -} - static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) { if (!resource && !desc->SizeInBytes) @@ -552,15 +471,23 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1 return S_OK; }
+static VkMemoryPropertyFlags d3d12_heap_get_memory_property_flags(const struct d3d12_heap *heap) +{ + return heap->device->memory_properties.memoryTypes[heap->vk_memory_type].propertyFlags; +} + static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkMemoryRequirements memory_requirements; VkDeviceSize vk_memory_size; + VkResult vr; HRESULT hr;
heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; heap->refcount = 1; + heap->resource_count = 0;
heap->is_private = !!resource;
@@ -628,6 +555,20 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, heap->device = device; if (!heap->is_private) d3d12_device_add_ref(heap->device); + else + heap->resource_count = 1; + + if (d3d12_heap_get_memory_property_flags(heap) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + if ((vr = VK_CALL(vkMapMemory(device->vk_device, + heap->vk_memory, 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) + { + heap->map_ptr = NULL; + ERR("Failed to map memory, vr %d.\n", vr); + d3d12_heap_destroy(heap); + return hresult_from_vk_result(vr); + } + }
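Host-visible heaps are now mapped once at creation and stay mapped until d3d12_heap_destroy(), where the matching vkUnmapMemory() call was added above, so the old per-resource map/unmap bookkeeping disappears and every later mapping reduces to pointer arithmetic. A minimal sketch of the scheme (raw Vulkan entry points shown; the variable declarations are illustrative):

    void *map_ptr = NULL;

    /* Map the whole allocation up front; HOST_VISIBLE memory only. */
    if (vkMapMemory(vk_device, vk_memory, 0, VK_WHOLE_SIZE, 0, &map_ptr) < 0)
        map_ptr = NULL;

    /* Every resource mapping is then just an offset into the heap. */
    void *resource_data = (uint8_t *)map_ptr + heap_offset;

    /* Unmapped exactly once, at heap destruction time. */
    vkUnmapMemory(vk_device, vk_memory);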
return S_OK; } @@ -1027,8 +968,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 else VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
- if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP) - d3d12_heap_destroy(resource->heap); + if (resource->heap) + d3d12_heap_resource_destroyed(resource->heap); }
static ULONG d3d12_resource_incref(struct d3d12_resource *resource) @@ -1223,12 +1164,55 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface, return d3d12_device_query_interface(resource->device, iid, device); }
+static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource) +{ + assert(resource->heap->map_ptr); + return (uint8_t *)resource->heap->map_ptr + resource->heap_offset; +} + +static void d3d12_resource_get_vk_range(struct d3d12_resource *resource, + uint64_t offset, uint64_t size, VkMappedMemoryRange *vk_range) +{ + vk_range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + vk_range->pNext = NULL; + vk_range->memory = resource->heap->vk_memory; + vk_range->offset = resource->heap_offset + offset; + vk_range->size = size; +} + +static void d3d12_resource_invalidate(struct d3d12_resource *resource, uint64_t offset, uint64_t size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + VkMappedMemoryRange vk_range; + VkResult vr; + + if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + return; + + d3d12_resource_get_vk_range(resource, offset, size, &vk_range); + if ((vr = VK_CALL(vkInvalidateMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) + ERR("Failed to invalidate memory, vr %d.\n", vr); +} + +static void d3d12_resource_flush(struct d3d12_resource *resource, uint64_t offset, uint64_t size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + VkMappedMemoryRange vk_range; + VkResult vr; + + if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + return; + + d3d12_resource_get_vk_range(resource, offset, size, &vk_range); + if ((vr = VK_CALL(vkFlushMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) + ERR("Failed to flush memory, vr %d.\n", vr); +} + static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT sub_resource, const D3D12_RANGE *read_range, void **data) { struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); unsigned int sub_resource_count; - HRESULT hr;
TRACE("iface %p, sub_resource %u, read_range %p, data %p.\n", iface, sub_resource, read_range, data); @@ -1259,15 +1243,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT return E_NOTIMPL; }
- WARN("Ignoring read range %p.\n", read_range); - - if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data))) - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - if (data) + { + *data = d3d12_resource_get_map_ptr(resource); TRACE("Returning pointer %p.\n", *data); + }
- return hr; + if (!read_range) + d3d12_resource_invalidate(resource, 0, resource->desc.Width); + else if (read_range->End > read_range->Begin) + d3d12_resource_invalidate(resource, read_range->Begin, read_range->End - read_range->Begin); + + return S_OK; }
static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT sub_resource, @@ -1286,9 +1273,10 @@ static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT s return; }
- WARN("Ignoring written range %p.\n", written_range); - - d3d12_heap_unmap(resource->heap, resource); + if (!written_range) + d3d12_resource_flush(resource, 0, resource->desc.Width); + else if (written_range->End > written_range->Begin) + d3d12_resource_flush(resource, written_range->Begin, written_range->End - written_range->Begin); }
static D3D12_RESOURCE_DESC * STDMETHODCALLTYPE d3d12_resource_GetDesc(ID3D12Resource *iface, @@ -1320,10 +1308,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; + uint64_t dst_offset, dst_size; struct d3d12_device *device; uint8_t *dst_data; D3D12_BOX box; - HRESULT hr;
TRACE("iface %p, src_data %p, src_row_pitch %u, src_slice_pitch %u, " "dst_sub_resource %u, dst_box %s.\n", @@ -1381,20 +1369,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch);
- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&dst_data))) - { - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - return hr; - } - - dst_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + dst_data = d3d12_resource_get_map_ptr(resource); + dst_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->left, dst_box->top, dst_box->front); + dst_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, dst_box->right, dst_box->bottom - 1, dst_box->back - 1) - dst_offset;
vkd3d_format_copy_data(format, src_data, src_row_pitch, src_slice_pitch, - dst_data, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, + dst_data + dst_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, dst_box->bottom - dst_box->top, dst_box->back - dst_box->front);
- d3d12_heap_unmap(resource->heap, resource); + d3d12_resource_flush(resource, dst_offset, dst_size);
return S_OK; } @@ -1408,10 +1393,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; + uint64_t src_offset, src_size; struct d3d12_device *device; uint8_t *src_data; D3D12_BOX box; - HRESULT hr;
TRACE("iface %p, dst_data %p, dst_row_pitch %u, dst_slice_pitch %u, " "src_sub_resource %u, src_box %s.\n", @@ -1469,21 +1454,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch);
- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&src_data))) - { - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - return hr; - } - - src_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + src_data = d3d12_resource_get_map_ptr(resource); + src_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, vk_layout.depthPitch, src_box->left, src_box->top, src_box->front); + src_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, src_box->right, src_box->bottom - 1, src_box->back - 1) - src_offset;
- vkd3d_format_copy_data(format, src_data, vk_layout.rowPitch, vk_layout.depthPitch, + d3d12_resource_invalidate(resource, src_offset, src_size); + + vkd3d_format_copy_data(format, src_data + src_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_data, dst_row_pitch, dst_slice_pitch, src_box->right - src_box->left, src_box->bottom - src_box->top, src_box->back - src_box->front);
- d3d12_heap_unmap(resource->heap, resource); - return S_OK; }
@@ -1941,6 +1923,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, { resource->heap = heap; resource->heap_offset = heap_offset; + InterlockedIncrement(&heap->resource_count); } else { @@ -2061,24 +2044,72 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); }
-/* CBVs, SRVs, UAVs */ -static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) +/* Objects are cached so that vkd3d_view_incref() can safely check the refcount + * of an object freed by another thread. */ +static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) { - struct vkd3d_view *view; + union d3d12_desc_object u; + void *next;
- if ((view = vkd3d_malloc(sizeof(*view)))) + do { - view->refcount = 1; - view->type = type; - view->serial_id = InterlockedIncrement64(&object_global_serial_id); - view->vk_counter_view = VK_NULL_HANDLE; + u.object = cache->head; + if (!u.object) + return vkd3d_malloc(cache->size); + next = u.header->next; } - return view; + while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); + + return u.object; +} + +static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) +{ + union d3d12_desc_object u = {object}; + void *head; + + do + { + head = cache->head; + u.header->next = head; + } + while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); +} + +static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) +{ + struct vkd3d_cbuffer_desc *desc; + + if (!(desc = vkd3d_desc_object_cache_get(&device->cbuffer_desc_cache))) + return NULL; + + desc->h.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; + desc->h.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + desc->h.refcount = 1; + + return desc; }
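vkd3d_desc_object_cache_get() and vkd3d_desc_object_cache_push() form a lock-free LIFO free list (a Treiber stack). Because recycled objects are never handed back to the allocator while the cache lives, reading u.header->next from a node another thread has just popped is benign: the compare-exchange then fails and the loop retries. The usual Treiber ABA caveat applies; the sketch below keeps the same scheme, restated with C11 atomics in place of vkd3d_atomic_compare_exchange_pointer():

    #include <stdatomic.h>
    #include <stdlib.h>

    struct cache_entry { struct cache_entry *next; };

    struct object_cache
    {
        _Atomic(struct cache_entry *) head;
        size_t size;
    };

    static void *cache_get(struct object_cache *cache)
    {
        struct cache_entry *entry, *next;

        do
        {
            if (!(entry = atomic_load(&cache->head)))
                return malloc(cache->size);  /* Cache empty; fall back. */
            next = entry->next;              /* Safe: nodes are never freed. */
        } while (!atomic_compare_exchange_weak(&cache->head, &entry, next));

        return entry;
    }

    static void cache_push(struct object_cache *cache, void *object)
    {
        struct cache_entry *entry = object;

        entry->next = atomic_load(&cache->head);
        /* On failure the expected value, entry->next, is refreshed with the
         * current head, so simply retrying is enough. */
        while (!atomic_compare_exchange_weak(&cache->head, &entry->next, entry))
            ;
    }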
-void vkd3d_view_incref(struct vkd3d_view *view) +static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_descriptor_type, + enum vkd3d_view_type type, struct d3d12_device *device) { - InterlockedIncrement(&view->refcount); + struct vkd3d_view *view; + + assert(magic); + + if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) + { + ERR("Failed to allocate descriptor object.\n"); + return NULL; + } + + view->h.magic = magic; + view->h.vk_descriptor_type = vk_descriptor_type; + view->h.refcount = 1; + view->v.type = type; + view->v.vk_counter_view = VK_NULL_HANDLE; + + return view; }
static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) @@ -2087,313 +2118,299 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev
TRACE("Destroying view %p.\n", view);
- switch (view->type) + switch (view->v.type) { case VKD3D_VIEW_TYPE_BUFFER: - VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); + VK_CALL(vkDestroyBufferView(device->vk_device, view->v.u.vk_buffer_view, NULL)); break; case VKD3D_VIEW_TYPE_IMAGE: - VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); + VK_CALL(vkDestroyImageView(device->vk_device, view->v.u.vk_image_view, NULL)); break; case VKD3D_VIEW_TYPE_SAMPLER: - VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); + VK_CALL(vkDestroySampler(device->vk_device, view->v.u.vk_sampler, NULL)); break; default: - WARN("Unhandled view type %d.\n", view->type); + WARN("Unhandled view type %d.\n", view->v.type); }
- if (view->vk_counter_view) - VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); + if (view->v.vk_counter_view) + VK_CALL(vkDestroyBufferView(device->vk_device, view->v.vk_counter_view, NULL));
- vkd3d_free(view); + vkd3d_desc_object_cache_push(&device->view_desc_cache, view); }
-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) +void vkd3d_view_decref(void *view, struct d3d12_device *device) { - if (!InterlockedDecrement(&view->refcount)) - vkd3d_view_destroy(view, device); + union d3d12_desc_object u = {view}; + + if (vkd3d_atomic_decrement(&u.header->refcount)) + return; + + if (u.header->magic != VKD3D_DESCRIPTOR_MAGIC_CBV) + vkd3d_view_destroy(u.view, device); + else + vkd3d_desc_object_cache_push(&device->cbuffer_desc_cache, u.object); }
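Everything in this hunk goes through union d3d12_desc_object, which works because every descriptor payload, views and CBV descriptors alike, starts with the same header. Reconstructed from the accesses above (field order hypothetical; the real definitions live in vkd3d_private.h):

    struct d3d12_desc_header
    {
        uint32_t magic;                      /* VKD3D_DESCRIPTOR_MAGIC_* */
        VkDescriptorType vk_descriptor_type;
        unsigned int volatile refcount;
        void *next;                          /* Free-list link while cached. */
    };

    union d3d12_desc_object
    {
        struct d3d12_desc_header *header;
        struct vkd3d_view *view;             /* header is view->h */
        struct vkd3d_cbuffer_desc *cb_desc;  /* header is cb_desc->h */
        void *object;
    };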
-/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). */ -static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set, - struct d3d12_desc_copy_location *locations, unsigned int write_count) +static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) { - unsigned int i, info_index = 0, write_index = 0; + if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) + vkd3d_view_decref(view, device); +}
- switch (locations[0].src.s.vk_descriptor_type) - { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.s.u.vk_cbv_info; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.s.u.view_info.view->u.vk_image_view; - } - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.s.u.view_info.view->u.vk_buffer_view; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.s.u.view_info.view->u.vk_sampler; - } - break; - default: - ERR("Unhandled descriptor type %#x.\n", locations[0].src.s.vk_descriptor_type); - break; - } +#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 24 + +struct descriptor_writes +{ + VkDescriptorBufferInfo null_vk_cbv_info; + VkBufferView null_vk_buffer_view; + VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + void *held_refs[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + unsigned int count; + unsigned int held_ref_count; +}; + +static void descriptor_writes_free_object_refs(struct descriptor_writes *writes, struct d3d12_device *device) +{ + unsigned int i; + for (i = 0; i < writes->held_ref_count; ++i) + vkd3d_view_decref(writes->held_refs[i], device); + writes->held_ref_count = 0; }
static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_heap *descriptor_heap, - uint32_t dst_array_element, const struct d3d12_device *device) + uint32_t dst_array_element, struct descriptor_writes *writes, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct d3d12_descriptor_heap_vk_set *descriptor_set; - VkBufferView vk_buffer_view = VK_NULL_HANDLE; - enum vkd3d_vk_descriptor_set_index i; - VkDescriptorBufferInfo vk_cbv_info; - - vk_cbv_info.buffer = VK_NULL_HANDLE; - vk_cbv_info.offset = 0; - vk_cbv_info.range = VK_WHOLE_SIZE; + enum vkd3d_vk_descriptor_set_index set; + unsigned int i = writes->count;
/* Binding a shader with the wrong null descriptor type works in Windows. * To support that here we must write one to all applicable Vulkan sets. */ - for (i = VKD3D_SET_INDEX_UNIFORM_BUFFER; i <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++i) - { - descriptor_set = &descriptor_heap->vk_descriptor_sets[i]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst_array_element; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - switch (i) + for (set = VKD3D_SET_INDEX_UNIFORM_BUFFER; set <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++set) + { + descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].descriptorType = descriptor_set->vk_type; + switch (set) { case VKD3D_SET_INDEX_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_writes[0].pBufferInfo = &vk_cbv_info; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = &writes->null_vk_cbv_info; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; break; case VKD3D_SET_INDEX_SAMPLED_IMAGE: case VKD3D_SET_INDEX_STORAGE_IMAGE: - descriptor_set->vk_image_infos[0].imageView = VK_NULL_HANDLE; + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageLayout = (set == VKD3D_SET_INDEX_STORAGE_IMAGE) + ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; case VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER: case VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &vk_buffer_view; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; break; default: assert(false); break; } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); + if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) + continue; + VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(writes, device); + i = 0; } + + writes->count = i; }
-/* dst and src contain the same data unless another thread overwrites dst. The array index is - * calculated from dst, and src is thread safe. */ -static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +static void d3d12_desc_write_vk_heap(struct d3d12_descriptor_heap *descriptor_heap, unsigned int dst_array_element, + struct descriptor_writes *writes, void *object, struct d3d12_device *device) { struct d3d12_descriptor_heap_vk_set *descriptor_set; - struct d3d12_descriptor_heap *descriptor_heap; const struct vkd3d_vk_device_procs *vk_procs; + union d3d12_desc_object u = {object}; + unsigned int i = writes->count; + VkDescriptorType type; bool is_null = false;
- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( - src->s.vk_descriptor_type)]; + type = u.header->vk_descriptor_type; + descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(type)]; vk_procs = &device->vk_procs;
- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - switch (src->s.vk_descriptor_type) + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].descriptorType = type; + switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->s.u.vk_cbv_info; - is_null = !src->s.u.vk_cbv_info.buffer; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = &u.cb_desc->vk_cbv_info; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + is_null = !u.cb_desc->vk_cbv_info.buffer; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - is_null = !(descriptor_set->vk_image_infos[0].imageView = src->s.u.view_info.view->u.vk_image_view); + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; + is_null = !(writes->vk_image_infos[i].imageView = u.view->v.u.vk_image_view); + writes->vk_image_infos[i].imageLayout = (type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) + ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->u.vk_buffer_view; - is_null = !src->s.u.view_info.view->u.vk_buffer_view; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = &u.view->v.u.vk_buffer_view; + is_null = !u.view->v.u.vk_buffer_view; break; case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_image_infos[0].sampler = src->s.u.view_info.view->u.vk_sampler; + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = u.view->v.u.vk_sampler; + writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; break; default: - ERR("Unhandled descriptor type %#x.\n", src->s.vk_descriptor_type); + ERR("Unhandled descriptor type %#x.\n", type); break; } if (is_null && device->vk_info.EXT_robustness2) + return d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, dst_array_element, writes, device); + + ++i; + if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view->v.vk_counter_view) { - d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, - descriptor_set->vk_descriptor_writes[0].dstArrayElement, device); - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); - return; + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + 
writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i++].pTexelBufferView = &u.view->v.vk_counter_view; }
- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); - - if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) + if (i >= ARRAY_SIZE(writes->vk_descriptor_writes) - 1) { - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); + VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(writes, device); + i = 0; }
- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); + writes->count = i; }
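Note the asymmetric flush threshold: both paths flush once the buffer reaches ARRAY_SIZE(writes->vk_descriptor_writes) - 1 rather than full capacity. The slot of headroom exists because a single UAV descriptor expands into two Vulkan writes, the view itself plus its counter, and both must land in the same batch. Between calls the buffer therefore satisfies (a hypothetical assertion, not present in the source):

    /* count == 0 right after a flush, count <= SIZE - 2 otherwise, so two
     * more writes always fit. */
    assert(writes->count + 2 <= VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE);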
-static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) +void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) { - struct vkd3d_view *defunct_view; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct d3d12_desc *descriptors, *src; + struct descriptor_writes writes; + union d3d12_desc_object u; + unsigned int i, next;
- if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->s.u.view_info.view->refcount)) - { - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); + if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) return; - }
- defunct_view = dst->s.u.view_info.view; - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); + writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; + writes.null_vk_cbv_info.offset = 0; + writes.null_vk_cbv_info.range = VK_WHOLE_SIZE; + writes.null_vk_buffer_view = VK_NULL_HANDLE; + writes.count = 0; + writes.held_ref_count = 0;
- /* Destroy the view after unlocking to reduce wait time. */ - vkd3d_view_destroy(defunct_view, device); -} - -void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) -{ - struct vkd3d_view *defunct_view = NULL; - struct vkd3d_mutex *mutex; + descriptors = (struct d3d12_desc *)descriptor_heap->descriptors;
- mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + for (; i != UINT_MAX; i = next) + { + src = &descriptors[i]; + next = (int)src->next >> 1;
- /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if ((dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && !InterlockedDecrement(&dst->s.u.view_info.view->refcount)) - defunct_view = dst->s.u.view_info.view; + u.object = d3d12_desc_get_object_ref(src, device);
- d3d12_desc_copy_raw(dst, src); + if (!u.object) + { + vkd3d_atomic_exchange(&src->next, 0); + continue; + }
- vkd3d_mutex_unlock(mutex); + writes.held_refs[writes.held_ref_count++] = u.object; + d3d12_desc_write_vk_heap(descriptor_heap, i, &writes, u.object, device);
- /* Destroy the view after unlocking to reduce wait time. */ - if (defunct_view) - vkd3d_view_destroy(defunct_view, device); + vkd3d_atomic_exchange(&src->next, 0); + }
- if (device->use_vk_heaps && dst->s.magic) - d3d12_desc_write_vk_heap(dst, src, device); + /* Avoid thunk calls wherever possible. */ + if (writes.count) + VK_CALL(vkUpdateDescriptorSets(device->vk_device, writes.count, writes.vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(&writes, device); }
-static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) { - static const struct d3d12_desc null_desc = {0}; + struct d3d12_descriptor_heap *descriptor_heap; + unsigned int i, head; + + i = dst->index; + descriptor_heap = d3d12_desc_get_descriptor_heap(dst); + head = descriptor_heap->dirty_list_head;
- d3d12_desc_write_atomic(descriptor, &null_desc, device); + /* Only one thread can swap the value away from zero. */ + if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) + return; + /* Now it is safe to modify 'next' to another nonzero value if necessary. */ + while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) + { + head = descriptor_heap->dirty_list_head; + vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); + } }
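The dirty list is an intrusive lock-free stack of descriptor indices threaded through 'next', encoded as (index << 1) | 1. The low tag bit keeps the stored value nonzero even when the linked index is 0, so zero can unambiguously mean "not on the list", and the arithmetic right shift used for decoding maps the UINT_MAX end-of-list marker back onto itself (this relies on sign-extending shifts of negative values, exactly as the decode in d3d12_desc_flush_vk_heap_updates_locked() above does). A worked example of the round trip:

    #include <limits.h>

    unsigned int head, encoded, decoded;

    head = 0;                      /* Index 0 as the next link... */
    encoded = (head << 1) | 1;     /* ...encodes to 1: nonzero, so "listed". */
    decoded = (int)encoded >> 1;   /* Decodes back to 0. */

    head = UINT_MAX;               /* End-of-list marker... */
    encoded = (head << 1) | 1;     /* ...stays 0xffffffff, */
    decoded = (int)encoded >> 1;   /* and decodes to UINT_MAX again. */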
-void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, - struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, write_count; - - vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - for (i = 0, write_count = 0; i < info->count; ++i) - { - d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); + void *object = src->s.u.object;
- if (i && locations[i].dst == locations[i - 1].dst + 1) - { - ++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount; - continue; - } - /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. */ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count); - /* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index - * for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src - * descriptors as well as dst, which is less likely to occur. And client race conditions may break it. */ - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); - - if (!info->uav_counter) - goto done; - - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - - for (i = 0, write_count = 0; i < info->count; ++i) - { - if (!locations[i].src.s.u.view_info.view->vk_counter_view) - continue; - descriptor_set->vk_buffer_views[write_count] = locations[i].src.s.u.view_info.view->vk_counter_view; - descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; - /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. */ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); + d3d12_desc_replace(dst, object, device); + if (device->use_vk_heaps && object && !dst->next) + d3d12_desc_mark_as_modified(dst); +}
-done: - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +{ + d3d12_desc_replace(descriptor, NULL, device); }
void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_desc tmp; - struct vkd3d_mutex *mutex;
assert(dst != src);
- /* Shadow of the Tomb Raider and possibly other titles sometimes destroy - * and rewrite a descriptor in another thread while it is being copied. */ - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(src->s.u.view_info.view); - - d3d12_desc_copy_raw(&tmp, src); - - vkd3d_mutex_unlock(mutex); - + tmp.s.u.object = d3d12_desc_get_object_ref(src, device); d3d12_desc_write_atomic(dst, &tmp, device); }
@@ -2455,8 +2472,9 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, return vr == VK_SUCCESS; }
-bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, - VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view) +bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, + const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, + struct vkd3d_view **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkBufferView vk_view = VK_NULL_HANDLE; @@ -2465,16 +2483,18 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) return false;
- if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER))) + if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV + ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VKD3D_VIEW_TYPE_BUFFER, device))) { VK_CALL(vkDestroyBufferView(device->vk_device, vk_view, NULL)); return false; }
- object->u.vk_buffer_view = vk_view; - object->format = format; - object->info.buffer.offset = offset; - object->info.buffer.size = size; + object->v.u.vk_buffer_view = vk_view; + object->v.format = format; + object->v.info.buffer.offset = offset; + object->v.info.buffer.size = size; *view = object; return true; } @@ -2482,7 +2502,7 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c #define VKD3D_VIEW_RAW_BUFFER 0x1
static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, - struct d3d12_resource *resource, DXGI_FORMAT view_format, + uint32_t magic, struct d3d12_resource *resource, DXGI_FORMAT view_format, unsigned int offset, unsigned int size, unsigned int structure_stride, unsigned int flags, struct vkd3d_view **view) { @@ -2513,7 +2533,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device,
assert(d3d12_resource_is_buffer(resource));
- return vkd3d_create_buffer_view(device, resource->u.vk_buffer, + return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, format, offset * element_size, size * element_size, view); }
@@ -2741,7 +2761,7 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de desc->layer_count = max_layer_count; }
-bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, +bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -2774,18 +2794,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, } }
- if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE))) + if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VKD3D_VIEW_TYPE_IMAGE, device))) { VK_CALL(vkDestroyImageView(device->vk_device, vk_view, NULL)); return false; }
- object->u.vk_image_view = vk_view; - object->format = format; - object->info.texture.vk_view_type = desc->view_type; - object->info.texture.miplevel_idx = desc->miplevel_idx; - object->info.texture.layer_idx = desc->layer_idx; - object->info.texture.layer_count = desc->layer_count; + object->v.u.vk_image_view = vk_view; + object->v.format = format; + object->v.info.texture.vk_view_type = desc->view_type; + object->v.info.texture.miplevel_idx = desc->miplevel_idx; + object->v.info.texture.layer_idx = desc->layer_idx; + object->v.info.texture.layer_count = desc->layer_count; *view = object; return true; } @@ -2794,6 +2815,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) { struct VkDescriptorBufferInfo *buffer_info; + struct vkd3d_cbuffer_desc *cb_desc; struct d3d12_resource *resource;
if (!desc) @@ -2802,13 +2824,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, return; }
+ if (!(cb_desc = vkd3d_cbuffer_desc_create(device))) + { + ERR("Failed to allocate descriptor object.\n"); + return; + } + if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) { WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); return; }
- buffer_info = &descriptor->s.u.vk_cbv_info; + buffer_info = &cb_desc->vk_cbv_info; if (desc->BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); @@ -2824,8 +2852,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, buffer_info->range = VK_WHOLE_SIZE; }
- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor->s.u.cb_desc = cb_desc; }
static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) @@ -2842,7 +2869,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image;
if (!desc) @@ -2857,15 +2883,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer SRV %#x.\n", desc->Format);
- if (vkd3d_create_buffer_view(device, null_resources->vk_buffer, + vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, null_resources->vk_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); return;
case D3D12_SRV_DIMENSION_TEXTURE2D: @@ -2904,20 +2924,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_ZERO; vkd3d_desc.allowed_swizzle = true;
- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, vk_image, &vkd3d_desc, &descriptor->s.u.view); }
static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { - struct vkd3d_view *view; unsigned int flags;
if (!desc) @@ -2933,15 +2946,9 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, }
flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, - desc->u.Buffer.StructureByteStride, flags, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + desc->u.Buffer.StructureByteStride, flags, &descriptor->s.u.view); }
static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, @@ -2970,7 +2977,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view;
if (!resource) { @@ -3002,6 +3008,11 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor,
switch (desc->ViewDimension) { + case D3D12_SRV_DIMENSION_TEXTURE1D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_1D; + vkd3d_desc.miplevel_idx = desc->u.Texture1D.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.Texture1D.MipLevels; + break; case D3D12_SRV_DIMENSION_TEXTURE2D: vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; vkd3d_desc.miplevel_idx = desc->u.Texture2D.MostDetailedMip; @@ -3066,13 +3077,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, } }
- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource->u.vk_image, &vkd3d_desc, + &descriptor->s.u.view); }
static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) @@ -3089,7 +3095,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image;
if (!desc) @@ -3104,15 +3109,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer UAV %#x.\n", desc->Format);
- if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer, + vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, null_resources->vk_storage_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); return;
case D3D12_UAV_DIMENSION_TEXTURE2D: @@ -3150,13 +3149,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; vkd3d_desc.allowed_swizzle = false;
- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, vk_image, &vkd3d_desc, &descriptor->s.u.view); }
static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3179,16 +3172,11 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ }
flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + if (!vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, desc->u.Buffer.StructureByteStride, flags, &view)) return;
-    descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV;
-    descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
-    descriptor->s.u.view_info.view = view;
-    descriptor->s.u.view_info.written_serial_id = view->serial_id;
-
    if (counter_resource)
    {
        const struct vkd3d_format *format;
@@ -3198,13 +3186,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_

        format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
        if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format,
-                desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view))
+                desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->v.vk_counter_view))
        {
            WARN("Failed to create counter buffer view.\n");
-            view->vk_counter_view = VK_NULL_HANDLE;
-            d3d12_desc_destroy(descriptor, device);
+            view->v.vk_counter_view = VK_NULL_HANDLE;
+            vkd3d_view_decref(view, device);
+            return;
        }
    }
+
+    descriptor->s.u.view = view;
}

static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor,
@@ -3212,7 +3203,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor,
    const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc)
{
    struct vkd3d_texture_view_desc vkd3d_desc;
-    struct vkd3d_view *view;

    if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0))
        return;
@@ -3227,6 +3217,9 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor,
    {
        switch (desc->ViewDimension)
        {
+            case D3D12_UAV_DIMENSION_TEXTURE1D:
+                vkd3d_desc.miplevel_idx = desc->u.Texture1D.MipSlice;
+                break;
            case D3D12_UAV_DIMENSION_TEXTURE2D:
                vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice;
                if (desc->u.Texture2D.PlaneSlice)
@@ -3257,13 +3250,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor,
        }
    }

-    if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view))
-        return;
-
-    descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV;
-    descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-    descriptor->s.u.view_info.view = view;
-    descriptor->s.u.view_info.written_serial_id = view->serial_id;
+    vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_image, &vkd3d_desc,
+            &descriptor->s.u.view);
}

void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device,
@@ -3291,12 +3279,26 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d
}

bool vkd3d_create_raw_buffer_view(struct d3d12_device *device,
-        D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view)
+        D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view)
{
    const struct vkd3d_format *format;
    struct d3d12_resource *resource;

    format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
+
+    if (!gpu_address)
+    {
+        if (device->vk_info.EXT_robustness2)
+        {
+            *vk_buffer_view = VK_NULL_HANDLE;
+            return true;
+        }
+        WARN("Creating null buffer view.\n");
+        return vkd3d_create_vk_buffer_view(device, parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV
+                ? device->null_resources.vk_storage_buffer : device->null_resources.vk_buffer,
+                format, 0, VK_WHOLE_SIZE, vk_buffer_view);
+    }
+
    resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address);
    assert(d3d12_resource_is_buffer(resource));
    return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format,
@@ -3412,21 +3414,21 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler,
        FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n",
                desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]);

-    if (!(view = vkd3d_view_create(VKD3D_VIEW_TYPE_SAMPLER)))
+    if (!(view = vkd3d_view_create(VKD3D_DESCRIPTOR_MAGIC_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER,
+            VKD3D_VIEW_TYPE_SAMPLER, device)))
        return;
+    view->v.u.vk_sampler = VK_NULL_HANDLE;
+    view->v.format = NULL;

    if (d3d12_create_sampler(device, desc->Filter, desc->AddressU,
            desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy,
-            desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0)
+            desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->v.u.vk_sampler) < 0)
    {
-        vkd3d_free(view);
+        vkd3d_view_decref(view, device);
        return;
    }

-    sampler->s.magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER;
-    sampler->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER;
-    sampler->s.u.view_info.view = view;
-    sampler->s.u.view_info.written_serial_id = view->serial_id;
+    sampler->s.u.view = view;
}

HRESULT vkd3d_create_static_sampler(struct d3d12_device *device,
@@ -3448,7 +3450,7 @@ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device,
/* RTVs */
static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_device *device)
{
-    if (rtv->magic != VKD3D_DESCRIPTOR_MAGIC_RTV)
+    if (!rtv->view)
        return;

    vkd3d_view_decref(rtv->view, device);
@@ -3527,10 +3529,9 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev
assert(d3d12_resource_is_texture(resource));
-    if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view))
+    if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view))
        return;

-    rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV;
    rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc);
    rtv_desc->format = vkd3d_desc.format;
    rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx);
@@ -3543,7 +3544,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev
/* DSVs */
static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_device *device)
{
-    if (dsv->magic != VKD3D_DESCRIPTOR_MAGIC_DSV)
+    if (!dsv->view)
        return;

    vkd3d_view_decref(dsv->view, device);
@@ -3612,10 +3613,9 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev
assert(d3d12_resource_is_texture(resource));
-    if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view))
+    if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view))
        return;

-    dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV;
    dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc);
    dsv_desc->format = vkd3d_desc.format;
    dsv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx);
@@ -3883,7 +3883,6 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size;
    VkDescriptorSetAllocateInfo set_desc;
-    unsigned int i;
    VkResult vr;

    set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
@@ -3897,8 +3896,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript
    set_size.pDescriptorCounts = &variable_binding_size;
    if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0)
    {
-        for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i)
-            descriptor_set->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set;
+        descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type;
        return S_OK;
    }
@@ -3914,7 +3912,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri
    descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE;
    memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets));
-    vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex);

    if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV
            && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER))
@@ -3925,53 +3922,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri

    for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set)
    {
-        struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set];
-        unsigned int i;
-
-        for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i)
-        {
-            descriptor_set->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-            descriptor_set->vk_descriptor_writes[i].pNext = NULL;
-            descriptor_set->vk_descriptor_writes[i].dstBinding = 0;
-            descriptor_set->vk_descriptor_writes[i].descriptorType = device->vk_descriptor_heap_layouts[set].type;
-            descriptor_set->vk_descriptor_writes[i].pImageInfo = NULL;
-            descriptor_set->vk_descriptor_writes[i].pBufferInfo = NULL;
-            descriptor_set->vk_descriptor_writes[i].pTexelBufferView = NULL;
-        }
-        switch (device->vk_descriptor_heap_layouts[set].type)
-        {
-            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-            case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-                break;
-            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-                descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0];
-                for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i)
-                {
-                    descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE;
-                    descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-                }
-                break;
-            case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-                descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0];
-                for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i)
-                {
-                    descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE;
-                    descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-                }
-                break;
-            case VK_DESCRIPTOR_TYPE_SAMPLER:
-                descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0];
-                for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i)
-                {
-                    descriptor_set->vk_image_infos[i].imageView = VK_NULL_HANDLE;
-                    descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-                }
-                break;
-            default:
-                ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type);
-                return E_FAIL;
-        }
        if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type
                && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set)))
            return hr;
@@ -3995,6 +3945,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript
        return hr;

    d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc);
+    vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex);
d3d12_device_add_ref(descriptor_heap->device = device);
@@ -4047,7 +3998,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
        {
            memset(&dst[i].s, 0, sizeof(dst[i].s));
            dst[i].index = i;
+            dst[i].next = 0;
        }
+        object->dirty_list_head = UINT_MAX;
    }
    else
    {
diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c
index c964ea8fe3a..5e46b467252 100644
--- a/libs/vkd3d/libs/vkd3d/state.c
+++ b/libs/vkd3d/libs/vkd3d/state.c
@@ -1958,7 +1958,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device,

    const struct vkd3d_shader_compile_option options[] =
    {
-        {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7},
+        {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8},
        {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)},
        {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0},
    };
@@ -2011,7 +2011,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER

    const struct vkd3d_shader_compile_option options[] =
    {
-        {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7},
+        {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8},
        {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)},
    };

diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
index 77b795d6278..e8d6371709c 100644
--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
@@ -44,13 +44,11 @@
#define VK_CALL(f) (vk_procs->f)
-#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u
-
#define VKD3D_DESCRIPTOR_MAGIC_FREE    0x00000000u
#define VKD3D_DESCRIPTOR_MAGIC_CBV     VKD3D_MAKE_TAG('C', 'B', 'V', 0)
-#define VKD3D_DESCRIPTOR_MAGIC_SRV     VKD3D_MAKE_TAG('S', 'R', 'V', 1)
-#define VKD3D_DESCRIPTOR_MAGIC_UAV     VKD3D_MAKE_TAG('U', 'A', 'V', 1)
-#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 1)
+#define VKD3D_DESCRIPTOR_MAGIC_SRV     VKD3D_MAKE_TAG('S', 'R', 'V', 0)
+#define VKD3D_DESCRIPTOR_MAGIC_UAV     VKD3D_MAKE_TAG('U', 'A', 'V', 0)
+#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 0)
#define VKD3D_DESCRIPTOR_MAGIC_DSV     VKD3D_MAKE_TAG('D', 'S', 'V', 0)
#define VKD3D_DESCRIPTOR_MAGIC_RTV     VKD3D_MAKE_TAG('R', 'T', 'V', 0)

@@ -252,6 +250,31 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond)
{
}

+static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x)
+{
+    return InterlockedDecrement((LONG volatile *)x);
+}
+
+static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg)
+{
+    return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp;
+}
+
+static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val)
+{
+    return InterlockedExchange((LONG volatile *)x, val);
+}
+
+static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg)
+{
+    return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp;
+}
+
+static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val)
+{
+    return InterlockedExchangePointer(x, val);
+}
+
#else  /* _WIN32 */

#include <pthread.h>
@@ -354,6 +377,63 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond)
        ERR("Could not destroy the condition variable, error %d.\n", ret);
}
+# if HAVE_SYNC_SUB_AND_FETCH
+static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x)
+{
+    return __sync_sub_and_fetch(x, 1);
+}
+# else
+# error "vkd3d_atomic_decrement() not implemented for this platform"
+# endif  /* HAVE_SYNC_SUB_AND_FETCH */
+
+# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP
+static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg)
+{
+    return __sync_bool_compare_and_swap(x, cmp, xchg);
+}
+
+static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg)
+{
+    return __sync_bool_compare_and_swap(x, cmp, xchg);
+}
+# else
+# error "vkd3d_atomic_compare_exchange() not implemented for this platform"
+# endif
+
+# if HAVE_ATOMIC_EXCHANGE_N
+static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val)
+{
+    return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST);
+}
+
+static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val)
+{
+    return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST);
+}
+# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP
+static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val)
+{
+    unsigned int i;
+    do
+    {
+        i = *x;
+    } while (!__sync_bool_compare_and_swap(x, i, val));
+    return i;
+}
+
+static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val)
+{
+    void *p;
+    do
+    {
+        p = *x;
+    } while (!__sync_bool_compare_and_swap(x, p, val));
+    return p;
+}
+# else
+# error "vkd3d_atomic_exchange() not implemented for this platform"
+# endif
+
#endif  /* _WIN32 */
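Note that vkd3d_atomic_compare_exchange() returns whether the swap happened rather than the previous value, so lock-free retry loops test its result directly. A minimal sketch of the pattern these helpers enable; vkd3d_atomic_increment_sketch is a hypothetical name for illustration, not part of this patch:

static inline unsigned int vkd3d_atomic_increment_sketch(unsigned int volatile *x)
{
    unsigned int i;

    /* Re-read and retry until no other thread changed *x between the read and the swap. */
    do
    {
        i = *x;
    } while (!vkd3d_atomic_compare_exchange(x, i, i + 1));

    return i + 1;
}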
HRESULT vkd3d_create_thread(struct vkd3d_instance *instance,
@@ -563,6 +643,7 @@ struct d3d12_heap
{
    ID3D12Heap ID3D12Heap_iface;
    LONG refcount;
+    LONG resource_count;

    bool is_private;
    D3D12_HEAP_DESC desc;
@@ -661,11 +742,9 @@ enum vkd3d_view_type
    VKD3D_VIEW_TYPE_SAMPLER,
};

-struct vkd3d_view
+struct vkd3d_resource_view
{
-    LONG refcount;
    enum vkd3d_view_type type;
-    uint64_t serial_id;
    union
    {
        VkBufferView vk_buffer_view;
@@ -691,9 +770,6 @@ struct vkd3d_view
    } info;
};

-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device);
-void vkd3d_view_incref(struct vkd3d_view *view);
-
struct vkd3d_texture_view_desc
{
    VkImageViewType view_type;
@@ -707,32 +783,88 @@ struct vkd3d_texture_view_desc
    bool allowed_swizzle;
};

-bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format,
-        VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view);
-bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image,
+struct vkd3d_desc_header
+{
+    uint32_t magic;
+    unsigned int volatile refcount;
+    void *next;
+    VkDescriptorType vk_descriptor_type;
+};
+
+struct vkd3d_view
+{
+    struct vkd3d_desc_header h;
+    struct vkd3d_resource_view v;
+};
+
+bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer,
+        const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view);
+bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image,
        const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view);

-struct vkd3d_view_info
+struct vkd3d_cbuffer_desc
{
-    uint64_t written_serial_id;
-    struct vkd3d_view *view;
+    struct vkd3d_desc_header h;
+    VkDescriptorBufferInfo vk_cbv_info;
};

struct d3d12_desc
{
    struct
    {
-        uint32_t magic;
-        VkDescriptorType vk_descriptor_type;
-        union
+        union d3d12_desc_object
        {
-            VkDescriptorBufferInfo vk_cbv_info;
-            struct vkd3d_view_info view_info;
+            struct vkd3d_desc_header *header;
+            struct vkd3d_view *view;
+            struct vkd3d_cbuffer_desc *cb_desc;
+            void *object;
        } u;
    } s;
    unsigned int index;
+    unsigned int next;
};
+void vkd3d_view_decref(void *view, struct d3d12_device *device);
+
+static inline bool vkd3d_view_incref(void *desc)
+{
+    struct vkd3d_desc_header *h = desc;
+    unsigned int refcount;
+
+    do
+    {
+        refcount = h->refcount;
+        /* Avoid incrementing a freed object. Reading the value is safe because objects are recycled. */
+        if (refcount <= 0)
+            return false;
+    }
+    while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1));
+
+    return true;
+}
+
+static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc *src, struct d3d12_device *device)
+{
+    void *view;
+
+    /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors
+     * from multiple threads without synchronisation. This is apparently valid in Windows. */
+    for (;;)
+    {
+        do
+        {
+            view = src->s.u.object;
+        } while (view && !vkd3d_view_incref(view));
+
+        /* Check if the object is still in src to handle the case where it was
+         * already freed and reused elsewhere when the refcount was incremented. */
+        if (view == src->s.u.object)
+            return view;
+
+        vkd3d_view_decref(view, device);
+    }
+}
+
static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
{
    return (struct d3d12_desc *)cpu_handle.ptr;
@@ -761,13 +893,12 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *
void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src,
        struct d3d12_device *device);
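For reviewers, this is how the acquire/release pairing above is meant to be used: d3d12_desc_get_object_ref() returns a referenced object (or NULL for an empty slot), and the caller drops the reference with vkd3d_view_decref() when done. A hedged sketch; use_descriptor_sketch and the accesses named in the comment are illustrative only:

static void use_descriptor_sketch(const volatile struct d3d12_desc *desc, struct d3d12_device *device)
{
    union d3d12_desc_object u;

    if (!(u.object = d3d12_desc_get_object_ref(desc, device)))
        return; /* Empty descriptor slot. */

    /* The reference keeps the object alive here even if another thread
     * overwrites the slot concurrently, e.g. while reading u.view->v or
     * u.cb_desc->vk_cbv_info. */

    vkd3d_view_decref(u.object, device);
}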
bool vkd3d_create_raw_buffer_view(struct d3d12_device *device,
-        D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view);
+        D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view);
HRESULT vkd3d_create_static_sampler(struct d3d12_device *device,
        const D3D12_STATIC_SAMPLER_DESC *desc, VkSampler *vk_sampler);

struct d3d12_rtv_desc
{
-    uint32_t magic;
    VkSampleCountFlagBits sample_count;
    const struct vkd3d_format *format;
    uint64_t width;
@@ -787,7 +918,6 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev

struct d3d12_dsv_desc
{
-    uint32_t magic;
    VkSampleCountFlagBits sample_count;
    const struct vkd3d_format *format;
    uint64_t width;
@@ -837,15 +967,10 @@ struct vkd3d_vk_descriptor_heap_layout
    VkDescriptorSetLayout vk_set_layout;
};

-#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 64
-
struct d3d12_descriptor_heap_vk_set
{
    VkDescriptorSet vk_set;
-    VkDescriptorBufferInfo vk_buffer_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
-    VkBufferView vk_buffer_views[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
-    VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
-    VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
+    VkDescriptorType vk_type;
};

/* ID3D12DescriptorHeap */
@@ -865,9 +990,13 @@ struct d3d12_descriptor_heap
    struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT];
    struct vkd3d_mutex vk_sets_mutex;

-    BYTE descriptors[];
+    unsigned int volatile dirty_list_head;
+
+    uint8_t DECLSPEC_ALIGN(sizeof(void *)) descriptors[];
};

+void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device);
+
static inline struct d3d12_descriptor_heap *d3d12_desc_get_descriptor_heap(const struct d3d12_desc *descriptor)
{
    return CONTAINING_RECORD(descriptor - descriptor->index, struct d3d12_descriptor_heap, descriptors);
@@ -882,22 +1011,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d
HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
        const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
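The dirty_list_head field chains written descriptors through their next index so that d3d12_desc_flush_vk_heap_updates_locked() can later push them to the Vulkan descriptor set; UINT_MAX is the terminator, matching the initialiser in d3d12_descriptor_heap_create() above. A sketch of the push side under those assumptions; the patch's exact encoding may differ, and descriptor_heap_mark_dirty_sketch is a hypothetical name:

static void descriptor_heap_mark_dirty_sketch(struct d3d12_descriptor_heap *heap, struct d3d12_desc *desc)
{
    unsigned int head;

    /* Lock-free push of the descriptor's index onto the intrusive dirty list. */
    do
    {
        head = heap->dirty_list_head;
        desc->next = head;
    } while (!vkd3d_atomic_compare_exchange(&heap->dirty_list_head, head, desc->index));
}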
-struct d3d12_desc_copy_location
-{
-    struct d3d12_desc src;
-    struct d3d12_desc *dst;
-};
-
-struct d3d12_desc_copy_info
-{
-    unsigned int count;
-    bool uav_counter;
-};
-
-void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info,
-        struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set,
-        struct d3d12_device *device);
-
/* ID3D12QueryHeap */
struct d3d12_query_heap
{
@@ -1295,6 +1408,8 @@ struct d3d12_command_list
    VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];

    void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point);
+    struct d3d12_descriptor_heap *descriptor_heaps[64];
+    unsigned int descriptor_heap_count;

    struct vkd3d_private_store private_store;
};
@@ -1485,6 +1600,12 @@ struct vkd3d_uav_clear_state
HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);

+struct vkd3d_desc_object_cache
+{
+    void * volatile head;
+    size_t size;
+};
+
#define VKD3D_DESCRIPTOR_POOL_COUNT 6
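The view_desc_cache and cbuffer_desc_cache members added to d3d12_device below recycle freed descriptor objects through this structure's head pointer, which is what makes it safe for vkd3d_view_incref() to read a stale object's refcount. A sketch of how such a cache can be driven with the atomic helpers earlier in this header; both function names are hypothetical, and detaching the whole list with an exchange sidesteps the ABA problem a naive CAS pop would have:

static void desc_object_cache_push_sketch(struct vkd3d_desc_object_cache *cache, struct vkd3d_desc_header *h)
{
    void *head;

    /* Chain the freed object through its next field and publish it as the new head. */
    do
    {
        head = cache->head;
        h->next = head;
    } while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, h));
}

static struct vkd3d_desc_header *desc_object_cache_detach_sketch(struct vkd3d_desc_object_cache *cache)
{
    /* Take every cached object at once; the caller walks the next chain. */
    return vkd3d_atomic_exchange_pointer(&cache->head, NULL);
}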
/* ID3D12Device */
@@ -1502,7 +1623,8 @@ struct d3d12_device
    struct vkd3d_gpu_va_allocator gpu_va_allocator;

    struct vkd3d_mutex mutex;
-    struct vkd3d_mutex desc_mutex[8];
+    struct vkd3d_desc_object_cache view_desc_cache;
+    struct vkd3d_desc_object_cache cbuffer_desc_cache;
    struct vkd3d_render_pass_cache render_pass_cache;
    VkPipelineCache vk_pipeline_cache;

@@ -1544,6 +1666,7 @@ struct d3d12_device
    struct vkd3d_uav_clear_state uav_clear_state;

    VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
+    unsigned int vk_pool_count;
    struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
    bool use_vk_heaps;
};
@@ -1577,19 +1700,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str
    return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type);
}

-static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device,
-        const struct d3d12_desc *descriptor)
-{
-    STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1)));
-    uintptr_t idx = (uintptr_t)descriptor;
-
-    idx ^= idx >> 12;
-    idx ^= idx >> 6;
-    idx ^= idx >> 3;
-
-    return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)];
-}
-
/* utils */
enum vkd3d_format_type
{
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them, that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=134205
Your paranoid android.
=== debian11 (32 bit report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

d3dcompiler_46:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

d3dcompiler_47:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
d3dcompiler_46:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_46:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_46:hlsl_d3d9 has unaccounted for todo messages
d3dcompiler_47:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_47:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_47:hlsl_d3d9 has unaccounted for todo messages
=== debian11 (32 bit ar:MA report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11 (32 bit de report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11 (32 bit fr report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11 (32 bit he:IL report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11 (32 bit hi:IN report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11 (32 bit ja:JP report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11 (32 bit zh:CN report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11b (32 bit WoW report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages
=== debian11b (64 bit WoW report) ===
d3dcompiler_43:
hlsl_d3d9.c:1112: Test failed: Test 0: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 1: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.
hlsl_d3d9.c:1112: Test failed: Test 2: Got unexpected value {2.47058839e-001, 0.00000000e+000, 2.47058839e-001, 0.00000000e+000}.

Report validation errors:
d3dcompiler_43:hlsl_d3d9 has no test summary line (early exit of the main process?)
d3dcompiler_43:hlsl_d3d9 has unaccounted for failure messages
d3dcompiler_43:hlsl_d3d9 has unaccounted for todo messages