From: Alexandre Julliard <julliard@winehq.org>
--- dlls/d3dcompiler_43/tests/hlsl_d3d9.c | 14 +- libs/vkd3d/include/private/vkd3d_common.h | 45 +- libs/vkd3d/include/private/vkd3d_debug.h | 147 - libs/vkd3d/include/vkd3d.h | 1 + libs/vkd3d/include/vkd3d_shader.h | 189 +- libs/vkd3d/libs/vkd3d-common/debug.c | 9 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 84 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 199 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 1060 +++-- libs/vkd3d/libs/vkd3d-shader/fx.c | 4 +- libs/vkd3d/libs/vkd3d-shader/glsl.c | 172 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 152 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 62 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 300 +- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2443 ++++++---- .../libs/vkd3d-shader/hlsl_constant_ops.c | 446 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 3961 ++++++++++++----- libs/vkd3d/libs/vkd3d-shader/msl.c | 630 ++- libs/vkd3d/libs/vkd3d-shader/preproc.l | 12 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 1350 +++--- libs/vkd3d/libs/vkd3d-shader/tpf.c | 71 +- .../libs/vkd3d-shader/vkd3d_shader_main.c | 331 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 209 +- .../vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c | 15 +- libs/vkd3d/libs/vkd3d/command.c | 92 +- libs/vkd3d/libs/vkd3d/device.c | 24 +- libs/vkd3d/libs/vkd3d/state.c | 8 +- libs/vkd3d/libs/vkd3d/utils.c | 3 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 2 - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 5 +- 31 files changed, 7824 insertions(+), 4219 deletions(-) delete mode 100644 libs/vkd3d/include/private/vkd3d_debug.h
diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c index 990c95be70d..d9d050c3f99 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c @@ -561,7 +561,6 @@ static void test_conditionals(void) return; device = test_context.device;
- todo_wine ps_code = compile_shader(ps_if_source, "ps_2_0", 0); if (ps_code) { @@ -571,15 +570,15 @@ static void test_conditionals(void) for (i = 0; i < 200; i += 40) { v = get_readback_vec4(&rb, i, 0); - todo_wine ok(compare_vec4(v, 0.9f, 0.8f, 0.7f, 0.6f, 0), - "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); + ok(compare_vec4(v, 0.9f, 0.8f, 0.7f, 0.6f, 0), + "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); }
for (i = 240; i < 640; i += 40) { v = get_readback_vec4(&rb, i, 0); - todo_wine ok(compare_vec4(v, 0.1f, 0.2f, 0.3f, 0.4f, 0), - "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); + ok(compare_vec4(v, 0.1f, 0.2f, 0.3f, 0.4f, 0), + "Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", v->x, v->y, v->z, v->w); }
release_readback(&rb); @@ -1193,6 +1192,7 @@ static void check_constant_desc(const char *prefix, const D3DXCONSTANT_DESC *des ok(desc->Elements == expect->Elements, "%s: got Elements %u.\n", prefix, desc->Elements); ok(desc->StructMembers == expect->StructMembers, "%s: got StructMembers %u.\n", prefix, desc->StructMembers); ok(desc->Bytes == expect->Bytes, "%s: got Bytes %u.\n", prefix, desc->Bytes); + todo_wine_if(nonzero_defaultvalue) ok(!!desc->DefaultValue == nonzero_defaultvalue, "%s: got DefaultValue %p.\n", prefix, desc->DefaultValue); }
@@ -1287,9 +1287,7 @@ static void test_constant_table(void) static const D3DXCONSTANT_DESC expect_fields_j = {"a", D3DXRS_FLOAT4, 0, 3, D3DXPC_MATRIX_COLUMNS, D3DXPT_FLOAT, 3, 3, 1, 0, 36};
- todo_wine ps_code = compile_shader(source, "ps_2_0", 0); - if (!ps_code) - return; + ps_code = compile_shader(source, "ps_2_0", 0);
hr = pD3DXGetShaderConstantTable(ID3D10Blob_GetBufferPointer(ps_code), &constants); ok(hr == D3D_OK, "Got unexpected hr %#lx.\n", hr); diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 08341304eea..7ee11b54396 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -19,6 +19,11 @@ #ifndef __VKD3D_COMMON_H #define __VKD3D_COMMON_H
+#ifndef VKD3D_DEBUG_ENV +#define VKD3D_DEBUG_ENV VKD3D_DEBUG +#endif +#define VKD3D_DEBUG_ENV_NAME VKD3D_EXPAND_AND_STRINGIFY(VKD3D_DEBUG_ENV) + #include "config.h" #define WIN32_LEAN_AND_MEAN #include "windows.h" @@ -38,6 +43,8 @@ #include <intrin.h> #endif
+#define VKD3D_SHADER_API_VERSION_CURRENT VKD3D_SHADER_API_VERSION_1_18 + #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #endif @@ -147,9 +154,10 @@ enum vkd3d_dbg_level VKD3D_DBG_LEVEL_TRACE, };
-enum vkd3d_dbg_level vkd3d_dbg_get_level(void); +enum vkd3d_dbg_level vkd3d_dbg_get_level(const char *vkd3d_dbg_env_name);
-void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); +void vkd3d_dbg_printf(const char *vkd3d_dbg_env_name, enum vkd3d_dbg_level level, + const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback);
const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); @@ -172,7 +180,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); VKD3D_DBG_PRINTF_##level
#define VKD3D_DBG_PRINTF(...) \ - vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) + vkd3d_dbg_printf(VKD3D_DEBUG_ENV_NAME, vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0)
#define VKD3D_DBG_PRINTF_TRACE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) #define VKD3D_DBG_PRINTF_WARN(...) VKD3D_DBG_PRINTF(__VA_ARGS__) @@ -181,7 +189,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size);
#ifdef VKD3D_ABORT_ON_ERR #define VKD3D_DBG_PRINTF_ERR(...) \ - vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); \ + vkd3d_dbg_printf(VKD3D_DEBUG_ENV_NAME, vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); \ abort(); \ } while (0) #else @@ -215,19 +223,17 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); #endif
#ifndef TRACE_ON -#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) +#define TRACE_ON() (vkd3d_dbg_get_level(VKD3D_DEBUG_ENV_NAME) == VKD3D_DBG_LEVEL_TRACE) #endif
#ifndef WARN_ON -#define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) +#define WARN_ON() (vkd3d_dbg_get_level(VKD3D_DEBUG_ENV_NAME) >= VKD3D_DBG_LEVEL_WARN) #endif
#ifndef FIXME_ONCE #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) #endif
-#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name - static inline const char *debugstr_guid(const GUID *guid) { if (!guid) @@ -341,6 +347,29 @@ static inline unsigned int vkd3d_log2i(unsigned int x) #endif }
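A minimal sketch of the new mechanism, for orientation: the per-module vkd3d_dbg_env_name global (its defining macro is removed above) is replaced by a name baked in at compile time. A module defines VKD3D_DEBUG_ENV before vkd3d_common.h is included (otherwise it defaults to VKD3D_DEBUG), and VKD3D_DEBUG_ENV_NAME stringifies it for every logging call. The channel name below is hypothetical, and this assumes the TRACE macro now lives in vkd3d_common.h, as the deletion of vkd3d_debug.h further down suggests:

    /* Hypothetical module; in practice VKD3D_DEBUG_ENV would likely come from
     * the build system, e.g. -DVKD3D_DEBUG_ENV=LIBVKD3D_SHADER_DEBUG. */
    #define VKD3D_DEBUG_ENV LIBVKD3D_SHADER_DEBUG
    #include "vkd3d_common.h"

    static void example(void)
    {
        /* Expands to vkd3d_dbg_printf("LIBVKD3D_SHADER_DEBUG", ...), so the
         * log level is looked up through that environment variable rather
         * than through a per-module vkd3d_dbg_env_name global. */
        TRACE("debug channel: %s.\n", VKD3D_DEBUG_ENV_NAME);
    }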
+static inline unsigned int vkd3d_ctz(uint32_t v) +{ +#ifdef _WIN32 + ULONG result; + if (_BitScanForward(&result, v)) + return (unsigned int)result; + return 32; +#elif defined(HAVE_BUILTIN_CTZ) + return __builtin_ctz(v); +#else + unsigned int c = 31; + + v &= -v; + c = (v & 0x0000ffff) ? c - 16 : c; + c = (v & 0x00ff00ff) ? c - 8 : c; + c = (v & 0x0f0f0f0f) ? c - 4 : c; + c = (v & 0x33333333) ? c - 2 : c; + c = (v & 0x55555555) ? c - 1 : c; + + return c; +#endif +} + static inline void *vkd3d_memmem( const void *haystack, size_t haystack_len, const void *needle, size_t needle_len) { const char *str = haystack; diff --git a/libs/vkd3d/include/private/vkd3d_debug.h b/libs/vkd3d/include/private/vkd3d_debug.h deleted file mode 100644 index c5b6ccedf81..00000000000 --- a/libs/vkd3d/include/private/vkd3d_debug.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright 2016 Józef Kucia for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#ifndef __VKD3D_DEBUG_H -#define __VKD3D_DEBUG_H - -#include "vkd3d_common.h" - -#include <stdarg.h> -#include <stdbool.h> -#include <stdint.h> - -#ifdef VKD3D_NO_TRACE_MESSAGES -#define TRACE(args...) do { } while (0) -#define TRACE_ON() (false) -#endif - -#ifdef VKD3D_NO_DEBUG_MESSAGES -#define WARN(args...) do { } while (0) -#define FIXME(args...) do { } while (0) -#endif - -enum vkd3d_dbg_level -{ - VKD3D_DBG_LEVEL_NONE, - VKD3D_DBG_LEVEL_ERR, - VKD3D_DBG_LEVEL_FIXME, - VKD3D_DBG_LEVEL_WARN, - VKD3D_DBG_LEVEL_TRACE, -}; - -enum vkd3d_dbg_level vkd3d_dbg_get_level(void); - -void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); -void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback); - -const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); -const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args); -const char *debugstr_a(const char *str); -const char *debugstr_an(const char *str, size_t n); -const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - -#define VKD3D_DBG_LOG(level) \ - do { \ - const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ - VKD3D_DBG_PRINTF - -#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ - do { \ - static bool vkd3d_dbg_next_time; \ - const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ - ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ - vkd3d_dbg_next_time = true; \ - VKD3D_DBG_PRINTF - -#define VKD3D_DBG_PRINTF(...) 
\ - vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) - -#ifndef TRACE -#define TRACE VKD3D_DBG_LOG(TRACE) -#endif - -#ifndef WARN -#define WARN VKD3D_DBG_LOG(WARN) -#endif - -#ifndef FIXME -#define FIXME VKD3D_DBG_LOG(FIXME) -#endif - -#define ERR VKD3D_DBG_LOG(ERR) - -#ifndef TRACE_ON -#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) -#endif - -#ifndef WARN_ON -#define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) -#endif - -#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) - -#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name - -static inline const char *debugstr_guid(const GUID *guid) -{ - if (!guid) - return "(null)"; - - return vkd3d_dbg_sprintf("{%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", - (unsigned long)guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], - guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], - guid->Data4[5], guid->Data4[6], guid->Data4[7]); -} - -static inline const char *debugstr_hresult(HRESULT hr) -{ - switch (hr) - { -#define TO_STR(u) case u: return #u; - TO_STR(S_OK) - TO_STR(S_FALSE) - TO_STR(E_NOTIMPL) - TO_STR(E_NOINTERFACE) - TO_STR(E_POINTER) - TO_STR(E_ABORT) - TO_STR(E_FAIL) - TO_STR(E_OUTOFMEMORY) - TO_STR(E_INVALIDARG) - TO_STR(DXGI_ERROR_NOT_FOUND) - TO_STR(DXGI_ERROR_MORE_DATA) - TO_STR(DXGI_ERROR_UNSUPPORTED) -#undef TO_STR - default: - return vkd3d_dbg_sprintf("%#x", (int)hr); - } -} - -unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value); - -struct vkd3d_debug_option -{ - const char *name; - uint64_t flag; -}; - -bool vkd3d_debug_list_has_member(const char *string, const char *member); -uint64_t vkd3d_parse_debug_options(const char *string, - const struct vkd3d_debug_option *options, unsigned int option_count); -void vkd3d_set_thread_name(const char *name); - -#endif /* __VKD3D_DEBUG_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index 8286f36f6ba..38b3c82d515 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -102,6 +102,7 @@ enum vkd3d_api_version VKD3D_API_VERSION_1_15, VKD3D_API_VERSION_1_16, VKD3D_API_VERSION_1_17, + VKD3D_API_VERSION_1_18,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), }; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index d82869e79ea..352c222f27d 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -60,6 +60,7 @@ enum vkd3d_shader_api_version VKD3D_SHADER_API_VERSION_1_15, VKD3D_SHADER_API_VERSION_1_16, VKD3D_SHADER_API_VERSION_1_17, + VKD3D_SHADER_API_VERSION_1_18,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), }; @@ -120,6 +121,16 @@ enum vkd3d_shader_structure_type * \since 1.15 */ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, + /** + * The structure is a vkd3d_shader_scan_thread_group_size_info structure. + * \since 1.18 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_THREAD_GROUP_SIZE_INFO, + /** + * The structure is a vkd3d_shader_d3dbc_source_info structure. + * \since 1.18 + */ + VKD3D_SHADER_STRUCTURE_TYPE_D3DBC_SOURCE_INFO,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -417,10 +428,17 @@ struct vkd3d_shader_code { /** * Pointer to the code. Note that textual formats are not null-terminated. - * Therefore \a size should not include a null terminator, when this - * structure is passed as input to a vkd3d-shader function, and the - * allocated string will not include a null terminator when this structure - * is used as output. + * Therefore \a size should not include a null terminator when this + * structure is passed as input to a vkd3d-shader function, and \a size + * will not include a null terminator when this structure is used as + * output. + * + * For convenience, vkd3d_shader_preprocess() and vkd3d_shader_compile() + * will append a null terminator past the end of their output when + * outputting textual formats like VKD3D_SHADER_TARGET_D3D_ASM. This makes + * it safe to call functions like strlen() on \a code for such output, + * although doing so will obviously not account for any embedded null + * characters that may be present. */ const void *code; /** Size of \a code, in bytes. */ @@ -944,6 +962,83 @@ enum vkd3d_shader_parameter_name * \since 1.15 */ VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE, + /** + * Bump-mapping matrix. This parameter is used in the evaluation of the + * Shader Model 1.x instructions BEM, TEXBEM, and TEXBEML. + * + * This parameter specifies a 2x2 matrix, packed into a vector in the order + * [00, 01, 10, 11], where "01" specifies the component at column 0 and row + * 1. These coordinates correspond to the Direct3D notation. + * + * To use this parameter to implement Direct3D bump mapping, pass the values + * of the texture stage states D3DTSS_BUMPENVMAT00, D3DTSS_BUMPENVMAT01, + * D3DTSS_BUMPENVMAT10, and D3DTSS_BUMPENVMAT11, in that order. + * + * These enum values are contiguous and arithmetic may safely be performed + * on them. That is, VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_[n] is + * VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_0 plus n. + * + * The data type for each parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. + * + * The default value for each parameter is the zero matrix [0, 0; 0, 0]. + * + * \since 1.18 + */ + VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_0, + VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_1, + VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_2, + VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_3, + VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_4, + VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_5, + /** + * Bump-mapping luminance scale factor. This parameter is used in the + * evaluation of the Shader Model 1.x instruction TEXBEML. + * + * To use this parameter to implement Direct3D bump mapping, pass the value + * of the texture stage state D3DTSS_BUMPENVLSCALE. + * + * These enum values are contiguous and arithmetic may safely be performed + * on them. That is, VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_[n] is + * VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_0 plus n. + * + * The data type for each parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. + * + * The default value for each parameter is 0.0. + * + * \since 1.18 + */ + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_0, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_1, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_2, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_3, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_4, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_5, + /** + * Bump-mapping luminance offset. 
This parameter is used in the + * evaluation of the Shader Model 1.x instruction TEXBEML. + * + * To use this parameter to implement Direct3D bump mapping, pass the value + * of the texture stage state D3DTSS_BUMPENVLOFFSET. + * + * These enum values are contiguous and arithmetic may safely be performed + * on them. That is, VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_[n] is + * VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_0 plus n. + * + * The data type for each parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. + * + * The default value for each parameter is 0.0. + * + * \since 1.18 + */ + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_0, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_1, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_2, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_3, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_4, + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_5,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), }; @@ -2282,6 +2377,79 @@ struct vkd3d_shader_scan_hull_shader_tessellation_info enum vkd3d_shader_tessellator_partitioning partitioning; };
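A sketch of the caller side for the bump-mapping parameters documented above, as a Direct3D 9 implementation might provide them for texture stage 0. The helper below is hypothetical, and the immediate-constant layout (u.immediate_constant.u.f32_vec4 with x/y/z/w members) is assumed to match struct vkd3d_shader_parameter1 as declared elsewhere in this header:

    static void fill_stage0_bump_parameters(struct vkd3d_shader_parameter1 params[3],
            const float m[4], float lscale, float loffset)
    {
        /* Matrix packing order is [00, 01, 10, 11], as documented above. */
        params[0].name = VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_0;
        params[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT;
        params[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4;
        params[0].u.immediate_constant.u.f32_vec4.x = m[0]; /* D3DTSS_BUMPENVMAT00 */
        params[0].u.immediate_constant.u.f32_vec4.y = m[1]; /* D3DTSS_BUMPENVMAT01 */
        params[0].u.immediate_constant.u.f32_vec4.z = m[2]; /* D3DTSS_BUMPENVMAT10 */
        params[0].u.immediate_constant.u.f32_vec4.w = m[3]; /* D3DTSS_BUMPENVMAT11 */

        params[1].name = VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_0;
        params[1].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT;
        params[1].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32;
        params[1].u.immediate_constant.u.f32 = lscale; /* D3DTSS_BUMPENVLSCALE */

        params[2].name = VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_0;
        params[2].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT;
        params[2].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32;
        params[2].u.immediate_constant.u.f32 = loffset; /* D3DTSS_BUMPENVLOFFSET */
    }

Because the enum values are documented as contiguous, stage n would use the same code with VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_0 + n and friends.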
+/** + * A chained structure describing the thread group size in a compute shader. + * + * This structure extends vkd3d_shader_compile_info. + * + * \since 1.18 + */ +struct vkd3d_shader_scan_thread_group_size_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_THREAD_GROUP_SIZE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The thread group size in each of the x, y, and z dimensions. */ + unsigned int x, y, z; +}; + +/** + * A chained structure containing legacy Direct3D bytecode compilation parameters. + * This structure specifies some information about the source environment that + * is not specified in the source shader format, but may be necessary for the + * target format. + * + * This structure is optional. + * + * This structure extends vkd3d_shader_compile_info. + * + * This structure contains only input parameters. + * + * \since 1.18 + */ +struct vkd3d_shader_d3dbc_source_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_D3DBC_SOURCE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** + * The dimension of each texture bound to the shader. + * + * If this structure is not specified, the dimension for all textures will + * be VKD3D_SHADER_RESOURCE_TEXTURE_2D. + * + * The dimensions of textures in this array that are not used by the + * shader are ignored. + * + * This field is ignored for shader models 2 and higher. + */ + enum vkd3d_shader_resource_type texture_dimensions[6]; + + /** + * A mask indicating which samplers should be shadow (i.e. comparison-mode) + * samplers. When legacy Direct3D shaders are used with the Direct3D 8 and 9 + * APIs, this is implied by the format of the sampled resource; e.g. a + * D3DFMT_D24S8 texture implies shadow sampling, while a D3DFMT_A8R8G8B8 + * or D3DFMT_INTZ texture does not. + * This information is necessary when converting to other formats + * (e.g. SPIR-V, GLSL) which specify this in the shader. + * + * For example, if bit 1 is set (so the value is 0x2), this indicates that + * the sampler at bind point 1 (and no others) should be a shadow sampler. + * + * Bits in this mask corresponding to textures not used by the shader will + * be ignored. + * + * If this structure is not specified, no samplers will be considered to + * be shadow samplers. + */ + uint32_t shadow_samplers; +}; + /** * Data type of a shader varying, returned as part of struct * vkd3d_shader_signature_element. @@ -2775,6 +2943,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * * Depending on the source and target types, this function may support the * following chained structures: + * - vkd3d_shader_d3dbc_source_info * - vkd3d_shader_descriptor_offset_info * - vkd3d_shader_hlsl_source_info * - vkd3d_shader_interface_info @@ -2784,6 +2953,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * - vkd3d_shader_scan_descriptor_info * - vkd3d_shader_scan_hull_shader_tessellation_info * - vkd3d_shader_scan_signature_info + * - vkd3d_shader_scan_thread_group_size_info * - vkd3d_shader_spirv_domain_shader_target_info * - vkd3d_shader_spirv_target_info * - vkd3d_shader_transform_feedback_info @@ -2972,10 +3142,21 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * \param compile_info A chained structure containing scan parameters.
* \n * The scanner supports the following chained structures: + * - vkd3d_shader_d3dbc_source_info + * - vkd3d_shader_descriptor_offset_info + * - vkd3d_shader_hlsl_source_info + * - vkd3d_shader_interface_info + * - vkd3d_shader_parameter_info + * - vkd3d_shader_preprocess_info * - vkd3d_shader_scan_combined_resource_sampler_info * - vkd3d_shader_scan_descriptor_info * - vkd3d_shader_scan_hull_shader_tessellation_info * - vkd3d_shader_scan_signature_info + * - vkd3d_shader_scan_thread_group_size_info + * - vkd3d_shader_spirv_domain_shader_target_info + * - vkd3d_shader_spirv_target_info + * - vkd3d_shader_transform_feedback_info + * - vkd3d_shader_varying_map_info * \n * Although the \a compile_info parameter is read-only, chained structures * passed to this function need not be, and may serve as output parameters, diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index 32862024b90..b5d74ca3dc7 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -40,8 +40,6 @@ #define VKD3D_DEBUG_BUFFER_COUNT 64 #define VKD3D_DEBUG_BUFFER_SIZE 512
-extern const char *const vkd3d_dbg_env_name; - static const char *const debug_level_names[] = { [VKD3D_DBG_LEVEL_NONE ] = "none", @@ -52,7 +50,7 @@ static const char *const debug_level_names[] = [VKD3D_DBG_LEVEL_TRACE] = "trace", };
-enum vkd3d_dbg_level vkd3d_dbg_get_level(void) +enum vkd3d_dbg_level vkd3d_dbg_get_level(const char *vkd3d_dbg_env_name) { static unsigned int level = ~0u; const char *vkd3d_debug; @@ -108,11 +106,12 @@ static uint64_t get_pthread_threadid(void) } #endif
-void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) +void vkd3d_dbg_printf(const char *vkd3d_dbg_env_name, + enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) { va_list args;
- if (vkd3d_dbg_get_level() < level) + if (vkd3d_dbg_get_level(vkd3d_dbg_env_name) < level) return;
#ifdef _WIN32 diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 6425a8f62d2..b316f6c8830 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -145,11 +145,6 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, en
static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) { - if (atomic_flags & VKD3DARF_SEQ_CST) - { - vkd3d_string_buffer_printf(&compiler->buffer, "_seqCst"); - atomic_flags &= ~VKD3DARF_SEQ_CST; - } if (atomic_flags & VKD3DARF_VOLATILE) { vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); @@ -390,13 +385,30 @@ static void shader_print_resource_type(struct vkd3d_d3d_asm_compiler *compiler,
static void shader_print_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_data_type type) { - static const char *const data_type_names[] = + const char *name; + + if ((name = vsir_data_type_get_name(type, NULL))) + vkd3d_string_buffer_printf(&compiler->buffer, "%s", name); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%s<unhandled data type %#x>%s", + compiler->colours.error, type, compiler->colours.reset); +} + +static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compiler, const enum vsir_data_type *type) +{ + int i; + + /* We want the D3D names here, not the vsir ones. */ + static const char * const names[] = { [VSIR_DATA_BOOL ] = "bool", [VSIR_DATA_F16 ] = "half", [VSIR_DATA_F32 ] = "float", [VSIR_DATA_F64 ] = "double", + [VSIR_DATA_I8 ] = "int8", + [VSIR_DATA_I16 ] = "int16", [VSIR_DATA_I32 ] = "int", + [VSIR_DATA_I64 ] = "int64", [VSIR_DATA_U8 ] = "uint8", [VSIR_DATA_U16 ] = "uint16", [VSIR_DATA_U32 ] = "uint", @@ -409,23 +421,18 @@ static void shader_print_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum [VSIR_DATA_UNUSED ] = "<unused>", };
- if (type < ARRAY_SIZE(data_type_names)) - vkd3d_string_buffer_printf(&compiler->buffer, "%s", data_type_names[type]); - else - vkd3d_string_buffer_printf(&compiler->buffer, "%s<unhandled data type %#x>%s", - compiler->colours.error, type, compiler->colours.reset); -} - -static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compiler, const enum vsir_data_type *type) -{ - int i; - vkd3d_string_buffer_printf(&compiler->buffer, "(");
for (i = 0; i < 4; i++) { + size_t t = type[i]; + vkd3d_string_buffer_printf(&compiler->buffer, "%s", i == 0 ? "" : ","); - shader_print_data_type(compiler, type[i]); + if (t < ARRAY_SIZE(names) && names[t]) + vkd3d_string_buffer_printf(&compiler->buffer, "%s", names[t]); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%s<unhandled data type %#zx>%s", + compiler->colours.error, t, compiler->colours.reset); }
vkd3d_string_buffer_printf(&compiler->buffer, ")"); @@ -610,6 +617,18 @@ static void shader_print_uint_literal(struct vkd3d_d3d_asm_compiler *compiler, prefix, compiler->colours.literal, i, compiler->colours.reset, suffix); }
+static void shader_print_int64_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, int64_t i, const char *suffix) +{ + /* Note that we need to handle INT64_MIN here as well: negating it as + * int64_t would overflow, so we negate the value as uint64_t. */ + if (i < 0) + vkd3d_string_buffer_printf(&compiler->buffer, "%s-%s%"PRIu64"%s%s", + prefix, compiler->colours.literal, -(uint64_t)i, compiler->colours.reset, suffix); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%"PRId64"%s%s", + prefix, compiler->colours.literal, i, compiler->colours.reset, suffix); +} + static void shader_print_uint64_literal(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, uint64_t i, const char *suffix) { @@ -810,6 +829,12 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const if (reg->dimension == VSIR_DIMENSION_VEC4) shader_print_double_literal(compiler, ", ", reg->u.immconst_f64[1], ""); } + else if (reg->data_type == VSIR_DATA_I64) + { + shader_print_int64_literal(compiler, "", reg->u.immconst_u64[0], ""); + if (reg->dimension == VSIR_DIMENSION_VEC4) + shader_print_int64_literal(compiler, ", ", reg->u.immconst_u64[1], ""); + } else if (reg->data_type == VSIR_DATA_U64) { shader_print_uint64_literal(compiler, "", reg->u.immconst_u64[0], ""); @@ -851,7 +876,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const && reg->type != VKD3DSPR_NULL && reg->type != VKD3DSPR_DEPTHOUT) { - if (offset != ~0u) + if (reg->idx_count) { bool is_sm_5_1 = vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1);
@@ -879,10 +904,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const /* For descriptors in sm < 5.1 we move the reg->idx values up one slot * to normalise with 5.1. * Here we should ignore it if it's a descriptor in sm < 5.1. */ - if (reg->idx[1].offset != ~0u && (!is_descriptor || is_sm_5_1)) + if (reg->idx_count > 1 && (!is_descriptor || is_sm_5_1)) shader_print_subscript(compiler, reg->idx[1].offset, reg->idx[1].rel_addr);
- if (reg->idx[2].offset != ~0u) + if (reg->idx_count > 2) shader_print_subscript(compiler, reg->idx[2].offset, reg->idx[2].rel_addr); } } @@ -974,6 +999,22 @@ static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(buffer, ">%s", suffix); }
+static void shader_print_indexable_temp_data_type(struct vkd3d_d3d_asm_compiler *compiler, + const struct vkd3d_shader_indexable_temp *t) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + + if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) + return; + + if (t->component_count > 1) + vkd3d_string_buffer_printf(buffer, " <v%u:", t->component_count); + else + vkd3d_string_buffer_printf(buffer, " <s:"); + shader_print_data_type(compiler, t->data_type); + vkd3d_string_buffer_printf(buffer, ">"); +} + static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, uint32_t mask, const char *suffix) { @@ -1528,6 +1569,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(buffer, " %sx%u%s", compiler->colours.reg, ins->declaration.indexable_temp.register_idx, compiler->colours.reset); shader_print_subscript(compiler, ins->declaration.indexable_temp.register_size, NULL); + shader_print_indexable_temp_data_type(compiler, &ins->declaration.indexable_temp); shader_print_uint_literal(compiler, ", ", ins->declaration.indexable_temp.component_count, ""); if (ins->declaration.indexable_temp.alignment) shader_print_uint_literal(compiler, ", align ", ins->declaration.indexable_temp.alignment, ""); diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 751e5578276..87a7d48acca 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -246,6 +246,11 @@ struct vkd3d_shader_sm1_parser bool abort;
struct vkd3d_shader_parser p; + struct vsir_program *program; + + const struct vkd3d_shader_d3dbc_source_info *d3dbc_source_info; + + uint16_t texture_descriptors;
struct { @@ -468,7 +473,7 @@ static bool has_relative_address(uint32_t param) static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info( const struct vkd3d_shader_sm1_parser *sm1, enum vkd3d_sm1_opcode opcode) { - const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; + const struct vkd3d_shader_version *version = &sm1->program->shader_version; const struct vkd3d_sm1_opcode_info *info; unsigned int i = 0;
@@ -542,9 +547,9 @@ static enum vkd3d_shader_register_type parse_register_type( }
if (d3dbc_type == VKD3D_SM1_REG_ADDR) - return sm1->p.program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? VKD3DSPR_TEXTURE : VKD3DSPR_ADDR; + return sm1->program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? VKD3DSPR_TEXTURE : VKD3DSPR_ADDR; if (d3dbc_type == VKD3D_SM1_REG_TEXCRDOUT) - return vkd3d_shader_ver_ge(&sm1->p.program->shader_version, 3, 0) ? VKD3DSPR_OUTPUT : VKD3DSPR_TEXCRDOUT; + return vkd3d_shader_ver_ge(&sm1->program->shader_version, 3, 0) ? VKD3DSPR_OUTPUT : VKD3DSPR_TEXCRDOUT;
for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i) { @@ -658,9 +663,9 @@ static unsigned int make_mask_contiguous(unsigned int mask)
static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, - unsigned int register_index, bool is_dcl, unsigned int mask) + unsigned int register_index, bool is_dcl, unsigned int mask, uint32_t dst_modifiers) { - struct vsir_program *program = sm1->p.program; + struct vsir_program *program = sm1->program; struct shader_signature *signature; struct signature_element *element;
@@ -694,7 +699,8 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp element->mask = make_mask_contiguous(mask); element->used_mask = is_dcl ? 0 : mask; if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) - element->interpolation_mode = VKD3DSIM_LINEAR; + element->interpolation_mode = (dst_modifiers & VKD3DSPDM_MSAMPCENTROID) + ? VKD3DSIM_LINEAR_CENTROID : VKD3DSIM_LINEAR;
return true; } @@ -702,7 +708,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, unsigned int register_index, unsigned int mask) { - struct vsir_program *program = sm1->p.program; + struct vsir_program *program = sm1->program; struct shader_signature *signature; struct signature_element *element;
@@ -747,16 +753,17 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, }
static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, - const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) + const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask, uint32_t dst_modifiers) { - const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; + const struct vkd3d_shader_version *version = &sm1->program->shader_version; unsigned int register_index = reg->idx_count > 0 ? reg->idx[0].offset : 0;
switch (reg->type) { case VKD3DSPR_TEMP: if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1 && !register_index) - return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_TARGET, 0, is_dcl, mask); + return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_TARGET, + 0, is_dcl, mask, dst_modifiers); return true;
case VKD3DSPR_INPUT: @@ -768,15 +775,15 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * return true; } return add_signature_element(sm1, false, "COLOR", register_index, - VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); + VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask, dst_modifiers);
case VKD3DSPR_TEXTURE: return add_signature_element(sm1, false, "TEXCOORD", register_index, - VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask, dst_modifiers);
case VKD3DSPR_TEXCRDOUT: return add_signature_element(sm1, true, "TEXCOORD", register_index, - VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask, dst_modifiers);
case VKD3DSPR_OUTPUT: if (version->type == VKD3D_SHADER_TYPE_VERTEX) @@ -788,30 +795,30 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *
case VKD3DSPR_ATTROUT: return add_signature_element(sm1, true, "COLOR", register_index, - VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); + VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask, dst_modifiers);
case VKD3DSPR_COLOROUT: return add_signature_element(sm1, true, "COLOR", register_index, - VKD3D_SHADER_SV_TARGET, register_index, is_dcl, mask); + VKD3D_SHADER_SV_TARGET, register_index, is_dcl, mask, dst_modifiers);
case VKD3DSPR_DEPTHOUT: return add_signature_element(sm1, true, "DEPTH", 0, - VKD3D_SHADER_SV_DEPTH, register_index, is_dcl, 0x1); + VKD3D_SHADER_SV_DEPTH, register_index, is_dcl, 0x1, dst_modifiers);
case VKD3DSPR_RASTOUT: switch (register_index) { case 0: - return add_signature_element(sm1, true, "POSITION", 0, - VKD3D_SHADER_SV_POSITION, SM1_RASTOUT_REGISTER_OFFSET + register_index, is_dcl, mask); + return add_signature_element(sm1, true, "POSITION", 0, VKD3D_SHADER_SV_POSITION, + SM1_RASTOUT_REGISTER_OFFSET + register_index, is_dcl, mask, dst_modifiers);
case 1: - return add_signature_element(sm1, true, "FOG", 0, - VKD3D_SHADER_SV_NONE, SM1_RASTOUT_REGISTER_OFFSET + register_index, is_dcl, 0x1); + return add_signature_element(sm1, true, "FOG", 0, VKD3D_SHADER_SV_NONE, + SM1_RASTOUT_REGISTER_OFFSET + register_index, is_dcl, 0x1, dst_modifiers);
case 2: - return add_signature_element(sm1, true, "PSIZE", 0, - VKD3D_SHADER_SV_NONE, SM1_RASTOUT_REGISTER_OFFSET + register_index, is_dcl, 0x1); + return add_signature_element(sm1, true, "PSIZE", 0, VKD3D_SHADER_SV_NONE, + SM1_RASTOUT_REGISTER_OFFSET + register_index, is_dcl, 0x1, dst_modifiers);
default: vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, @@ -824,11 +831,11 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * { case 0: return add_signature_element(sm1, false, "VPOS", 0, - VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); + VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask, dst_modifiers);
case 1: return add_signature_element(sm1, false, "VFACE", 0, - VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); + VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1, dst_modifiers);
default: vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, @@ -844,10 +851,11 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_semantic *semantic) { - const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; + const struct vkd3d_shader_version *version = &sm1->program->shader_version; const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; unsigned int mask = semantic->resource.reg.write_mask; + uint32_t modifiers = semantic->resource.reg.modifiers; bool output;
static const char sm1_semantic_names[][13] = @@ -873,11 +881,11 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) output = false; else /* vpos and vface don't have a semantic. */ - return add_signature_element_from_register(sm1, reg, true, mask); + return add_signature_element_from_register(sm1, reg, true, mask, modifiers);
/* sm2 pixel shaders use DCL but don't provide a semantic. */ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 2) - return add_signature_element_from_register(sm1, reg, true, mask); + return add_signature_element_from_register(sm1, reg, true, mask, modifiers);
/* With the exception of vertex POSITION output, none of these are system * values. Pixel POSITION input is not equivalent to SV_Position; the closer @@ -886,8 +894,8 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) sysval = VKD3D_SHADER_SV_POSITION;
- return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], - semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); + return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], semantic->usage_idx, sysval, + reg->idx[0].offset, true, mask, modifiers); }
static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, @@ -906,7 +914,7 @@ static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) { - struct vsir_program *program = sm1->p.program; + struct vsir_program *program = sm1->program; uint32_t register_index = reg->idx[0].offset;
switch (reg->type) @@ -931,7 +939,28 @@ static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, break; }
- add_signature_element_from_register(sm1, reg, false, mask); + add_signature_element_from_register(sm1, reg, false, mask, 0); +} + +static void d3dbc_add_combined_sampler_descriptor(struct vkd3d_shader_sm1_parser *d3dbc, + unsigned int sampler_idx, enum vkd3d_shader_resource_type resource_type) +{ + struct vkd3d_shader_register_range range = {.first = sampler_idx, .last = sampler_idx}; + const struct vkd3d_shader_d3dbc_source_info *source_info = d3dbc->d3dbc_source_info; + struct vsir_program *program = d3dbc->program; + struct vkd3d_shader_descriptor_info1 *d; + + if (!vsir_program_add_descriptor(program, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, + sampler_idx, &range, resource_type, VSIR_DATA_F32)) + vkd3d_shader_parser_error(&d3dbc->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, + "Failed to create SRV descriptor for combined sampler %u.", sampler_idx); + + if (!(d = vsir_program_add_descriptor(program, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, + sampler_idx, &range, VKD3D_SHADER_RESOURCE_NONE, VSIR_DATA_UNUSED))) + vkd3d_shader_parser_error(&d3dbc->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, + "Failed to create sampler descriptor for combined sampler %u.", sampler_idx); + else if (source_info && source_info->shadow_samplers & (1u << sampler_idx)) + d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; }
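The descriptor registration above pairs with a caller-side chain like the following sketch; the stage assignments are hypothetical, describing a ps_1_x shader with a cube texture at stage 1 and a shadow sampler at stage 0. A vkd3d_shader_scan_thread_group_size_info structure would chain into compile_info the same way:

    struct vkd3d_shader_d3dbc_source_info source_info =
    {
        .type = VKD3D_SHADER_STRUCTURE_TYPE_D3DBC_SOURCE_INFO,
        .texture_dimensions =
        {
            VKD3D_SHADER_RESOURCE_TEXTURE_2D,   /* stage 0 */
            VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, /* stage 1 */
            VKD3D_SHADER_RESOURCE_TEXTURE_2D,
            VKD3D_SHADER_RESOURCE_TEXTURE_2D,
            VKD3D_SHADER_RESOURCE_TEXTURE_2D,
            VKD3D_SHADER_RESOURCE_TEXTURE_2D,
        },
        .shadow_samplers = 0x1, /* e.g. a D3DFMT_D24S8 texture at stage 0 */
    };
    struct vkd3d_shader_compile_info compile_info =
    {
        .type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO,
        .next = &source_info,
        /* source, source_type, target_type, etc. omitted */
    };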
/* Read a parameter token from the input stream, and possibly a relative @@ -955,7 +984,7 @@ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, * VS >= 2.0 have relative addressing (with token) * VS >= 1.0 < 2.0 have relative addressing (without token) * The version check below should work in general. */ - if (sm1->p.program->shader_version.major < 2) + if (sm1->program->shader_version.major < 2) { *addr_token = (1u << 31) | ((VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2) @@ -984,7 +1013,7 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co /* Version 2.0+ shaders may contain address tokens, but fortunately they * have a useful length mask - use it here. Version 1.x shaders contain no * such tokens. */ - if (sm1->p.program->shader_version.major >= 2) + if (sm1->program->shader_version.major >= 2) { length = (opcode_token & VKD3D_SM1_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; *ptr += length; @@ -1019,7 +1048,7 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const shader_sm1_read_param(sm1, ptr, &token, &addr_token); if (has_relative_address(token)) { - if (!(src_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) + if (!(src_rel_addr = vsir_program_get_src_params(sm1->program, 1))) { vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); @@ -1040,7 +1069,7 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const shader_sm1_read_param(sm1, ptr, &token, &addr_token); if (has_relative_address(token)) { - if (!(dst_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) + if (!(dst_rel_addr = vsir_program_get_src_params(sm1->program, 1))) { vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); @@ -1052,9 +1081,9 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const shader_sm1_parse_dst_param(sm1, token, dst_rel_addr, dst_param);
if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) - sm1->p.program->has_point_size = true; + sm1->program->has_point_size = true; if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_FOG) - sm1->p.program->has_fog = true; + sm1->program->has_fog = true; }
static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, @@ -1098,6 +1127,11 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, range->first = range->last = semantic->resource.reg.reg.idx[0].offset;
add_signature_element_from_semantic(sm1, semantic); + if (semantic->resource_type) + { + d3dbc_add_combined_sampler_descriptor(sm1, range->first, semantic->resource_type); + sm1->texture_descriptors |= (1u << range->first); + } }
static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, @@ -1192,6 +1226,43 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) } }
+static void d3dbc_update_descriptors(struct vkd3d_shader_sm1_parser *d3dbc, + const struct vkd3d_shader_instruction *ins) +{ + enum vkd3d_shader_resource_type type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + const struct vkd3d_shader_d3dbc_source_info *source_info; + unsigned int sampler_idx; + + switch (ins->opcode) + { + case VSIR_OP_TEX: + case VSIR_OP_TEXBEM: + case VSIR_OP_TEXBEML: + case VSIR_OP_TEXDP3TEX: + case VSIR_OP_TEXLD: + case VSIR_OP_TEXM3x2TEX: + case VSIR_OP_TEXM3x3SPEC: + case VSIR_OP_TEXM3x3TEX: + case VSIR_OP_TEXM3x3VSPEC: + case VSIR_OP_TEXREG2AR: + case VSIR_OP_TEXREG2GB: + case VSIR_OP_TEXREG2RGB: + sampler_idx = ins->dst[0].reg.idx[0].offset; + if ((d3dbc->texture_descriptors & (1u << sampler_idx))) + break; + + if ((source_info = d3dbc->d3dbc_source_info) + && sampler_idx < ARRAY_SIZE(source_info->texture_dimensions)) + type = source_info->texture_dimensions[sampler_idx]; + d3dbc_add_combined_sampler_descriptor(d3dbc, sampler_idx, type); + d3dbc->texture_descriptors |= (1u << sampler_idx); + break; + + default: + break; + } +} + static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { if ((ins->opcode == VSIR_OP_BREAKP || ins->opcode == VSIR_OP_IF) && ins->flags) @@ -1214,7 +1285,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str { struct vkd3d_shader_src_param *src_params, *predicate; const struct vkd3d_sm1_opcode_info *opcode_info; - struct vsir_program *program = sm1->p.program; + struct vsir_program *program = sm1->program; unsigned int vsir_dst_count, vsir_src_count; struct vkd3d_shader_dst_param *dst_param; const uint32_t **ptr = &sm1->ptr; @@ -1356,6 +1427,9 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str goto fail; }
+ if (program->shader_version.major == 1) + d3dbc_update_descriptors(sm1, ins); + shader_sm1_validate_instruction(sm1, ins); return;
@@ -1393,6 +1467,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st uint16_t shader_type; size_t token_count;
+ sm1->d3dbc_source_info = vkd3d_find_struct(compile_info->next, D3DBC_SOURCE_INFO); + token_count = code_size / sizeof(*sm1->start);
if (token_count < 2) @@ -1446,7 +1522,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level)) return VKD3D_ERROR_OUT_OF_MEMORY;
- vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); + vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name); + sm1->program = program; sm1->ptr = sm1->start;
return VKD3D_OK; @@ -1473,6 +1550,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c struct vkd3d_shader_message_context *message_context, struct vsir_program *program) { struct vkd3d_shader_sm1_parser sm1 = {0}; + struct vkd3d_shader_descriptor_info1 *d; struct vkd3d_shader_instruction *ins; unsigned int i; int ret; @@ -1501,11 +1579,29 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c } }
- for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) - program->flat_constant_count[i] = get_external_constant_count(&sm1, i); + for (i = 0; i < 3; ++i) + { + struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; + unsigned int size = get_external_constant_count(&sm1, i);
- if (sm1.p.failed && ret >= 0) - ret = VKD3D_ERROR_INVALID_SHADER; + if (size) + { + if (!(d = vsir_program_add_descriptor(program, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, + i, &range, VKD3D_SHADER_RESOURCE_BUFFER, VSIR_DATA_U32))) + vkd3d_shader_parser_error(&sm1.p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, + "Failed to create CBV descriptor."); + else + d->buffer_size = size * 16; + } + } + + program->has_descriptor_info = true; + + if (TRACE_ON()) + vsir_program_trace(program); + + if (ret >= 0 && sm1.p.status < 0) + ret = sm1.p.status;
if (ret < 0) { @@ -1952,7 +2048,7 @@ static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3
reg_id = semantic->resource.reg.reg.idx[0].offset;
- if (semantic->resource.reg.reg.type != VKD3DSPR_SAMPLER) + if (semantic->resource.reg.reg.type != VKD3DSPR_COMBINED_SAMPLER) { vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE, "dcl instruction with register type %u.", semantic->resource.reg.reg.type); @@ -1988,6 +2084,9 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str
switch (ins->opcode) { + case VSIR_OP_NOP: + break; + case VSIR_OP_DEF: d3dbc_write_vsir_def(d3dbc, ins); break; @@ -2075,11 +2174,11 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, VKD3D_ASSERT(ret); reg.reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; reg.reg.idx[0].offset = element->register_index; - if (!vkd3d_shader_ver_ge(version, 3, 0)) + if ((version->type == VKD3D_SHADER_TYPE_PIXEL || output) && !vkd3d_shader_ver_ge(version, 3, 0)) { - if (reg.reg.idx[0].offset > SM1_RASTOUT_REGISTER_OFFSET) + if (reg.reg.idx[0].offset >= SM1_RASTOUT_REGISTER_OFFSET) reg.reg.idx[0].offset -= SM1_RASTOUT_REGISTER_OFFSET; - else if (reg.reg.idx[0].offset > SM1_COLOR_REGISTER_OFFSET) + else if (reg.reg.idx[0].offset >= SM1_COLOR_REGISTER_OFFSET) reg.reg.idx[0].offset -= SM1_COLOR_REGISTER_OFFSET; } } @@ -2095,6 +2194,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, put_u32(buffer, token);
reg.write_mask = element->mask; + if (element->interpolation_mode == VKD3DSIM_LINEAR_CENTROID) + reg.modifiers |= VKD3DSPDM_MSAMPCENTROID; write_sm1_dst_register(buffer, ®); }
@@ -2145,10 +2246,10 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; int result;
- if ((result = vsir_allocate_temp_registers(program, message_context))) + if ((result = vsir_program_optimize(program, config_flags, compile_info, message_context))) return result;
- if ((result = vsir_update_dcl_temps(program, message_context))) + if ((result = vsir_allocate_temp_registers(program, message_context))) return result;
d3dbc.program = program; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index c448e000cf9..9f25ae8334b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -45,6 +45,8 @@ static const unsigned int MAX_GS_OUTPUT_STREAMS = 4; (VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(0) \ | VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(1))
+#define DXIL_TYPE_SIGNED 0x1u + enum bitcode_block_id { BLOCKINFO_BLOCK = 0, @@ -645,13 +647,13 @@ enum sm6_value_type { VALUE_TYPE_INVALID, VALUE_TYPE_FUNCTION, - VALUE_TYPE_DATA, VALUE_TYPE_HANDLE, VALUE_TYPE_SSA, VALUE_TYPE_ICB, VALUE_TYPE_IDXTEMP, VALUE_TYPE_GROUPSHAREDMEM, VALUE_TYPE_CONSTANT, + VALUE_TYPE_CONSTANT_ARRAY, VALUE_TYPE_UNDEFINED, };
@@ -704,6 +706,12 @@ struct sm6_constant_data union vsir_immediate_constant immconst; };
+struct sm6_constant_array_data +{ + const struct vkd3d_shader_immediate_constant_buffer *icb; + const uint64_t *elements; +}; + struct sm6_value { const struct sm6_type *type; @@ -714,13 +722,13 @@ struct sm6_value union { struct sm6_function_data function; - const struct vkd3d_shader_immediate_constant_buffer *data; struct sm6_handle_data handle; struct sm6_ssa_data ssa; struct sm6_icb_data icb; struct sm6_idxtemp_data idxtemp; struct sm6_groupsharedmem_data groupsharedmem; struct sm6_constant_data constant; + struct sm6_constant_array_data constant_array; } u; };
@@ -892,6 +900,8 @@ struct sm6_parser const uint32_t *ptr, *start, *end; unsigned int bitpos;
+ struct vsir_program *program; + struct dxil_block root_block; struct dxil_block *current_block;
@@ -915,6 +925,7 @@ struct sm6_parser struct vkd3d_shader_dst_param *input_params; struct vkd3d_shader_dst_param *patch_constant_params; uint32_t io_regs_declared[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; + struct vkd3d_shader_src_param *outpointid_param;
struct sm6_function *functions; size_t function_count; @@ -976,7 +987,7 @@ static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) { if (sm6_parser_is_end(sm6)) { - sm6->p.failed = true; + sm6->p.status = VKD3D_ERROR_INVALID_SHADER; return 0; } return *sm6->ptr++; @@ -994,7 +1005,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length
if (sm6_parser_is_end(sm6)) { - sm6->p.failed = true; + sm6->p.status = VKD3D_ERROR_INVALID_SHADER; return 0; }
@@ -1006,7 +1017,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length ++sm6->ptr; if (sm6_parser_is_end(sm6) && l < length) { - sm6->p.failed = true; + sm6->p.status = VKD3D_ERROR_INVALID_SHADER; return bits; } sm6->bitpos = 0; @@ -1028,7 +1039,7 @@ static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length)
if (sm6_parser_is_end(sm6)) { - sm6->p.failed = true; + sm6->p.status = VKD3D_ERROR_INVALID_SHADER; return 0; }
@@ -1039,9 +1050,10 @@ static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) bits = sm6_parser_read_bits(sm6, length); result |= (uint64_t)(bits & mask) << shift; shift += length - 1; - } while ((bits & flag) && !sm6->p.failed && shift < 64); + } while ((bits & flag) && (sm6->p.status >= 0) && shift < 64);
- sm6->p.failed |= !!(bits & flag); + if (bits & flag) + sm6->p.status = VKD3D_ERROR_INVALID_SHADER;
return result; } @@ -1053,7 +1065,7 @@ static void sm6_parser_align_32(struct sm6_parser *sm6)
if (sm6_parser_is_end(sm6)) { - sm6->p.failed = true; + sm6->p.status = VKD3D_ERROR_INVALID_SHADER; return; }
@@ -1144,8 +1156,8 @@ static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6)
for (i = 0; i < count; ++i) record->operands[i] = sm6_parser_read_vbr(sm6, 6); - if (sm6->p.failed) - ret = VKD3D_ERROR_INVALID_SHADER; + if (sm6->p.status < 0) + ret = sm6->p.status;
if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) vkd3d_free(record); @@ -1156,25 +1168,25 @@ static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) { *op = context; - return !sm6->p.failed; + return sm6->p.status >= 0; }
static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) { *op = sm6_parser_read_bits(sm6, context); - return !sm6->p.failed; + return sm6->p.status >= 0; }
static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) { *op = sm6_parser_read_vbr(sm6, context); - return !sm6->p.failed; + return sm6->p.status >= 0; }
static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) { *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; - return !sm6->p.failed; + return sm6->p.status >= 0; }
static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) @@ -1194,7 +1206,7 @@ static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned i
abbrev->is_array = false;
- for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) + for (i = 0, prev_type = 0; i < count && (sm6->p.status >= 0); ++i) { if (sm6_parser_read_bits(sm6, 1)) { @@ -1247,7 +1259,7 @@ static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned i
abbrev->count = count;
- return sm6->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; + return sm6->p.status; }
static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) @@ -1459,7 +1471,7 @@ static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_p } break; } - } while (!sm6->p.failed); + } while (sm6->p.status >= 0);
return VKD3D_ERROR_INVALID_SHADER; } @@ -1510,8 +1522,8 @@ static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct block->length = sm6_parser_read_uint32(sm6); block->start = sm6->ptr - sm6->start;
- if (sm6->p.failed) - return VKD3D_ERROR_INVALID_SHADER; + if (sm6->p.status < 0) + return sm6->p.status;
if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) { @@ -2274,6 +2286,11 @@ static inline bool sm6_value_is_constant(const struct sm6_value *value) return value->value_type == VALUE_TYPE_CONSTANT; }
+static bool sm6_value_is_constant_array(const struct sm6_value *value) +{ + return value->value_type == VALUE_TYPE_CONSTANT_ARRAY; +} + static bool sm6_value_is_constant_zero(const struct sm6_value *value) { if (value->value_type != VALUE_TYPE_CONSTANT) @@ -2317,11 +2334,6 @@ static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **value return true; }
-static bool sm6_value_is_data(const struct sm6_value *value) -{ - return value->value_type == VALUE_TYPE_DATA; -} - static bool sm6_value_is_ssa(const struct sm6_value *value) { return value->value_type == VALUE_TYPE_SSA; @@ -2424,9 +2436,9 @@ static unsigned int sm6_parser_alloc_ssa_id(struct sm6_parser *sm6) }
static void instruction_init_with_resource(struct vkd3d_shader_instruction *ins, - enum vkd3d_shader_opcode handler_idx, const struct sm6_value *resource, struct sm6_parser *sm6) + enum vkd3d_shader_opcode opcode, const struct sm6_value *resource, struct sm6_parser *dxil) { - vsir_instruction_init(ins, &sm6->p.location, handler_idx); + vsir_instruction_init(ins, &dxil->p.location, opcode); ins->resource_type = resource->u.handle.d->resource_type; ins->raw = resource->u.handle.d->kind == RESOURCE_KIND_RAWBUFFER; ins->structured = resource->u.handle.d->kind == RESOURCE_KIND_STRUCTUREDBUFFER; @@ -2437,7 +2449,7 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ { struct vkd3d_shader_src_param *params;
- if (!(params = vsir_program_get_src_params(sm6->p.program, count))) + if (!(params = vsir_program_get_src_params(sm6->program, count))) { ERR("Failed to allocate src params.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, @@ -2454,7 +2466,7 @@ static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_ { struct vkd3d_shader_dst_param *params;
- if (!(params = vsir_program_get_dst_params(sm6->p.program, count))) + if (!(params = vsir_program_get_dst_params(sm6->program, count))) { ERR("Failed to allocate dst params.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, @@ -2473,8 +2485,11 @@ static void register_init_with_id(struct vkd3d_shader_register *reg, reg->idx[0].offset = id; }
-static enum vsir_data_type vsir_data_type_from_dxil(const struct sm6_type *type) +static enum vsir_data_type vsir_data_type_from_dxil(const struct sm6_type *type, + uint32_t type_flags, struct sm6_parser *dxil) { + bool is_signed = type_flags & DXIL_TYPE_SIGNED; + if (type->class == TYPE_CLASS_INTEGER) { switch (type->u.width) @@ -2482,15 +2497,16 @@ static enum vsir_data_type vsir_data_type_from_dxil(const struct sm6_type *type) case 1: return VSIR_DATA_BOOL; case 8: - return VSIR_DATA_U8; + return is_signed ? VSIR_DATA_I8 : VSIR_DATA_U8; case 16: - return VSIR_DATA_U16; + return is_signed ? VSIR_DATA_I16 : VSIR_DATA_U16; case 32: - return VSIR_DATA_U32; + return is_signed ? VSIR_DATA_I32 : VSIR_DATA_U32; case 64: - return VSIR_DATA_U64; + return is_signed ? VSIR_DATA_I64 : VSIR_DATA_U64; default: - FIXME("Unhandled width %u.\n", type->u.width); + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED, + "Unhandled integer width %u.", type->u.width); return VSIR_DATA_U32; } } @@ -2505,12 +2521,14 @@ static enum vsir_data_type vsir_data_type_from_dxil(const struct sm6_type *type) case 64: return VSIR_DATA_F64; default: - FIXME("Unhandled width %u.\n", type->u.width); + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED, + "Unhandled floating-point width %u.", type->u.width); return VSIR_DATA_F32; } }
- FIXME("Unhandled type %u.\n", type->class); + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED, + "Unhandled type %#x.", type->class); return VSIR_DATA_U32; }
@@ -2572,6 +2590,16 @@ static void register_convert_to_minimum_precision(struct vkd3d_shader_register * } break;
+ case VSIR_DATA_I16: + reg->data_type = VSIR_DATA_I32; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16; + if (reg->type == VKD3DSPR_IMMCONST) + { + for (i = 0; i < VSIR_DIMENSION_VEC4; ++i) + reg->u.immconst_u32[i] = (int16_t)reg->u.immconst_u32[i]; + } + break; + case VSIR_DATA_U16: reg->data_type = VSIR_DATA_U32; reg->precision = VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16; @@ -2590,14 +2618,14 @@ static void register_convert_to_minimum_precision(struct vkd3d_shader_register * static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, struct sm6_parser *sm6);
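The new I16 immediate case leans on C's integer promotions: casting the stored 32-bit payload through int16_t and assigning it back sign-extends it into the full register. Contrast with a zero-extending widen, in isolation:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t raw = 0xffffu; /* a 16-bit immediate's bit pattern */
        uint32_t sign_extended = (uint32_t)(int16_t)raw; /* the I16 path */
        uint32_t zero_extended = (uint16_t)raw;

        /* Prints 0xffffffff 0xffff. */
        printf("%#x %#x\n", (unsigned int)sign_extended,
                (unsigned int)zero_extended);
        return 0;
    }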
-static void sm6_register_from_value(struct vkd3d_shader_register *reg, const struct sm6_value *value, - struct sm6_parser *sm6) +static void vsir_register_from_dxil_value(struct vkd3d_shader_register *reg, + const struct sm6_value *value, uint32_t type_flags, struct sm6_parser *dxil) { const struct sm6_type *scalar_type; enum vsir_data_type data_type;
scalar_type = sm6_type_get_scalar_type(value->type, 0); - data_type = vsir_data_type_from_dxil(scalar_type); + data_type = vsir_data_type_from_dxil(scalar_type, type_flags, dxil);
switch (value->value_type) { @@ -2609,21 +2637,21 @@ static void sm6_register_from_value(struct vkd3d_shader_register *reg, const str case VALUE_TYPE_ICB: vsir_register_init(reg, VKD3DSPR_IMMCONSTBUFFER, data_type, 2); reg->idx[0].offset = value->u.icb.id; - register_index_address_init(®->idx[1], value->u.icb.index.index, sm6); + register_index_address_init(®->idx[1], value->u.icb.index.index, dxil); reg->idx[1].is_in_bounds = value->u.icb.index.is_in_bounds; break;
case VALUE_TYPE_IDXTEMP: vsir_register_init(reg, VKD3DSPR_IDXTEMP, data_type, 2); reg->idx[0].offset = value->u.idxtemp.id; - register_index_address_init(®->idx[1], value->u.idxtemp.index.index, sm6); + register_index_address_init(®->idx[1], value->u.idxtemp.index.index, dxil); reg->idx[1].is_in_bounds = value->u.idxtemp.index.is_in_bounds; break;
case VALUE_TYPE_GROUPSHAREDMEM: vsir_register_init(reg, VKD3DSPR_GROUPSHAREDMEM, data_type, 2); reg->idx[0].offset = value->u.groupsharedmem.id; - register_index_address_init(®->idx[1], value->u.groupsharedmem.index.index, sm6); + register_index_address_init(®->idx[1], value->u.groupsharedmem.index.index, dxil); reg->idx[1].is_in_bounds = value->u.groupsharedmem.index.is_in_bounds; break;
@@ -2640,7 +2668,7 @@ static void sm6_register_from_value(struct vkd3d_shader_register *reg, const str
case VALUE_TYPE_FUNCTION: case VALUE_TYPE_HANDLE: - case VALUE_TYPE_DATA: + case VALUE_TYPE_CONSTANT_ARRAY: vkd3d_unreachable(); }
@@ -2720,11 +2748,11 @@ static void src_param_init_vector(struct vkd3d_shader_src_param *param, unsigned param->modifiers = VKD3DSPSM_NONE; }
-static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src, - struct sm6_parser *sm6) +static void src_param_init_from_value(struct vkd3d_shader_src_param *param, + const struct sm6_value *src, uint32_t type_flags, struct sm6_parser *dxil) { src_param_init(param); - sm6_register_from_value(¶m->reg, src, sm6); + vsir_register_from_dxil_value(¶m->reg, src, type_flags, dxil); }
static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, @@ -2756,9 +2784,9 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, } else { - struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(sm6->p.program, 1); + struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(sm6->program, 1); if (rel_addr) - src_param_init_from_value(rel_addr, address, sm6); + src_param_init_from_value(rel_addr, address, 0, sm6); idx->offset = 0; idx->rel_addr = rel_addr; } @@ -2783,17 +2811,19 @@ static void src_param_init_vector_from_handle(struct sm6_parser *sm6, src_param_init_vector_from_reg(param, ®); }
-static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) +static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, + uint32_t type_flags, struct sm6_parser *dxil) { - struct sm6_value *dst = sm6_parser_get_current_value(sm6); + struct sm6_value *dst = sm6_parser_get_current_value(dxil); struct vkd3d_shader_dst_param *param;
- if (!(param = instruction_dst_params_alloc(ins, 1, sm6))) + if (!(param = instruction_dst_params_alloc(ins, 1, dxil))) return false;
dst_param_init(param); - sm6_parser_init_ssa_value(sm6, dst); - sm6_register_from_value(¶m->reg, dst, sm6); + sm6_parser_init_ssa_value(dxil, dst); + vsir_register_from_dxil_value(¶m->reg, dst, type_flags, dxil); + return true; }
@@ -2805,7 +2835,7 @@ static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instructio
dst_param_init_vector(param, component_count); sm6_parser_init_ssa_value(sm6, dst); - sm6_register_from_value(¶m->reg, dst, sm6); + vsir_register_from_dxil_value(¶m->reg, dst, 0, sm6); }
static bool instruction_dst_param_init_uint_temp_vector(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) @@ -2987,8 +3017,7 @@ static bool sm6_value_validate_is_backward_ref(const struct sm6_value *value, st { if (!value->is_back_ref) { - FIXME("Forward-referenced pointers are not supported.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Forward-referenced pointer declarations are not supported."); return false; } @@ -3044,7 +3073,7 @@ static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_ return true; }
-static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) +static struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) { if (idx < sm6->value_count) return &sm6->values[idx]; @@ -3188,100 +3217,6 @@ static inline uint64_t decode_rotated_signed_value(uint64_t value) return value << 63; }
-static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type, - const uint64_t *operands, struct sm6_parser *sm6) -{ - struct vkd3d_shader_immediate_constant_buffer *icb; - const struct sm6_type *elem_type; - unsigned int i, size, count; - uint64_t *data64; - - elem_type = type->u.array.elem_type; - /* Multidimensional arrays are emitted in flattened form. */ - if (elem_type->class != TYPE_CLASS_INTEGER && elem_type->class != TYPE_CLASS_FLOAT) - { - FIXME("Unhandled element type %u for data array.\n", elem_type->class); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "The element data type for an immediate constant buffer is not scalar integer or floating point."); - return VKD3D_ERROR_INVALID_SHADER; - } - - /* Arrays of bool are not used in DXIL. dxc will emit an array of int32 instead if necessary. */ - if (!(size = elem_type->u.width / CHAR_BIT)) - { - WARN("Invalid data type width %u.\n", elem_type->u.width); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "An immediate constant buffer is declared with boolean elements."); - return VKD3D_ERROR_INVALID_SHADER; - } - size = max(size, sizeof(icb->data[0])); - count = operands ? type->u.array.count * size / sizeof(icb->data[0]) : 0; - - if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[count])))) - { - ERR("Failed to allocate buffer, count %u.\n", count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory allocating an immediate constant buffer of count %u.", count); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - if (!shader_instruction_array_add_icb(&sm6->p.program->instructions, icb)) - { - ERR("Failed to store icb object.\n"); - vkd3d_free(icb); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory storing an immediate constant buffer object."); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - dst->value_type = VALUE_TYPE_DATA; - dst->u.data = icb; - - icb->register_idx = sm6->icb_count++; - icb->data_type = vsir_data_type_from_dxil(elem_type); - icb->element_count = type->u.array.count; - icb->component_count = 1; - icb->is_null = !operands; - - if (!operands) - return VKD3D_OK; - - count = type->u.array.count; - switch (icb->data_type) - { - case VSIR_DATA_F16: - for (i = 0; i < count; ++i) - icb->data[i] = half_to_float(operands[i]); - icb->data_type = VSIR_DATA_F32; - break; - - case VSIR_DATA_U16: - for (i = 0; i < count; ++i) - icb->data[i] = (int16_t)operands[i]; - icb->data_type = VSIR_DATA_U32; - break; - - case VSIR_DATA_F32: - case VSIR_DATA_U32: - for (i = 0; i < count; ++i) - icb->data[i] = operands[i]; - break; - - case VSIR_DATA_F64: - case VSIR_DATA_U64: - data64 = (uint64_t *)icb->data; - for (i = 0; i < count; ++i) - data64[i] = operands[i]; - break; - - default: - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "Invalid array of type %u.", icb->data_type); - return VKD3D_ERROR_INVALID_SHADER; - } - - return VKD3D_OK; -} - static struct sm6_index *sm6_get_value_index(struct sm6_parser *sm6, struct sm6_value *value) { switch (value->value_type) @@ -3468,8 +3403,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const case CST_CODE_NULL: if (sm6_type_is_array(type)) { - if ((ret = value_allocate_constant_array(dst, type, NULL, sm6)) < 0) - return ret; + dst->value_type = VALUE_TYPE_CONSTANT_ARRAY; + dst->u.constant_array.elements = NULL; } else { @@ 
-3529,8 +3464,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const if (!dxil_record_validate_operand_count(record, type->u.array.count, type->u.array.count, sm6)) return VKD3D_ERROR_INVALID_SHADER;
- if ((ret = value_allocate_constant_array(dst, type, record->operands, sm6)) < 0) - return ret; + dst->value_type = VALUE_TYPE_CONSTANT_ARRAY; + dst->u.constant_array.elements = record->operands;
break;
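Both the CST_CODE_NULL and array-data cases now just record the raw operands under VALUE_TYPE_CONSTANT_ARRAY instead of eagerly building an immediate constant buffer; the ICB is materialised later in resolve_forward_initialiser(). A sketch of that lazy shape, with field and helper names simplified:

    #include <stdint.h>

    struct vkd3d_shader_immediate_constant_buffer;

    /* Assumed helper standing in for the on-demand ICB construction. */
    struct vkd3d_shader_immediate_constant_buffer *build_icb(const uint64_t *elements);

    struct constant_array
    {
        const uint64_t *elements; /* raw record operands; NULL for a null array */
        struct vkd3d_shader_immediate_constant_buffer *icb; /* cached result */
    };

    static struct vkd3d_shader_immediate_constant_buffer *get_icb(struct constant_array *a)
    {
        if (!a->icb)
            a->icb = build_icb(a->elements); /* built at most once */
        return a->icb;
    }

This avoids allocating ICBs for constant arrays that never back an initialiser, and it presumes the record's operand storage stays alive until initialisers are resolved.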
@@ -3651,26 +3586,19 @@ static bool bitcode_parse_alignment(uint64_t encoded_alignment, unsigned int *al return true; }
-static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) +static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, + enum vkd3d_shader_opcode op) { - struct vkd3d_shader_instruction_array *instructions = &sm6->p.program->instructions; + struct vkd3d_shader_instruction *ins;
- if (!shader_instruction_array_reserve(instructions, instructions->count + extra)) + if (!(ins = vsir_program_append(sm6->program))) { - ERR("Failed to allocate instruction.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating instruction."); return NULL; } - return &instructions->elements[instructions->count]; -} + vsir_instruction_init(ins, &sm6->p.location, op);
-/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ -static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, - enum vkd3d_shader_opcode handler_idx) -{ - struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); - VKD3D_ASSERT(ins); - vsir_instruction_init(ins, &sm6->p.location, handler_idx); - ++sm6->p.program->instructions.count; return ins; }
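sm6_parser_add_instruction() now allocates on demand through vsir_program_append() and can fail, so the call sites below gain NULL checks instead of relying on pre-reserved space. A minimal sketch of a grow-and-return-slot append of this shape (fields illustrative, not vkd3d's):

    #include <stddef.h>
    #include <stdlib.h>

    struct instruction { int opcode; };

    struct instruction_array
    {
        struct instruction *elements;
        size_t count, capacity;
    };

    static struct instruction *array_append(struct instruction_array *a)
    {
        if (a->count == a->capacity)
        {
            size_t new_capacity = a->capacity ? a->capacity * 2 : 16;
            struct instruction *mem = realloc(a->elements, new_capacity * sizeof(*mem));

            if (!mem)
                return NULL; /* The caller reports OOM, as the hunks below do. */
            a->elements = mem;
            a->capacity = new_capacity;
        }
        return &a->elements[a->count++];
    }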
@@ -3679,7 +3607,8 @@ static void sm6_parser_declare_icb(struct sm6_parser *sm6, const struct sm6_type { struct vkd3d_shader_instruction *ins;
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_IMMEDIATE_CONSTANT_BUFFER); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_IMMEDIATE_CONSTANT_BUFFER))) + return; /* The icb value index will be resolved later so forward references can be handled. */ ins->declaration.icb = (void *)(intptr_t)init; dst->value_type = VALUE_TYPE_ICB; @@ -3690,12 +3619,22 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru unsigned int count, unsigned int alignment, bool has_function_scope, unsigned int init, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { - enum vsir_data_type data_type = vsir_data_type_from_dxil(elem_type); + enum vsir_data_type data_type = vsir_data_type_from_dxil(elem_type, 0, sm6); + + if (!(sm6->program->global_flags & VKD3DSGF_FORCE_NATIVE_LOW_PRECISION)) + { + if (data_type == VSIR_DATA_F16) + data_type = VSIR_DATA_F32; + else if (data_type == VSIR_DATA_I16) + data_type = VSIR_DATA_I32; + else if (data_type == VSIR_DATA_U16) + data_type = VSIR_DATA_U32; + }
if (ins) vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_DCL_INDEXABLE_TEMP); - else - ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_INDEXABLE_TEMP); + else if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_INDEXABLE_TEMP))) + return; ins->declaration.indexable_temp.register_idx = sm6->indexable_temp_count++; ins->declaration.indexable_temp.register_size = count; ins->declaration.indexable_temp.alignment = alignment; @@ -3715,23 +3654,21 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 struct vkd3d_shader_instruction *ins; unsigned int byte_count;
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TGSM_RAW); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TGSM_RAW))) + return; dst_param_init(&ins->declaration.tgsm_raw.reg); dst->value_type = VALUE_TYPE_GROUPSHAREDMEM; dst->u.groupsharedmem.id = sm6->tgsm_count++; dst->structure_stride = 0; - sm6_register_from_value(&ins->declaration.tgsm_raw.reg.reg, dst, sm6); + vsir_register_from_dxil_value(&ins->declaration.tgsm_raw.reg.reg, dst, 0, sm6); ins->declaration.tgsm_raw.alignment = alignment; byte_count = elem_type->u.width / CHAR_BIT; /* Convert minimum precision types to their 32-bit equivalent. */ if (byte_count == 2) byte_count = 4; if (byte_count != 4) - { - FIXME("Unsupported byte count %u.\n", byte_count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Raw TGSM byte count %u is not supported.", byte_count); - } ins->declaration.tgsm_raw.byte_count = byte_count; /* The initialiser value index will be resolved later when forward references can be handled. */ ins->flags = init; @@ -3742,7 +3679,8 @@ static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const str { struct vkd3d_shader_instruction *ins;
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TGSM_STRUCTURED); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TGSM_STRUCTURED))) + return; dst_param_init(&ins->declaration.tgsm_structured.reg); dst->value_type = VALUE_TYPE_GROUPSHAREDMEM; dst->u.groupsharedmem.id = sm6->tgsm_count++; @@ -3750,13 +3688,10 @@ static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const str /* Convert minimum precision types to their 32-bit equivalent. */ if (dst->structure_stride == 2) dst->structure_stride = 4; - sm6_register_from_value(&ins->declaration.tgsm_structured.reg.reg, dst, sm6); + vsir_register_from_dxil_value(&ins->declaration.tgsm_structured.reg.reg, dst, 0, sm6); if (dst->structure_stride != 4) - { - FIXME("Unsupported structure stride %u.\n", dst->structure_stride); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Structured TGSM byte stride %u is not supported.", dst->structure_stride); - } ins->declaration.tgsm_structured.alignment = alignment; ins->declaration.tgsm_structured.byte_stride = dst->structure_stride; ins->declaration.tgsm_structured.structure_count = count; @@ -3781,8 +3716,7 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ { if (!sm6_type_is_scalar(type->u.array.elem_type)) { - FIXME("Unsupported nested type class %u.\n", type->u.array.elem_type->class); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Global array variables with nested type class %u are not supported.", type->u.array.elem_type->class); return false; @@ -3797,8 +3731,7 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ } else { - FIXME("Unsupported type class %u.\n", type->class); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Global variables of type class %u are not supported.", type->class); return false; } @@ -3903,33 +3836,141 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ } else { - FIXME("Unhandled address space %"PRIu64".\n", address_space); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Global variables with address space %"PRIu64" are not supported.", address_space); return false; }
++sm6->value_count; - return true; + return (sm6->p.status >= 0); }
static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_initialiser( size_t index, struct sm6_parser *sm6) { - const struct sm6_value *value; + struct sm6_value *value;
VKD3D_ASSERT(index); --index; - if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_data(value) && !sm6_value_is_undef(value))) + if (!(value = sm6_parser_get_value_safe(sm6, index)) + || (!sm6_value_is_constant_array(value) && !sm6_value_is_undef(value))) { WARN("Invalid initialiser index %zu.\n", index); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Global variable initialiser value index %zu is invalid.", index); return NULL; } - else if (sm6_value_is_data(value)) + else if (sm6_value_is_constant_array(value)) { - return value->u.data; + const uint64_t *elements = value->u.constant_array.elements; + struct vkd3d_shader_immediate_constant_buffer *icb; + const struct sm6_array_info *array; + const struct sm6_type *elem_type; + unsigned int i, size, count; + uint64_t *data64; + + if (value->u.constant_array.icb) + return value->u.constant_array.icb; + + array = &value->type->u.array; + elem_type = array->elem_type; + /* Multidimensional arrays are emitted in flattened form. */ + if (elem_type->class != TYPE_CLASS_INTEGER && elem_type->class != TYPE_CLASS_FLOAT) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, + "The element data type for an immediate constant buffer is not scalar integer or floating point."); + return NULL; + } + + /* Arrays of bool are not used in DXIL. dxc will emit an array of int32 instead if necessary. */ + if (!(size = elem_type->u.width / CHAR_BIT)) + { + WARN("Invalid data type width %u.\n", elem_type->u.width); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "An immediate constant buffer is declared with boolean elements."); + return NULL; + } + size = max(size, sizeof(icb->data[0])); + count = elements ? array->count * size / sizeof(icb->data[0]) : 0; + + if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[count])))) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating an immediate constant buffer of count %u.", count); + return NULL; + } + + if (!vsir_program_add_icb(sm6->program, icb)) + { + vkd3d_free(icb); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory storing an immediate constant buffer object."); + return NULL; + } + + count = array->count; + icb->register_idx = sm6->icb_count++; + icb->data_type = vsir_data_type_from_dxil(elem_type, 0, sm6); + icb->element_count = count; + icb->component_count = 1; + icb->is_null = !elements; + value->u.constant_array.icb = icb; + + if (!elements) + return icb; + + switch (icb->data_type) + { + case VSIR_DATA_F16: + icb->data_type = VSIR_DATA_F32; + for (i = 0; i < count; ++i) + { + icb->data[i] = half_to_float(elements[i]); + } + break; + + case VSIR_DATA_I16: + icb->data_type = VSIR_DATA_I32; + for (i = 0; i < count; ++i) + { + icb->data[i] = (int16_t)elements[i]; + } + break; + + case VSIR_DATA_U16: + icb->data_type = VSIR_DATA_U32; + for (i = 0; i < count; ++i) + { + icb->data[i] = (int16_t)elements[i]; + } + break; + + case VSIR_DATA_F32: + case VSIR_DATA_I32: + case VSIR_DATA_U32: + for (i = 0; i < count; ++i) + { + icb->data[i] = elements[i]; + } + break; + + case VSIR_DATA_F64: + case VSIR_DATA_I64: + case VSIR_DATA_U64: + data64 = (uint64_t *)icb->data; + for (i = 0; i < count; ++i) + { + data64[i] = elements[i]; + } + break; + + default: + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid array of type %u.", icb->data_type); + 
return NULL; + } + + return icb; } /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */ return NULL; @@ -3943,15 +3984,16 @@ static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm return false;
--index; - if (!(value = sm6_parser_get_value_safe(sm6, index)) - || (!sm6_value_is_data(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) + if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_constant_array(value) + && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) { WARN("Invalid initialiser index %zu.\n", index); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "TGSM initialiser value index %zu is invalid.", index); return false; } - else if ((sm6_value_is_data(value) && value->u.data->is_null) || sm6_value_is_constant_zero(value)) + else if ((sm6_value_is_constant_array(value) && !value->u.constant_array.elements) + || sm6_value_is_constant_zero(value)) { return true; } @@ -3961,17 +4003,16 @@ static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm return false; }
- FIXME("Non-zero initialisers are not supported.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Non-zero TGSM initialisers are not supported."); return false; }
static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { - struct vsir_program_iterator it = vsir_program_iterator(&sm6->p.program->instructions); - size_t i, count, base_value_idx = sm6->value_count; + struct vsir_program_iterator it = vsir_program_iterator(&sm6->program->instructions); const struct dxil_block *block = &sm6->root_block; + size_t i, base_value_idx = sm6->value_count; struct vkd3d_shader_instruction *ins; const struct dxil_record *record; enum vkd3d_result ret; @@ -3980,10 +4021,6 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) sm6->p.location.line = block->id; sm6->p.location.column = 0;
- for (i = 0, count = 0; i < block->record_count; ++i) - count += block->records[i]->code == MODULE_CODE_GLOBALVAR; - sm6_parser_require_space(sm6, count); - for (i = 0; i < block->record_count; ++i) { sm6->p.location.column = i; @@ -4001,7 +4038,10 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6)
case MODULE_CODE_GLOBALVAR: if (!sm6_parser_declare_global(sm6, record)) - return VKD3D_ERROR_INVALID_SHADER; + { + VKD3D_ASSERT(sm6->p.status < 0); + return sm6->p.status; + } break;
case MODULE_CODE_VERSION: @@ -4009,8 +4049,7 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) return VKD3D_ERROR_INVALID_SHADER; if ((version = record->operands[0]) != 1) { - FIXME("Unsupported format version %#"PRIx64".\n", version); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED, "Bitcode format version %#"PRIx64" is unsupported.", version); return VKD3D_ERROR_INVALID_SHADER; } @@ -4088,7 +4127,9 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par unsigned int i;
for (i = 0; i < count; ++i) - src_param_init_from_value(&src_params[i], operands[i], sm6); + { + src_param_init_from_value(&src_params[i], operands[i], 0, sm6); + } }
static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( @@ -4125,7 +4166,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) { - enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type; + enum vkd3d_shader_type shader_type = sm6->program->shader_version.type; enum vkd3d_shader_register_type io_reg_type; bool is_patch_constant, is_control_point; struct vkd3d_shader_dst_param *param; @@ -4175,7 +4216,10 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade if (is_control_point) { if (reg_type == VKD3DSPR_OUTPUT) - param->reg.idx[count].rel_addr = vsir_program_create_outpointid_param(sm6->p.program); + { + VKD3D_ASSERT(sm6->outpointid_param); + param->reg.idx[count].rel_addr = sm6->outpointid_param; + } param->reg.idx[count++].offset = 0; }
@@ -4190,7 +4234,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade
static int sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) { - if (!(sm6->output_params = vsir_program_get_dst_params(sm6->p.program, output_signature->element_count))) + if (!(sm6->output_params = vsir_program_get_dst_params(sm6->program, output_signature->element_count))) { vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Failed to allocate output parameters."); @@ -4204,7 +4248,7 @@ static int sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct
static int sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) { - if (!(sm6->input_params = vsir_program_get_dst_params(sm6->p.program, input_signature->element_count))) + if (!(sm6->input_params = vsir_program_get_dst_params(sm6->program, input_signature->element_count))) { vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Failed to allocate input parameters."); @@ -4219,9 +4263,9 @@ static int sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct static int sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, const struct shader_signature *patch_constant_signature) { - bool is_input = sm6->p.program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; + bool is_input = sm6->program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN;
- if (!(sm6->patch_constant_params = vsir_program_get_dst_params(sm6->p.program, + if (!(sm6->patch_constant_params = vsir_program_get_dst_params(sm6->program, patch_constant_signature->element_count))) { vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, @@ -4310,8 +4354,7 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec WARN("Ignoring in_alloca flag.\n"); if (!(packed_operands & ALLOCA_FLAG_EXPLICIT_TYPE)) { - FIXME("Unhandled implicit type.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Implicit result type for ALLOCA instructions is not supported."); return; } @@ -4347,8 +4390,7 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec /* A size of 1 means one instance of type[0], i.e. one array. */ if (sm6_value_get_constant_uint(size, sm6) != 1) { - FIXME("Allocation size is not 1.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "ALLOCA instruction allocation sizes other than 1 are not supported."); return; } @@ -4410,7 +4452,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ || !sm6_value_validate_is_backward_ref(ptr, sm6)) return;
- sm6_register_from_value(®, ptr, sm6); + vsir_register_from_dxil_value(®, ptr, 0, sm6);
if (reg.type != VKD3DSPR_GROUPSHAREDMEM) { @@ -4430,8 +4472,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_
if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VSIR_OP_INVALID) { - FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code); return; } @@ -4441,6 +4482,8 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ if ((code = record->operands[i++]) != ORDERING_SEQCST) FIXME("Unhandled atomic ordering %"PRIu64".\n", code); + else + WARN("Ignoring atomic ordering %"PRIu64".\n", code);
if ((code = record->operands[i]) != 1) WARN("Ignoring synchronisation scope %"PRIu64".\n", code); @@ -4458,7 +4501,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_
ins = state->ins; vsir_instruction_init(ins, &sm6->p.location, op); - ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + ins->flags = is_volatile ? VKD3DARF_VOLATILE : 0;
if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) return; @@ -4466,12 +4509,12 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ src_param_init_vector_from_reg(&src_params[0], &coord); else src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[1], src, sm6); + src_param_init_from_value(&src_params[1], src, 0, sm6);
sm6_parser_init_ssa_value(sm6, dst);
dst_params = instruction_dst_params_alloc(ins, 2, sm6); - sm6_register_from_value(&dst_params[0].reg, dst, sm6); + vsir_register_from_dxil_value(&dst_params[0].reg, dst, 0, sm6); dst_param_init(&dst_params[0]);
dst_params[1].reg = reg; @@ -4483,7 +4526,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ }
static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, - const struct sm6_type *type_b, struct sm6_parser *sm6) + const struct sm6_type *type_b, struct sm6_parser *sm6, enum vkd3d_shader_opcode *aux_opcode) { bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); bool is_double = sm6_type_is_double(type_a); @@ -4506,12 +4549,17 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty "Type mismatch in binary operation arguments."); }
+ *aux_opcode = VSIR_OP_NOP; + switch (code) { case BINOP_ADD: + op = is_int ? VSIR_OP_IADD : (is_double ? VSIR_OP_DADD : VSIR_OP_ADD); + is_valid = !is_bool; + break; case BINOP_SUB: - /* NEG is applied later for subtraction. */ op = is_int ? VSIR_OP_IADD : (is_double ? VSIR_OP_DADD : VSIR_OP_ADD); + *aux_opcode = is_int ? VSIR_OP_INEG : VSIR_OP_NEG; is_valid = !is_bool; break; case BINOP_AND: @@ -4559,8 +4607,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty is_valid = is_int; break; default: - FIXME("Unhandled binary op %#"PRIx64".\n", code); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Binary operation %#"PRIx64" is unhandled.", code); return VSIR_OP_INVALID; } @@ -4576,10 +4623,12 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty }
static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_record *record, - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) + struct sm6_block *code_block, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { + enum vkd3d_shader_opcode opcode, aux_opcode; struct vkd3d_shader_src_param *src_params; - enum vkd3d_shader_opcode handler_idx; + struct vkd3d_shader_dst_param *dst_params; + uint32_t type_flags = 0, aux_id = 0; const struct sm6_value *a, *b; uint64_t code, flags; bool silence_warning; @@ -4596,15 +4645,34 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco return;
code = record->operands[i++]; - if ((handler_idx = map_binary_op(code, a->type, b->type, sm6)) == VSIR_OP_INVALID) + if ((opcode = map_binary_op(code, a->type, b->type, sm6, &aux_opcode)) == VSIR_OP_INVALID) return;
- vsir_instruction_init(ins, &sm6->p.location, handler_idx); + if (aux_opcode != VSIR_OP_NOP) + { + vsir_instruction_init(ins, &sm6->p.location, aux_opcode); + + if (!(dst_params = instruction_dst_params_alloc(ins, 1, sm6)) + || !(src_params = instruction_src_params_alloc(ins, 1, sm6))) + return; + + aux_id = sm6_parser_alloc_ssa_id(sm6); + + src_param_init_from_value(&src_params[0], b, DXIL_TYPE_SIGNED, sm6); + + dst_param_init(&dst_params[0]); + register_init_with_id(&dst_params[0].reg, VKD3DSPR_SSA, src_params[0].reg.data_type, aux_id); + + ++ins; + ++code_block->instruction_count; + } + + vsir_instruction_init(ins, &sm6->p.location, opcode);
flags = (record->operand_count > i) ? record->operands[i] : 0; silence_warning = false;
- switch (handler_idx) + switch (opcode) { case VSIR_OP_ADD: case VSIR_OP_MUL: @@ -4621,11 +4689,13 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco case VSIR_OP_ISHL: silence_warning = !(flags & ~(OB_NO_UNSIGNED_WRAP | OB_NO_SIGNED_WRAP)); break; + case VSIR_OP_IDIV: + case VSIR_OP_IREM: case VSIR_OP_ISHR: + type_flags |= DXIL_TYPE_SIGNED; + /* fall through */ case VSIR_OP_USHR: - case VSIR_OP_IDIV: case VSIR_OP_UDIV_SIMPLE: - case VSIR_OP_IREM: case VSIR_OP_UREM: silence_warning = !(flags & ~PEB_EXACT); break; @@ -4646,21 +4716,29 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco
if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) return; - src_param_init_from_value(&src_params[0], a, sm6); - src_param_init_from_value(&src_params[1], b, sm6); - if (code == BINOP_SUB) - src_params[1].modifiers = VKD3DSPSM_NEG; + + src_param_init_from_value(&src_params[0], a, type_flags, sm6); + + if (aux_opcode == VSIR_OP_NOP) + { + src_param_init_from_value(&src_params[1], b, type_flags, sm6); + } + else + { + src_param_init(&src_params[1]); + register_init_with_id(&src_params[1].reg, VKD3DSPR_SSA, src_params[0].reg.data_type, aux_id); + }
dst->type = a->type;
- if (handler_idx == VSIR_OP_ISHL || handler_idx == VSIR_OP_ISHR || handler_idx == VSIR_OP_USHR) + if (opcode == VSIR_OP_ISHL || opcode == VSIR_OP_ISHR || opcode == VSIR_OP_USHR) { /* DXC emits AND instructions where necessary to mask shift counts. * Shift binops do not imply masking the shift as the TPF equivalents * do. */ ins->flags |= VKD3DSI_SHIFT_UNMASKED; } - instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, type_flags, sm6); }
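The aux_opcode mechanism above replaces the old lowering of BINOP_SUB, which emitted a single add and set a NEG source modifier on the second operand. The subtrahend is now negated by a separate instruction into a fresh SSA id that the main opcode consumes; roughly, "a - b" becomes INEG (or NEG for floating point) followed by IADD/ADD/DADD. A scalar model of the same shape:

    /* 'aux' plays the role of the temporary SSA register written by the
     * aux instruction. */
    static int sub_via_neg_add(int a, int b)
    {
        int aux = -b;    /* aux_opcode: INEG/NEG into a new SSA id */
        return a + aux;  /* main opcode: IADD/ADD consuming that id */
    }

Presumably this keeps source modifiers out of the SSA-form IR, at the cost of one extra instruction per subtraction.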
static const struct sm6_block *sm6_function_get_block(const struct sm6_function *function, uint64_t index, @@ -4710,7 +4788,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record dxil_record_validate_operand_max_count(record, i, sm6);
code_block->terminator.type = TERMINATOR_COND_BR; - sm6_register_from_value(&code_block->terminator.conditional_reg, value, sm6); + vsir_register_from_dxil_value(&code_block->terminator.conditional_reg, value, 0, sm6); code_block->terminator.true_block = sm6_function_get_block(function, record->operands[0], sm6); code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); } @@ -4781,7 +4859,9 @@ static bool sm6_parser_emit_composite_construct(struct sm6_parser *sm6, const st unsigned int i;
for (i = 0; i < component_count; ++i) - sm6_register_from_value(&operand_regs[i], operands[i], sm6); + { + vsir_register_from_dxil_value(&operand_regs[i], operands[i], 0, sm6); + }
return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); } @@ -4797,11 +4877,11 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s { if (!z_operand && operands[component_count]->value_type == VALUE_TYPE_UNDEFINED) break; - sm6_register_from_value(&operand_regs[component_count], operands[component_count], sm6); + vsir_register_from_dxil_value(&operand_regs[component_count], operands[component_count], 0, sm6); }
if (z_operand) - sm6_register_from_value(&operand_regs[component_count++], z_operand, sm6); + vsir_register_from_dxil_value(&operand_regs[component_count++], z_operand, 0, sm6);
return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); } @@ -4822,7 +4902,7 @@ static void sm6_parser_emit_dx_void(struct sm6_parser *sm6, enum dx_intrinsic_op { struct vkd3d_shader_instruction *ins = state->ins; vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_void_op(op)); - instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) @@ -4919,13 +4999,16 @@ static void sm6_parser_emit_dx_unary(struct sm6_parser *sm6, enum dx_intrinsic_o vsir_instruction_init(ins, &sm6->p.location, map_dx_unary_op(op)); if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
-static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) +static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, + const struct sm6_type *type, uint32_t *type_flags) { + *type_flags = 0; + switch (op) { case DX_FMAX: @@ -4933,8 +5016,10 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co case DX_FMIN: return type->u.width == 64 ? VSIR_OP_DMIN : VSIR_OP_MIN; case DX_IMAX: + *type_flags |= DXIL_TYPE_SIGNED; return VSIR_OP_IMAX; case DX_IMIN: + *type_flags |= DXIL_TYPE_SIGNED; return VSIR_OP_IMIN; case DX_QUAD_READ_LANE_AT: return VSIR_OP_QUAD_READ_LANE_AT; @@ -4954,14 +5039,15 @@ static void sm6_parser_emit_dx_binary(struct sm6_parser *sm6, enum dx_intrinsic_ { struct vkd3d_shader_instruction *ins = state->ins; struct vkd3d_shader_src_param *src_params; + uint32_t type_flags;
- vsir_instruction_init(ins, &sm6->p.location, map_dx_binary_op(op, operands[0]->type)); + vsir_instruction_init(ins, &sm6->p.location, map_dx_binary_op(op, operands[0]->type, &type_flags)); if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) return; - src_param_init_from_value(&src_params[0], operands[0], sm6); - src_param_init_from_value(&src_params[1], operands[1], sm6); + src_param_init_from_value(&src_params[0], operands[0], type_flags, sm6); + src_param_init_from_value(&src_params[1], operands[1], type_flags, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, type_flags, sm6); }
static enum vkd3d_shader_opcode map_dx_atomic_binop(const struct sm6_value *operand, struct sm6_parser *sm6) @@ -4990,8 +5076,7 @@ static enum vkd3d_shader_opcode map_dx_atomic_binop(const struct sm6_value *oper return VSIR_OP_IMM_ATOMIC_XOR; /* DXIL currently doesn't use SUB and NAND. */ default: - FIXME("Unhandled atomic binop %"PRIu64".\n", code); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Operation %"PRIu64" for an atomic binop instruction is unhandled.", code); return VSIR_OP_INVALID; } @@ -5006,18 +5091,18 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr unsigned int i, coord_idx, coord_count = 1; struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; - enum vkd3d_shader_opcode handler_idx; struct vkd3d_shader_instruction *ins; const struct sm6_value *resource; struct vkd3d_shader_register reg; + enum vkd3d_shader_opcode opcode;
resource = operands[0]; if (!sm6_value_validate_is_handle(resource, sm6)) return;
if (is_cmp_xchg) - handler_idx = VSIR_OP_IMM_ATOMIC_CMP_EXCH; - else if ((handler_idx = map_dx_atomic_binop(operands[1], sm6)) == VSIR_OP_INVALID) + opcode = VSIR_OP_IMM_ATOMIC_CMP_EXCH; + else if ((opcode = map_dx_atomic_binop(operands[1], sm6)) == VSIR_OP_INVALID) return;
coord_idx = 2 - is_cmp_xchg; @@ -5030,7 +5115,7 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr } else { - sm6_register_from_value(®, operands[coord_idx], sm6); + vsir_register_from_dxil_value(®, operands[coord_idx], 0, sm6); }
for (i = coord_idx + coord_count; i < coord_idx + 3; ++i) @@ -5039,26 +5124,26 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr { WARN("Ignoring unexpected operand.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, - "Ignoring an unexpected defined operand value for atomic instruction %u.", handler_idx); + "Ignoring an unexpected defined operand value for atomic instruction %u.", opcode); break; } }
ins = state->ins; - vsir_instruction_init(ins, &sm6->p.location, handler_idx); + vsir_instruction_init(ins, &sm6->p.location, opcode);
if (!(src_params = instruction_src_params_alloc(ins, 2 + is_cmp_xchg, sm6))) return; src_param_init_vector_from_reg(&src_params[0], ®); if (is_cmp_xchg) - src_param_init_from_value(&src_params[1], operands[4], sm6); - src_param_init_from_value(&src_params[1 + is_cmp_xchg], operands[5], sm6); + src_param_init_from_value(&src_params[1], operands[4], 0, sm6); + src_param_init_from_value(&src_params[1 + is_cmp_xchg], operands[5], 0, sm6);
sm6_parser_init_ssa_value(sm6, dst);
dst_params = instruction_dst_params_alloc(ins, 2, sm6); dst_param_init(&dst_params[0]); - sm6_register_from_value(&dst_params[0].reg, dst, sm6); + vsir_register_from_dxil_value(&dst_params[0].reg, dst, 0, sm6); dst_param_init(&dst_params[1]); sm6_register_from_handle(sm6, &resource->u.handle, &dst_params[1].reg); } @@ -5099,8 +5184,7 @@ static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enu
if (!sm6_value_is_constant(operands[1])) { - FIXME("Unsupported dynamic update operand.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "A dynamic update value for a UAV counter operation is not supported."); return; } @@ -5118,7 +5202,7 @@ static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enu return; src_param_init_vector_from_handle(sm6, &src_params[0], &resource->u.handle);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5152,7 +5236,7 @@ static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_int src_param_init_scalar(&src_params[1], !clamp); src_param_init_vector_from_handle(sm6, &src_params[2], &sampler->u.handle);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5180,7 +5264,7 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr
type = sm6_type_get_scalar_type(dst->type, 0); VKD3D_ASSERT(type); - src_param->reg.data_type = vsir_data_type_from_dxil(type); + src_param->reg.data_type = vsir_data_type_from_dxil(type, 0, sm6); if (data_type_is_64_bit(src_param->reg.data_type)) src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); else @@ -5189,16 +5273,17 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6); }
-static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, +static void sm6_parser_dcl_register_builtin(struct sm6_parser *dxil, enum vkd3d_shader_opcode opcode, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int component_count) { struct vkd3d_shader_dst_param *dst_param; struct vkd3d_shader_instruction *ins;
- if (!bitmap_is_set(sm6->io_regs_declared, reg_type)) + if (!bitmap_is_set(dxil->io_regs_declared, reg_type)) { - bitmap_set(sm6->io_regs_declared, reg_type); - ins = sm6_parser_add_instruction(sm6, handler_idx); + bitmap_set(dxil->io_regs_declared, reg_type); + if (!(ins = sm6_parser_add_instruction(dxil, opcode))) + return; dst_param = &ins->declaration.dst; vsir_register_init(&dst_param->reg, reg_type, data_type, 0); dst_param_init_vector(dst_param, component_count); @@ -5220,7 +5305,7 @@ static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *sm6, struct src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param_init(src_param);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_coverage(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5323,7 +5408,7 @@ static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_DISCARD);
if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6); }
static void sm6_parser_emit_dx_domain_location(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5350,7 +5435,7 @@ static void sm6_parser_emit_dx_domain_location(struct sm6_parser *sm6, enum dx_i src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param_init_scalar(src_param, component_idx);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5359,21 +5444,21 @@ static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opc struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_instruction *ins; struct vkd3d_shader_register regs[2]; - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; unsigned int component_count;
switch (op) { case DX_DOT2: - handler_idx = VSIR_OP_DP2; + opcode = VSIR_OP_DP2; component_count = 2; break; case DX_DOT3: - handler_idx = VSIR_OP_DP3; + opcode = VSIR_OP_DP3; component_count = 3; break; case DX_DOT4: - handler_idx = VSIR_OP_DP4; + opcode = VSIR_OP_DP4; component_count = 4; break; default: @@ -5386,13 +5471,13 @@ static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opc return;
ins = state->ins; - vsir_instruction_init(ins, &sm6->p.location, handler_idx); + vsir_instruction_init(ins, &sm6->p.location, opcode); if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) return; src_param_init_vector_from_reg(&src_params[0], ®s[0]); src_param_init_vector_from_reg(&src_params[1], ®s[1]);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5407,7 +5492,7 @@ static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intri row_index = sm6_value_get_constant_uint(operands[0], sm6); column_index = sm6_value_get_constant_uint(operands[2], sm6);
- signature = &sm6->p.program->input_signature; + signature = &sm6->program->input_signature; if (row_index >= signature->element_count) { WARN("Invalid row index %u.\n", row_index); @@ -5437,9 +5522,9 @@ static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intri register_index_address_init(&src_params[0].reg.idx[0], operands[1], sm6);
if (op == DX_EVAL_SAMPLE_INDEX) - src_param_init_from_value(&src_params[1], operands[3], sm6); + src_param_init_from_value(&src_params[1], operands[3], 0, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5448,13 +5533,12 @@ static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_op struct vkd3d_shader_instruction *ins = state->ins; struct vkd3d_shader_src_param *src_param;
- vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_MOV); + vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_ABS); if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); - src_param->modifiers = VKD3DSPSM_ABS; + src_param_init_from_value(src_param, operands[0], 0, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5494,7 +5578,7 @@ static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_i component_idx = sm6_value_get_constant_uint(operands[0], sm6); src_param_init_scalar(src_param, component_idx);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) @@ -5524,9 +5608,11 @@ static void sm6_parser_emit_dx_ma(struct sm6_parser *sm6, enum dx_intrinsic_opco if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) return; for (i = 0; i < 3; ++i) - src_param_init_from_value(&src_params[i], operands[i], sm6); + { + src_param_init_from_value(&src_params[i], operands[i], 0, sm6); + }
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5554,7 +5640,7 @@ static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_in if (is_texture) { ins->flags = VKD3DSI_RESINFO_UINT; - src_param_init_from_value(&src_params[0], operands[1], sm6); + src_param_init_from_value(&src_params[0], operands[1], 0, sm6); component_count = VKD3D_VEC4_SIZE;
if (resource_kind_is_multisampled(resource_kind)) @@ -5627,9 +5713,11 @@ static void sm6_parser_emit_dx_tertiary(struct sm6_parser *sm6, enum dx_intrinsi if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) return; for (i = 0; i < 3; ++i) - src_param_init_from_value(&src_params[i], operands[i], sm6); + { + src_param_init_from_value(&src_params[i], operands[i], 0, sm6); + }
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5638,7 +5726,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin bool is_control_point = op == DX_LOAD_OUTPUT_CONTROL_POINT; bool is_patch_constant = op == DX_LOAD_PATCH_CONSTANT; struct vkd3d_shader_instruction *ins = state->ins; - struct vsir_program *program = sm6->p.program; + struct vsir_program *program = sm6->program; unsigned int count, row_index, column_index; const struct vkd3d_shader_dst_param *params; struct vkd3d_shader_src_param *src_param; @@ -5697,7 +5785,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); }
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5717,7 +5805,7 @@ static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intri src_params[0].reg = reg; src_param_init_vector(&src_params[0], 2);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_output_control_point_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5768,9 +5856,9 @@ static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic
if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5915,7 +6003,7 @@ static void sm6_parser_emit_dx_buffer_load(struct sm6_parser *sm6, enum dx_intri
if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) return; - src_param_init_from_value(&src_params[0], operands[1], sm6); + src_param_init_from_value(&src_params[0], operands[1], 0, sm6); if (!sm6_value_is_undef(operands[2])) { /* Constant zero would be ok, but is not worth checking for unless it shows up. */ @@ -5980,7 +6068,7 @@ static void sm6_parser_emit_dx_buffer_store(struct sm6_parser *sm6, enum dx_intr
if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) return; - src_param_init_from_value(&src_params[0], operands[1], sm6); + src_param_init_from_value(&src_params[0], operands[1], 0, sm6); if (!sm6_value_is_undef(operands[2])) { /* Constant zero would have no effect, but is not worth checking for unless it shows up. */ @@ -6010,7 +6098,7 @@ static void sm6_parser_emit_dx_get_sample_count(struct sm6_parser *sm6, enum dx_ src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param_init(src_param);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); ins->dst->reg.data_type = VSIR_DATA_U32; }
@@ -6035,14 +6123,14 @@ static void sm6_parser_emit_dx_get_sample_pos(struct sm6_parser *sm6, enum dx_in if (op == DX_TEX2DMS_GET_SAMPLE_POS) { src_param_init_vector_from_handle(sm6, &src_params[0], &resource->u.handle); - src_param_init_from_value(&src_params[1], operands[1], sm6); + src_param_init_from_value(&src_params[1], operands[1], 0, sm6); } else { src_param_init_vector(&src_params[0], 2); vsir_register_init(&src_params[0].reg, VKD3DSPR_RASTERIZER, VSIR_DATA_F32, 0); src_params[0].reg.dimension = VSIR_DIMENSION_VEC4; - src_param_init_from_value(&src_params[1], operands[0], sm6); + src_param_init_from_value(&src_params[1], operands[0], 0, sm6); }
instruction_dst_param_init_ssa_vector(ins, 2, sm6); @@ -6054,11 +6142,14 @@ static unsigned int sm6_value_get_texel_offset(const struct sm6_value *value, st }
static void instruction_set_texel_offset(struct vkd3d_shader_instruction *ins, - const struct sm6_value **operands, struct sm6_parser *sm6) + const struct sm6_value **operands, unsigned int count, struct sm6_parser *sm6) { ins->texel_offset.u = sm6_value_get_texel_offset(operands[0], sm6); ins->texel_offset.v = sm6_value_get_texel_offset(operands[1], sm6); - ins->texel_offset.w = sm6_value_get_texel_offset(operands[2], sm6); + if (count == 3) + ins->texel_offset.w = sm6_value_get_texel_offset(operands[2], sm6); + else + ins->texel_offset.w = 0; }
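The count parameter exists because DXIL sample and load intrinsics carry three immediate offset operands while gather carries only two; a sketch of the resulting call shapes, mirroring the callers further down:

    /* Offsets for ld/sample span u, v and w; gather4 offsets span u and v
     * only, so the helper zeroes w instead of reading a missing operand. */
    instruction_set_texel_offset(ins, &operands[6], 3, sm6); /* sample */
    instruction_set_texel_offset(ins, &operands[6], 2, sm6); /* gather4 */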
static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -6104,7 +6195,7 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ instruction_init_with_resource(ins, (op == DX_SAMPLE_B) ? VSIR_OP_SAMPLE_B : VSIR_OP_SAMPLE_LOD, resource, sm6); src_params = instruction_src_params_alloc(ins, 4, sm6); - src_param_init_from_value(&src_params[3], operands[9], sm6); + src_param_init_from_value(&src_params[3], operands[9], 0, sm6); break; case DX_SAMPLE_C: clamp_idx = 10; @@ -6113,7 +6204,7 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ instruction_init_with_resource(ins, (op == DX_SAMPLE_C_LZ) ? VSIR_OP_SAMPLE_C_LZ : VSIR_OP_SAMPLE_C, resource, sm6); src_params = instruction_src_params_alloc(ins, 4, sm6); - src_param_init_from_value(&src_params[3], operands[9], sm6); + src_param_init_from_value(&src_params[3], operands[9], 0, sm6); component_count = 1; break; case DX_SAMPLE_GRAD: @@ -6140,7 +6231,7 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ src_param_init_vector_from_reg(&src_params[0], &coord); src_param_init_vector_from_handle(sm6, &src_params[1], &resource->u.handle); src_param_init_vector_from_handle(sm6, &src_params[2], &sampler->u.handle); - instruction_set_texel_offset(ins, &operands[6], sm6); + instruction_set_texel_offset(ins, &operands[6], 3, sm6);
instruction_dst_param_init_ssa_vector(ins, component_count, sm6); } @@ -6148,7 +6239,7 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ static void sm6_parser_emit_dx_sample_index(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { - const struct shader_signature *signature = &sm6->p.program->input_signature; + const struct shader_signature *signature = &sm6->program->input_signature; struct vkd3d_shader_instruction *ins = state->ins; struct vkd3d_shader_src_param *src_param; unsigned int element_idx; @@ -6170,7 +6261,7 @@ static void sm6_parser_emit_dx_sample_index(struct sm6_parser *sm6, enum dx_intr src_param->reg = sm6->input_params[element_idx].reg; src_param_init(src_param);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -6179,13 +6270,12 @@ static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsi struct vkd3d_shader_instruction *ins = state->ins; struct vkd3d_shader_src_param *src_param;
- vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_MOV); + vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_SATURATE); if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6);
- if (instruction_dst_param_init_ssa_scalar(ins, sm6)) - ins->dst->modifiers = VKD3DSPDM_SATURATE; + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
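The saturate intrinsic was previously lowered to a MOV carrying the VKD3DSPDM_SATURATE destination modifier; with destination modifiers on their way out (note the has_no_modifiers assertion added to the GLSL backend below), it now becomes a dedicated VSIR_OP_SATURATE. A minimal sketch of the operation's scalar semantics, for reference only:

    /* Reference semantics of VSIR_OP_SATURATE on one float component
     * (a sketch, not vkd3d code): clamp the value to [0.0, 1.0]. */
    static float saturate_scalar(float x)
    {
        return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
    }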
static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -6197,7 +6287,7 @@ static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intr vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_MOV); if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6);
instruction_dst_param_init_ssa_vector(ins, 2, sm6); } @@ -6207,7 +6297,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr { bool is_patch_constant = op == DX_STORE_PATCH_CONSTANT; struct vkd3d_shader_instruction *ins = state->ins; - struct vsir_program *program = sm6->p.program; + struct vsir_program *program = sm6->program; struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_dst_param *dst_param; const struct shader_signature *signature; @@ -6227,8 +6317,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr return; } e = &signature->elements[row_index]; - if (!e->sysval_semantic) - column_index += vsir_write_mask_get_component_idx(e->mask); + column_index += vsir_write_mask_get_component_idx(e->mask);
if (column_index >= VKD3D_VEC4_SIZE) { @@ -6263,7 +6352,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr }
if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) - src_param_init_from_value(src_param, value, sm6); + src_param_init_from_value(src_param, value, 0, sm6); }
static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -6309,14 +6398,14 @@ static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_in instruction_init_with_resource(ins, extended_offset ? VSIR_OP_GATHER4_PO_C : VSIR_OP_GATHER4_C, resource, sm6); if (!(src_params = instruction_src_params_alloc(ins, 4 + extended_offset, sm6))) return; - src_param_init_from_value(&src_params[3 + extended_offset], operands[9], sm6); + src_param_init_from_value(&src_params[3 + extended_offset], operands[9], 0, sm6); }
src_param_init_vector_from_reg(&src_params[0], &coord); if (extended_offset) src_param_init_vector_from_reg(&src_params[1], &offset); else - instruction_set_texel_offset(ins, &operands[6], sm6); + instruction_set_texel_offset(ins, &operands[6], 2, sm6); src_param_init_vector_from_handle(sm6, &src_params[1 + extended_offset], &resource->u.handle); src_param_init_vector_from_handle(sm6, &src_params[2 + extended_offset], &sampler->u.handle); /* Swizzle stored in the sampler parameter is the scalar component index to be gathered. */ @@ -6362,7 +6451,7 @@ static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intr ins = state->ins; instruction_init_with_resource(ins, is_uav ? VSIR_OP_LD_UAV_TYPED : is_multisample ? VSIR_OP_LD2DMS : VSIR_OP_LD, resource, sm6); - instruction_set_texel_offset(ins, &operands[5], sm6); + instruction_set_texel_offset(ins, &operands[5], 3, sm6);
for (i = 0; i < VKD3D_VEC4_SIZE; ++i) ins->resource_data_type[i] = resource->u.handle.d->resource_data_type; @@ -6372,7 +6461,7 @@ static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intr src_param_init_vector_from_reg(&src_params[0], &coord); src_param_init_vector_from_handle(sm6, &src_params[1], &resource->u.handle); if (is_multisample) - src_param_init_from_value(&src_params[2], mip_level_or_sample_count, sm6); + src_param_init_from_value(&src_params[2], mip_level_or_sample_count, 0, sm6);
instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); } @@ -6436,7 +6525,7 @@ static void sm6_parser_emit_dx_wave_active_ballot(struct sm6_parser *sm6, enum d vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_WAVE_ACTIVE_BALLOT); if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6);
instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); } @@ -6476,9 +6565,9 @@ static void sm6_parser_emit_dx_wave_active_bit(struct sm6_parser *sm6, enum dx_i
if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static enum vkd3d_shader_opcode sm6_dx_map_wave_op(enum dxil_wave_op_kind op, bool is_signed, bool is_float, @@ -6527,9 +6616,9 @@ static void sm6_parser_emit_dx_wave_op(struct sm6_parser *sm6, enum dx_intrinsic
if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param_init_from_value(src_param, operands[0], sm6); + src_param_init_from_value(src_param, operands[0], 0, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_dx_wave_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -6918,14 +7007,15 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_recor fn_value->u.function.name, &operands[1], operand_count - 1, state, dst); }
-static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_type *from, - const struct sm6_type *to, struct sm6_parser *sm6) +static enum vkd3d_shader_opcode dxil_map_cast_op(uint64_t code, const struct sm6_type *from, + uint32_t *src_type_flags, const struct sm6_type *to, struct sm6_parser *dxil) { enum vkd3d_shader_opcode op = VSIR_OP_INVALID; bool from_int, to_int, from_fp, to_fp; unsigned int from_width, to_width; bool is_valid = false;
+ *src_type_flags = 0; from_int = sm6_type_is_integer(from); to_int = sm6_type_is_integer(to); from_fp = sm6_type_is_floating_point(from); @@ -6934,15 +7024,13 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ /* NOTE: DXIL currently doesn't use vectors here. */ if ((!from_int && !from_fp) || (!to_int && !to_fp)) { - FIXME("Unhandled cast of type class %u to type class %u.\n", from->class, to->class); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Cast of type class %u to type class %u is not implemented.", from->class, to->class); return VSIR_OP_INVALID; } if (to->u.width == 8 || from->u.width == 8) { - FIXME("Unhandled 8-bit value.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Cast to/from an 8-bit type is not implemented."); return VSIR_OP_INVALID; } @@ -6982,6 +7070,7 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ case CAST_SITOFP: op = VSIR_OP_ITOF; is_valid = from_int && to_fp; + *src_type_flags |= DXIL_TYPE_SIGNED; break;
case CAST_FPTRUNC: @@ -7000,16 +7089,14 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ break;
default: - FIXME("Unhandled cast op %"PRIu64".\n", code); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Cast operation %"PRIu64" is unhandled.", code); return VSIR_OP_INVALID; }
if (!is_valid) { - FIXME("Invalid types %u and/or %u for op %"PRIu64".\n", from->class, to->class, code); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Cast operation %"PRIu64" from type class %u, width %u to type class %u, width %u is invalid.", code, from->class, from->u.width, to->class, to->u.width); return VSIR_OP_INVALID; @@ -7032,22 +7119,23 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ return op; }
-static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_record *record, +static void sm6_parser_emit_cast(struct sm6_parser *dxil, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { struct vkd3d_shader_src_param *src_param; - enum vkd3d_shader_opcode handler_idx; const struct sm6_value *value; + enum vkd3d_shader_opcode op; const struct sm6_type *type; + uint32_t src_type_flags; unsigned int i = 0;
- if (!(value = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))) + if (!(value = sm6_parser_get_value_by_ref(dxil, record, NULL, &i))) return;
- if (!dxil_record_validate_operand_count(record, i + 2, i + 2, sm6)) + if (!dxil_record_validate_operand_count(record, i + 2, i + 2, dxil)) return;
- if (!(type = sm6_parser_get_type(sm6, record->operands[i++]))) + if (!(type = sm6_parser_get_type(dxil, record->operands[i++]))) return;
dst->type = type; @@ -7060,35 +7148,36 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor return; }
- if ((handler_idx = sm6_map_cast_op(record->operands[i], value->type, type, sm6)) == VSIR_OP_INVALID) + if ((op = dxil_map_cast_op(record->operands[i], value->type, &src_type_flags, type, dxil)) == VSIR_OP_INVALID) return;
- vsir_instruction_init(ins, &sm6->p.location, handler_idx); + vsir_instruction_init(ins, &dxil->p.location, op);
- if (handler_idx == VSIR_OP_NOP) + if (op == VSIR_OP_NOP) { *dst = *value; dst->type = type; return; }
- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + if (!(src_param = instruction_src_params_alloc(ins, 1, dxil))) return; - src_param_init_from_value(src_param, value, sm6); + src_param_init_from_value(src_param, value, src_type_flags, dxil);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, dxil);
/* VSIR bitcasts are represented by source registers with types different * from the types they were written with, rather than with different types * for the MOV source and destination. */ - if (handler_idx == VSIR_OP_MOV) + if (op == VSIR_OP_MOV) src_param->reg.data_type = ins->dst[0].reg.data_type; }
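That is, a DXIL bitcast leaves the MOV's operand types equal and instead relabels the source register; a hypothetical i32-to-float bitcast would be emitted roughly as (a sketch with made-up register names):

    /* Hypothetical lowering of "bitcast i32 %x to float": the SSA register
     * was written as VSIR_DATA_U32, but the MOV reads it back as
     * VSIR_DATA_F32; no conversion instruction is involved. */
    vsir_instruction_init(ins, &location, VSIR_OP_MOV);
    src_param->reg = x_reg;                   /* written as VSIR_DATA_U32 */
    src_param->reg.data_type = VSIR_DATA_F32; /* reinterpreted on read */
    ins->dst[0].reg.data_type = VSIR_DATA_F32;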
struct sm6_cmp_info { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; bool src_swap; + uint32_t type_flags; };
static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) @@ -7118,10 +7207,10 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) [ICMP_UGE] = {VSIR_OP_UGE}, [ICMP_ULT] = {VSIR_OP_ULT}, [ICMP_ULE] = {VSIR_OP_UGE, true}, - [ICMP_SGT] = {VSIR_OP_ILT, true}, - [ICMP_SGE] = {VSIR_OP_IGE}, - [ICMP_SLT] = {VSIR_OP_ILT}, - [ICMP_SLE] = {VSIR_OP_IGE, true}, + [ICMP_SGT] = {VSIR_OP_ILT, true, DXIL_TYPE_SIGNED}, + [ICMP_SGE] = {VSIR_OP_IGE, false, DXIL_TYPE_SIGNED}, + [ICMP_SLT] = {VSIR_OP_ILT, false, DXIL_TYPE_SIGNED}, + [ICMP_SLE] = {VSIR_OP_IGE, true, DXIL_TYPE_SIGNED}, };
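The src_swap entries cover comparisons VSIR has no direct opcode for; a worked example of the signed cases added here:

    /* ICMP_SGT a, b has no direct VSIR equivalent, but a > b == b < a, so
     * the table yields ILT with swapped sources; the emitter below routes
     * the operands through the "0 ^ src_swap" index. */
    const struct sm6_cmp_info *cmp = sm6_map_cmp2_op(ICMP_SGT);
    /* cmp->opcode == VSIR_OP_ILT, cmp->src_swap == true,
     * cmp->type_flags == DXIL_TYPE_SIGNED. */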
return (code < ARRAY_SIZE(cmp_op_table)) ? &cmp_op_table[code] : NULL; @@ -7188,7 +7277,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor "Type mismatch in comparison operation arguments."); }
- if (!(cmp = sm6_map_cmp2_op(code)) || !cmp->handler_idx || cmp->handler_idx == VSIR_OP_INVALID) + if (!(cmp = sm6_map_cmp2_op(code)) || !cmp->opcode || cmp->opcode == VSIR_OP_INVALID) { FIXME("Unhandled operation %"PRIu64".\n", code); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, @@ -7196,7 +7285,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor return; }
- vsir_instruction_init(ins, &sm6->p.location, cmp->handler_idx); + vsir_instruction_init(ins, &sm6->p.location, cmp->opcode);
flags = (record->operand_count > i) ? record->operands[i] : 0; silence_warning = false; @@ -7222,10 +7311,10 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor
if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) return; - src_param_init_from_value(&src_params[0 ^ cmp->src_swap], a, sm6); - src_param_init_from_value(&src_params[1 ^ cmp->src_swap], b, sm6); + src_param_init_from_value(&src_params[0 ^ cmp->src_swap], a, cmp->type_flags, sm6); + src_param_init_from_value(&src_params[1 ^ cmp->src_swap], b, cmp->type_flags, sm6);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7245,7 +7334,7 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re || !sm6_value_validate_is_backward_ref(ptr, sm6)) return;
- sm6_register_from_value(®, ptr, sm6); + vsir_register_from_dxil_value(®, ptr, 0, sm6);
if (reg.type != VKD3DSPR_GROUPSHAREDMEM) { @@ -7288,6 +7377,8 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ if (success_ordering != ORDERING_SEQCST) FIXME("Unhandled success ordering %"PRIu64".\n", success_ordering); + else + WARN("Ignoring success ordering %"PRIu64".\n", success_ordering); if (success_ordering != failure_ordering) FIXME("Unhandled failure ordering %"PRIu64".\n", failure_ordering);
@@ -7295,19 +7386,19 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re FIXME("Ignoring weak cmpxchg.\n");
vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_IMM_ATOMIC_CMP_EXCH); - ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + ins->flags = is_volatile ? VKD3DARF_VOLATILE : 0;
if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) return; src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[1], cmp, sm6); - src_param_init_from_value(&src_params[2], new, sm6); + src_param_init_from_value(&src_params[1], cmp, 0, sm6); + src_param_init_from_value(&src_params[2], new, 0, sm6);
sm6_parser_init_ssa_value(sm6, dst);
if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6))) return; - sm6_register_from_value(&dst_params[0].reg, dst, sm6); + vsir_register_from_dxil_value(&dst_params[0].reg, dst, 0, sm6); dst_param_init(&dst_params[0]); dst_params[1].reg = reg; dst_param_init(&dst_params[1]); @@ -7366,10 +7457,10 @@ static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil
if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - sm6_register_from_value(&src_param->reg, src, sm6); + vsir_register_from_dxil_value(&src_param->reg, src, 0, sm6); src_param_init_scalar(src_param, elem_idx);
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7515,7 +7606,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- sm6_register_from_value(®, ptr, sm6); + vsir_register_from_dxil_value(®, ptr, 0, sm6);
if (ptr->structure_stride) { @@ -7530,7 +7621,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor src_param_make_constant_uint(&src_params[0], reg.idx[1].offset); /* Struct offset is always zero as there is no struct, just an array. */ src_param_make_constant_uint(&src_params[1], 0); - src_param_init_from_value(&src_params[2], ptr, sm6); + src_param_init_from_value(&src_params[2], ptr, 0, sm6); src_params[2].reg.alignment = alignment; /* The offset is already in src_params[0]. */ src_params[2].reg.idx_count = 1; @@ -7544,11 +7635,11 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor return; if (operand_count > 1) src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[operand_count - 1], ptr, sm6); + src_param_init_from_value(&src_params[operand_count - 1], ptr, 0, sm6); src_params[operand_count - 1].reg.alignment = alignment; }
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static int phi_incoming_compare(const void *a, const void *b) @@ -7702,7 +7793,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n");
- sm6_register_from_value(®, ptr, sm6); + vsir_register_from_dxil_value(®, ptr, 0, sm6);
if (ptr->structure_stride) { @@ -7717,7 +7808,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco src_param_make_constant_uint(&src_params[0], reg.idx[1].offset); /* Struct offset is always zero as there is no struct, just an array. */ src_param_make_constant_uint(&src_params[1], 0); - src_param_init_from_value(&src_params[2], src, sm6); + src_param_init_from_value(&src_params[2], src, 0, sm6); } else { @@ -7728,7 +7819,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco return; if (operand_count > 1) src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[operand_count - 1], src, sm6); + src_param_init_from_value(&src_params[operand_count - 1], src, 0, sm6); }
dst_param = instruction_dst_params_alloc(ins, 1, sm6); @@ -7778,7 +7869,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec return; }
- sm6_register_from_value(&terminator->conditional_reg, src, sm6); + vsir_register_from_dxil_value(&terminator->conditional_reg, src, 0, sm6); terminator->type = TERMINATOR_SWITCH;
terminator->case_count = record->operand_count / 2u; @@ -7854,9 +7945,11 @@ static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_re if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) return; for (i = 0; i < 3; ++i) - src_param_init_from_value(&src_params[i], src[i], sm6); + { + src_param_init_from_value(&src_params[i], src[i], 0, sm6); + }
- instruction_dst_param_init_ssa_scalar(ins, sm6); + instruction_dst_param_init_ssa_scalar(ins, 0, sm6); }
static bool sm6_metadata_value_is_node(const struct sm6_metadata_value *m) @@ -8195,7 +8288,7 @@ static enum vkd3d_result sm6_function_resolve_phi_incomings(const struct sm6_fun vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "The type of a phi incoming value does not match the result type."); } - sm6_register_from_value(&phi->incoming[j].reg, src, sm6); + vsir_register_from_dxil_value(&phi->incoming[j].reg, src, 0, sm6); } } } @@ -8206,7 +8299,7 @@ static enum vkd3d_result sm6_function_resolve_phi_incomings(const struct sm6_fun static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, struct sm6_function *function) { - struct vsir_program *program = sm6->p.program; + struct vsir_program *program = sm6->program; struct vkd3d_shader_instruction *ins; size_t i, block_idx, block_count; const struct dxil_record *record; @@ -8301,7 +8394,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const break; } case FUNC_CODE_INST_BINOP: - sm6_parser_emit_binop(sm6, record, ins, dst); + sm6_parser_emit_binop(sm6, record, code_block, ins, dst); break; case FUNC_CODE_INST_BR: sm6_parser_emit_br(sm6, record, function, code_block, ins); @@ -8355,8 +8448,8 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const return VKD3D_ERROR_INVALID_SHADER; }
- if (sm6->p.failed) - return VKD3D_ERROR; + if (sm6->p.status < 0) + return sm6->p.status;
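This follows the parser's move from a boolean "failed" flag to a signed status field; the error helpers are assumed to record the first failure as a negative enum vkd3d_result, so callers can propagate p.status directly. A sketch of that assumed convention:

    /* Assumed convention (a sketch; the real helper may differ): keep only
     * the first failure so later errors do not overwrite its result code. */
    static void parser_set_status(struct vkd3d_shader_parser *parser, enum vkd3d_result error)
    {
        if (parser->status >= 0)
            parser->status = error;
    }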
if (record->attachment) metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); @@ -8403,18 +8496,26 @@ static void sm6_block_emit_terminator(const struct sm6_block *block, struct sm6_ case TERMINATOR_UNCOND_BR: if (!block->terminator.true_block) return; - ins = sm6_parser_add_instruction(sm6, VSIR_OP_BRANCH); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_BRANCH))) + return; if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) + { + vkd3d_shader_instruction_make_nop(ins); return; + } vsir_src_param_init_label(&src_params[0], block->terminator.true_block->id); break;
case TERMINATOR_COND_BR: if (!block->terminator.true_block || !block->terminator.false_block) return; - ins = sm6_parser_add_instruction(sm6, VSIR_OP_BRANCH); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_BRANCH))) + return; if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + { + vkd3d_shader_instruction_make_nop(ins); return; + } src_param_init(&src_params[0]); src_params[0].reg = block->terminator.conditional_reg; vsir_src_param_init_label(&src_params[1], block->terminator.true_block->id); @@ -8422,9 +8523,13 @@ static void sm6_block_emit_terminator(const struct sm6_block *block, struct sm6_ break;
case TERMINATOR_SWITCH: - ins = sm6_parser_add_instruction(sm6, VSIR_OP_SWITCH_MONOLITHIC); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_SWITCH_MONOLITHIC))) + return; if (!(src_params = instruction_src_params_alloc(ins, block->terminator.case_count * 2u + 1, sm6))) + { + vkd3d_shader_instruction_make_nop(ins); return; + } src_param_init(&src_params[0]); src_params[0].reg = block->terminator.conditional_reg; /* TODO: emit the merge block id. */ @@ -8438,7 +8543,7 @@ static void sm6_block_emit_terminator(const struct sm6_block *block, struct sm6_ switch_case = &block->terminator.cases[i]; if (!(case_block = switch_case->block)) { - VKD3D_ASSERT(sm6->p.failed); + VKD3D_ASSERT(sm6->p.status < 0); continue; } if (switch_case->is_default) @@ -8491,11 +8596,18 @@ static void sm6_block_emit_phi(const struct sm6_block *block, struct sm6_parser src_phi = &block->phi[i]; incoming_count = src_phi->incoming_count;
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_PHI); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_PHI))) + return; if (!(src_params = instruction_src_params_alloc(ins, incoming_count * 2u, sm6))) + { + vkd3d_shader_instruction_make_nop(ins); return; + } if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) + { + vkd3d_shader_instruction_make_nop(ins); return; + }
for (j = 0; j < incoming_count; ++j) { @@ -8507,11 +8619,11 @@ static void sm6_block_emit_phi(const struct sm6_block *block, struct sm6_parser if (incoming_block) vsir_src_param_init_label(&src_params[index + 1], incoming_block->id); else - VKD3D_ASSERT(sm6->p.failed); + VKD3D_ASSERT(sm6->p.status < 0); }
dst_param_init(dst_param); - sm6_register_from_value(&dst_param->reg, &src_phi->value, sm6); + vsir_register_from_dxil_value(&dst_param->reg, &src_phi->value, 0, sm6); } }
@@ -8577,42 +8689,45 @@ static void sm6_parser_emit_label(struct sm6_parser *sm6, unsigned int label_id) struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_instruction *ins;
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_LABEL); - + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_LABEL))) + return; if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + { + vkd3d_shader_instruction_make_nop(ins); return; + } vsir_src_param_init_label(src_param, label_id); }
static enum vkd3d_result sm6_function_emit_blocks(const struct sm6_function *function, struct sm6_parser *sm6) { - struct vsir_program *program = sm6->p.program; - unsigned int i; + struct vsir_program *program = sm6->program; + struct vkd3d_shader_instruction *ins; + unsigned int i, j;
- program->block_count = function->block_count; + program->block_count = max(program->block_count, function->block_count);
for (i = 0; i < function->block_count; ++i) { const struct sm6_block *block = function->blocks[i];
- /* Space for the label and terminator. */ - if (!sm6_parser_require_space(sm6, block->instruction_count + block->phi_count + 2)) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory emitting shader instructions."); - return VKD3D_ERROR_OUT_OF_MEMORY; - } sm6_parser_emit_label(sm6, block->id); sm6_block_emit_phi(block, sm6);
- memcpy(&program->instructions.elements[program->instructions.count], block->instructions, - block->instruction_count * sizeof(*block->instructions)); - program->instructions.count += block->instruction_count; - + for (j = 0; j < block->instruction_count; ++j) + { + if (!(ins = vsir_program_append(program))) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory emitting block instructions."); + return sm6->p.status; + } + *ins = block->instructions[j]; + } sm6_block_emit_terminator(block, sm6); }
- return VKD3D_OK; + return sm6->p.status; }
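Appending instructions one at a time is what makes the up-front sm6_parser_require_space() reservation unnecessary. A plausible shape for the helper, assuming it builds on the existing instruction-array reserve logic:

    /* A plausible sketch of vsir_program_append() (an assumption; the real
     * implementation may differ): grow the array by one and hand back the
     * new slot, or NULL on allocation failure. */
    static struct vkd3d_shader_instruction *vsir_program_append(struct vsir_program *program)
    {
        struct vkd3d_shader_instruction_array *array = &program->instructions;

        if (!shader_instruction_array_reserve(array, array->count + 1))
            return NULL;
        return &array->elements[array->count++];
    }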
static bool sm6_parser_allocate_named_metadata(struct sm6_parser *sm6) @@ -8813,8 +8928,8 @@ static enum vkd3d_result sm6_parser_metadata_init(struct sm6_parser *sm6, const if (!(value = sm6_parser_get_value_safe(sm6, value_idx))) return VKD3D_ERROR_INVALID_SHADER;
- if (!sm6_value_is_constant(value) && !sm6_value_is_undef(value) && !sm6_value_is_data(value) - && !sm6_value_is_function_dcl(value)) + if (!sm6_value_is_constant(value) && !sm6_value_is_undef(value) + && !sm6_value_is_constant_array(value) && !sm6_value_is_function_dcl(value)) { WARN("Value at index %u is not a constant or a function declaration.\n", value_idx); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, @@ -9510,8 +9625,8 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, struct vkd3d_shader_instruction *ins; const struct sm6_metadata_node *node; const struct sm6_metadata_value *m; + enum vkd3d_result ret = VKD3D_OK; struct sm6_descriptor_info *d; - enum vkd3d_result ret; unsigned int i;
for (i = 0; i < descriptor_node->operand_count; ++i) @@ -9560,42 +9675,41 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, return VKD3D_ERROR_INVALID_SHADER; }
- if (!(ins = sm6_parser_require_space(sm6, 1))) - { - ERR("Failed to allocate instruction.\n"); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_NOP))) return VKD3D_ERROR_OUT_OF_MEMORY; - }
switch (type) { case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: if ((ret = sm6_parser_resources_load_cbv(sm6, node, d, ins)) < 0) - return ret; + goto done; break; case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: if ((ret = sm6_parser_resources_load_srv(sm6, node, d, ins)) < 0) - return ret; + goto done; break; case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: if ((ret = sm6_parser_resources_load_uav(sm6, node, d, ins)) < 0) - return ret; + goto done; break; case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: if ((ret = sm6_parser_resources_load_sampler(sm6, node, d, ins)) < 0) - return ret; + goto done; break; default: - FIXME("Unsupported descriptor type %u.\n", type); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, - "Resource descriptor type %u is unsupported.", type); - return VKD3D_ERROR_INVALID_SHADER; + "Resource descriptor type %#x is unsupported.", type); + ret = VKD3D_ERROR_INVALID_SHADER; + goto done; }
++sm6->descriptor_count; - ++sm6->p.program->instructions.count; }
- return VKD3D_OK; +done: + if (ret < 0) + vsir_instruction_init(ins, &ins->location, VSIR_OP_NOP); + return ret; }
static enum vkd3d_result sm6_parser_resources_init(struct sm6_parser *sm6) @@ -9711,7 +9825,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const { unsigned int i, j, column_count, operand_count, index; const struct sm6_metadata_node *node, *element_node; - struct vsir_program *program = sm6->p.program; + struct vsir_program *program = sm6->program; struct signature_element *elements, *e; unsigned int values[10]; bool native_16bit; @@ -9930,7 +10044,7 @@ invalid: static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, enum vkd3d_tessellator_domain tessellator_domain) { - struct vsir_program *program = sm6->p.program; + struct vsir_program *program = sm6->program; enum vkd3d_result ret;
if (!sm6_metadata_value_is_node(m)) @@ -9953,6 +10067,14 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons &program->patch_constant_signature, tessellator_domain, false)) < 0) return ret;
+ if (sm6->program->shader_version.type == VKD3D_SHADER_TYPE_HULL + && !(sm6->outpointid_param = vsir_program_create_outpointid_param(sm6->program))) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Failed to allocate outpointid parameter."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if ((ret = sm6_parser_init_input_signature(sm6, &program->input_signature)) < 0) return ret;
@@ -9983,14 +10105,15 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); global_flags = (global_flags & ~mask) | rotated_flags;
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_GLOBAL_FLAGS); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_GLOBAL_FLAGS))) + return; ins->declaration.global_flags = global_flags; - sm6->p.program->global_flags = global_flags; + sm6->program->global_flags = global_flags; }
static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) { - struct vkd3d_shader_version *version = &sm6->p.program->shader_version; + struct vkd3d_shader_version *version = &sm6->program->shader_version; const struct sm6_metadata_node *node; struct vkd3d_shader_instruction *ins; unsigned int group_sizes[3]; @@ -10040,30 +10163,33 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co } }
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_THREAD_GROUP); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_THREAD_GROUP))) + return VKD3D_ERROR_OUT_OF_MEMORY; ins->declaration.thread_group_size.x = group_sizes[0]; ins->declaration.thread_group_size.y = group_sizes[1]; ins->declaration.thread_group_size.z = group_sizes[2]; - sm6->p.program->thread_group_size = ins->declaration.thread_group_size; + sm6->program->thread_group_size = ins->declaration.thread_group_size;
return VKD3D_OK; }
-static void sm6_parser_emit_dcl_count(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, unsigned int count) +static void sm6_parser_emit_dcl_count(struct sm6_parser *dxil, enum vkd3d_shader_opcode opcode, unsigned int count) { struct vkd3d_shader_instruction *ins;
- ins = sm6_parser_add_instruction(sm6, handler_idx); + if (!(ins = sm6_parser_add_instruction(dxil, opcode))) + return; ins->declaration.count = count; }
-static void sm6_parser_emit_dcl_primitive_topology(struct sm6_parser *sm6, - enum vkd3d_shader_opcode handler_idx, enum vkd3d_primitive_type primitive_type, +static void sm6_parser_emit_dcl_primitive_topology(struct sm6_parser *dxil, + enum vkd3d_shader_opcode opcode, enum vkd3d_primitive_type primitive_type, unsigned int patch_vertex_count) { struct vkd3d_shader_instruction *ins;
- ins = sm6_parser_add_instruction(sm6, handler_idx); + if (!(ins = sm6_parser_add_instruction(dxil, opcode))) + return; ins->declaration.primitive_type.type = primitive_type; ins->declaration.primitive_type.patch_vertex_count = patch_vertex_count; } @@ -10080,9 +10206,10 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, "Domain shader tessellator domain %u is unhandled.", tessellator_domain); }
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_DOMAIN); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_DOMAIN))) + return; ins->declaration.tessellator_domain = tessellator_domain; - sm6->p.program->tess_domain = tessellator_domain; + sm6->program->tess_domain = tessellator_domain; }
static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, @@ -10108,10 +10235,11 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, "Hull shader tessellator partitioning %u is unhandled.", tessellator_partitioning); }
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_PARTITIONING); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_PARTITIONING))) + return; ins->declaration.tessellator_partitioning = tessellator_partitioning;
- sm6->p.program->tess_partitioning = tessellator_partitioning; + sm6->program->tess_partitioning = tessellator_partitioning; }
static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, @@ -10126,10 +10254,11 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser * "Hull shader tessellator output primitive %u is unhandled.", primitive); }
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE))) + return; ins->declaration.tessellator_output_primitive = primitive;
- sm6->p.program->tess_output_primitive = primitive; + sm6->program->tess_output_primitive = primitive; }
static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) @@ -10153,7 +10282,8 @@ static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, "Hull shader max tessellation factor %f is invalid.", max_tessellation_factor); }
- ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_HS_MAX_TESSFACTOR); + if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_HS_MAX_TESSFACTOR))) + return; ins->declaration.max_tessellation_factor = max_tessellation_factor; }
@@ -10241,8 +10371,8 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s }
sm6_parser_emit_dcl_primitive_topology(sm6, VSIR_OP_DCL_INPUT_PRIMITIVE, input_primitive, patch_vertex_count); - sm6->p.program->input_primitive = input_primitive; - sm6->p.program->input_control_point_count = input_control_point_count; + sm6->program->input_primitive = input_primitive; + sm6->program->input_control_point_count = input_control_point_count;
i = operands[1]; /* Max total scalar count sets an upper limit. We would need to scan outputs to be more precise. */ @@ -10253,7 +10383,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s "Geometry shader output vertex count %u is invalid.", i); } sm6_parser_emit_dcl_count(sm6, VSIR_OP_DCL_VERTICES_OUT, i); - sm6->p.program->vertices_out_count = i; + sm6->program->vertices_out_count = i;
if (operands[2] > 1) { @@ -10271,7 +10401,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s output_primitive = VKD3D_PT_TRIANGLELIST; } sm6_parser_emit_dcl_primitive_topology(sm6, VSIR_OP_DCL_OUTPUT_TOPOLOGY, output_primitive, 0); - sm6->p.program->output_topology = output_primitive; + sm6->program->output_topology = output_primitive;
i = operands[4]; if (!i || i > MAX_GS_INSTANCE_COUNT) @@ -10326,7 +10456,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa
sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); - sm6->p.program->input_control_point_count = operands[1]; + sm6->program->input_control_point_count = operands[1];
return operands[0]; } @@ -10334,7 +10464,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) { - struct vsir_program *program = sm6->p.program; + struct vsir_program *program = sm6->program; const struct sm6_metadata_node *node; unsigned int operands[6] = {0}; unsigned int i; @@ -10633,7 +10763,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro { size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; - struct shader_signature *patch_constant_signature, *output_signature, *input_signature; uint32_t version_token, dxil_version, token_count, magic; const uint32_t *byte_code = dxbc_desc->byte_code; unsigned int chunk_offset, chunk_size; @@ -10727,9 +10856,10 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_NORMALISED_SM6)) return VKD3D_ERROR_OUT_OF_MEMORY; - vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); + vkd3d_shader_parser_init(&sm6->p, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; sm6->bitpos = 2; + sm6->program = program;
switch (program->shader_version.type) { @@ -10746,9 +10876,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro break; }
- input_signature = &program->input_signature; - output_signature = &program->output_signature; - patch_constant_signature = &program->patch_constant_signature; program->features = dxbc_desc->features; memset(dxbc_desc, 0, sizeof(*dxbc_desc));
@@ -10890,15 +11017,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro goto fail; }
- if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count - + patch_constant_signature->element_count)) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory emitting shader signature declarations."); - ret = VKD3D_ERROR_OUT_OF_MEMORY; - goto fail; - } - program->ssa_count = sm6->ssa_next_id;
if (!(fn = sm6_parser_get_function(sm6, sm6->entry_point))) @@ -10949,15 +11067,13 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro
dxil_block_destroy(&sm6->root_block);
- if (sm6->p.failed) - { - ret = VKD3D_ERROR_INVALID_SHADER; + if (sm6->p.status < 0) goto fail; - } - return VKD3D_OK;
fail: + if (ret >= 0 && sm6->p.status < 0) + ret = sm6->p.status; sm6_parser_cleanup(sm6); vsir_program_cleanup(program); return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 676c501bb08..4848c531ced 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -1607,7 +1607,7 @@ static void write_fx_2_type_iter(const struct hlsl_type *type, const char *name, }
buffer = &fx->unstructured; - offset = put_u32(buffer, hlsl_sm1_base_type(type, ctx->is_combined_sampler)); + offset = put_u32(buffer, hlsl_sm1_base_type(type, ctx->is_combined_sampler, HLSL_SAMPLER_DIM_GENERIC)); put_u32(buffer, get_fx_2_type_class(type)); *ctx->names++ = put_u32(buffer, 0); *ctx->semantics++ = put_u32(buffer, 0); @@ -4205,7 +4205,7 @@ static void fx_parse_shader_blob(struct fx_parser *parser, enum vkd3d_shader_sou
static const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_17}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_CURRENT}, };
info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index dfe0a40ddf0..4d7505d8740 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -280,7 +280,7 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, break;
case VKD3DSPR_THREADID: - vkd3d_string_buffer_printf(buffer, "gl_GlobalInvocationID"); + vkd3d_string_buffer_printf(buffer, "uvec4(gl_GlobalInvocationID, 0)"); break;
case VKD3DSPR_IDXTEMP: @@ -288,6 +288,14 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); break;
+ case VKD3DSPR_SAMPLEMASK: + if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled sample coverage mask in shader type #%x.", + gen->program->shader_version.type); + vkd3d_string_buffer_printf(buffer, "o_mask"); + break; + default: vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled register type %#x.", reg->type); @@ -384,7 +392,7 @@ static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd const struct vkd3d_shader_src_param *vsir_src, uint32_t mask, enum vsir_data_type data_type) { const struct vkd3d_shader_register *reg = &vsir_src->reg; - struct vkd3d_string_buffer *register_name, *str; + struct vkd3d_string_buffer *register_name; enum vsir_data_type src_data_type; unsigned int size;
@@ -401,39 +409,11 @@ static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd
shader_glsl_print_register_name(register_name, gen, reg);
- if (!vsir_src->modifiers) - str = buffer; - else - str = vkd3d_string_buffer_get(&gen->string_buffers); - size = reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1; - shader_glsl_print_bitcast(str, gen, register_name->buffer, data_type, src_data_type, size); + shader_glsl_print_bitcast(buffer, gen, register_name->buffer, data_type, src_data_type, size); if (reg->dimension == VSIR_DIMENSION_VEC4) - shader_glsl_print_swizzle(str, vsir_src->swizzle, mask); - - switch (vsir_src->modifiers) - { - case VKD3DSPSM_NONE: - break; - case VKD3DSPSM_NEG: - vkd3d_string_buffer_printf(buffer, "-%s", str->buffer); - break; - case VKD3DSPSM_ABS: - vkd3d_string_buffer_printf(buffer, "abs(%s)", str->buffer); - break; - case VKD3DSPSM_ABSNEG: - vkd3d_string_buffer_printf(buffer, "-abs(%s)", str->buffer); - break; - default: - vkd3d_string_buffer_printf(buffer, "<unhandled modifier %#x>(%s)", - vsir_src->modifiers, str->buffer); - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - break; - } + shader_glsl_print_swizzle(buffer, vsir_src->swizzle, mask);
- if (str != buffer) - vkd3d_string_buffer_release(&gen->string_buffers, str); vkd3d_string_buffer_release(&gen->string_buffers, register_name); }
@@ -495,23 +475,14 @@ static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_g struct glsl_dst *dst, enum vsir_data_type data_type, const char *format, va_list args) { struct vkd3d_string_buffer *buffer = gen->buffer; - uint32_t modifiers = dst->vsir->modifiers; bool close = true;
- /* It is always legitimate to ignore _pp. */ - modifiers &= ~VKD3DSPDM_PARTIALPRECISION; - if (dst->vsir->shift) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); - if (modifiers & ~VKD3DSPDM_SATURATE) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers);
shader_glsl_print_indent(buffer, gen->indent); vkd3d_string_buffer_printf(buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); - if (modifiers & VKD3DSPDM_SATURATE) - vkd3d_string_buffer_printf(buffer, "clamp(");
switch (data_type) { @@ -520,6 +491,8 @@ static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_g "Internal compiler error: Unhandled destination register data type %#x.", data_type); /* fall through */ case VSIR_DATA_F32: + case VSIR_DATA_SNORM: + case VSIR_DATA_UNORM: close = false; break; case VSIR_DATA_I32: @@ -534,8 +507,6 @@ static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_g
if (close) vkd3d_string_buffer_printf(buffer, ")"); - if (modifiers & VKD3DSPDM_SATURATE) - vkd3d_string_buffer_printf(buffer, ", 0.0, 1.0)"); vkd3d_string_buffer_printf(buffer, ";\n"); }
@@ -611,6 +582,21 @@ static void shader_glsl_dot(struct vkd3d_glsl_generator *gen, glsl_dst_cleanup(&dst, &gen->string_buffers); }
+static void shader_glsl_saturate(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + struct glsl_src src; + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src, gen, &ins->src[0], mask); + + shader_glsl_print_assignment(gen, &dst, "clamp(%s, 0.0, 1.0)", src.str->buffer); + + glsl_src_cleanup(&src, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + static void shader_glsl_intrinsic(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) { @@ -792,7 +778,7 @@ static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, s
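With the destination modifier gone, saturation is visible in the generated source as an explicit clamp; a scalar VSIR_OP_SATURATE from one temporary into another would come out roughly as (hypothetical register numbers):

    r[0].x = clamp(r[1].x, 0.0, 1.0);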
static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { - unsigned int resource_id, resource_idx, resource_space, sample_count; + unsigned int coord_size, resource_id, resource_idx, resource_space, sample_count; const struct glsl_resource_type_info *resource_type_info; const struct vkd3d_shader_descriptor_info1 *d; enum vkd3d_shader_resource_type resource_type; @@ -800,11 +786,9 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ enum vsir_data_type data_type; struct glsl_src coord; struct glsl_dst dst; - uint32_t coord_mask; + bool array, offset;
- if (vkd3d_shader_instruction_has_texel_offset(ins)) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled texel fetch offset."); + offset = vkd3d_shader_instruction_has_texel_offset(ins);
if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, @@ -831,20 +815,22 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) { - coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); + coord_size = resource_type_info->coord_size; + array = resource_type_info->array; } else { vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled resource type %#x.", resource_type); - coord_mask = vkd3d_write_mask_from_component_count(2); + coord_size = 2; + array = false; }
glsl_dst_init(&dst, gen, ins, &ins->dst[0]); - glsl_src_init(&coord, gen, &ins->src[0], coord_mask); + glsl_src_init(&coord, gen, &ins->src[0], vkd3d_write_mask_from_component_count(coord_size)); fetch = vkd3d_string_buffer_get(&gen->string_buffers);
- vkd3d_string_buffer_printf(fetch, "texelFetch("); + vkd3d_string_buffer_printf(fetch, "texelFetch%s(", offset ? "Offset" : ""); shader_glsl_print_combined_sampler_name(fetch, gen, resource_idx, resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0); vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer); @@ -860,6 +846,11 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ else shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type); } + if (offset) + { + vkd3d_string_buffer_printf(fetch, ", "); + shader_glsl_print_texel_offset(fetch, gen, coord_size - array, &ins->texel_offset); + } vkd3d_string_buffer_printf(fetch, ")"); shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask);
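For arrayed resources the layer index is part of the coordinate but not of the offset, hence the coord_size - array component count passed below. A plain 2D fetch with a texel offset would then be emitted along the lines of (placeholder operands):

    <dst> = texelFetchOffset(<combined sampler>, <coord>, <lod>, ivec2(<u>, <v>))<swizzle>;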
@@ -1227,25 +1218,33 @@ static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d
static void shader_glsl_movc(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { + struct vkd3d_string_buffer *src1, *src2; unsigned int component_count; - struct glsl_src src[3]; + struct glsl_src src[1]; struct glsl_dst dst; uint32_t mask;
+ /* Sadly, mix() in unextended GLSL 4.40 can only select between + * floating-point sources. The earliest version able to select between + * integer sources is version 4.50; the same functionality is also + * provided by the EXT_shader_integer_mix extension. */ + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); glsl_src_init(&src[0], gen, &ins->src[0], mask); - glsl_src_init(&src[1], gen, &ins->src[1], mask); - glsl_src_init(&src[2], gen, &ins->src[2], mask); + src1 = vkd3d_string_buffer_get(&gen->string_buffers); + src2 = vkd3d_string_buffer_get(&gen->string_buffers); + shader_glsl_print_src(src1, gen, &ins->src[1], mask, VSIR_DATA_F32); + shader_glsl_print_src(src2, gen, &ins->src[2], mask, VSIR_DATA_F32);
if ((component_count = vsir_write_mask_component_count(mask)) > 1) - shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bvec%u(%s))", - src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); + shader_glsl_print_assignment_ext(gen, &dst, VSIR_DATA_F32, "mix(%s, %s, bvec%u(%s))", + src2->buffer, src1->buffer, component_count, src[0].str->buffer); else - shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bool(%s))", - src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); + shader_glsl_print_assignment_ext(gen, &dst, VSIR_DATA_F32, "mix(%s, %s, bool(%s))", + src2->buffer, src1->buffer, src[0].str->buffer);
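For example, a two-component conditional move would come out as (placeholder operands):

    <dst>.xy = mix(<src2>.xy, <src1>.xy, bvec2(<src0>.xy));

Printing src1 and src2 as floats keeps the expression within the overloads that unextended GLSL 4.40 provides.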
- glsl_src_cleanup(&src[2], &gen->string_buffers); - glsl_src_cleanup(&src[1], &gen->string_buffers); + vkd3d_string_buffer_release(&gen->string_buffers, src2); + vkd3d_string_buffer_release(&gen->string_buffers, src1); glsl_src_cleanup(&src[0], &gen->string_buffers); glsl_dst_cleanup(&dst, &gen->string_buffers); } @@ -1281,6 +1280,13 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_VertexID, 0, 0, 0))"); break;
+ case VKD3D_SHADER_SV_INSTANCE_ID: + if (version->type != VKD3D_SHADER_TYPE_VERTEX) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_INSTANCE_ID in shader type #%x.", version->type); + vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_InstanceID, 0, 0, 0))"); + break; + case VKD3D_SHADER_SV_IS_FRONT_FACE: if (version->type != VKD3D_SHADER_TYPE_PIXEL) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, @@ -1429,6 +1435,12 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) shader_glsl_print_write_mask(buffer, e->mask); vkd3d_string_buffer_printf(buffer, ";\n"); } + + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_SAMPLEMASK)) + { + shader_glsl_print_indent(buffer, gen->indent); + vkd3d_string_buffer_printf(buffer, "gl_SampleMask[0] = floatBitsToInt(o_mask);\n"); + } }
static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) @@ -1459,6 +1471,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
switch (ins->opcode) { + case VSIR_OP_ABS: + shader_glsl_intrinsic(gen, ins, "abs"); + break; case VSIR_OP_ADD: case VSIR_OP_IADD: shader_glsl_binop(gen, ins, "+"); @@ -1475,6 +1490,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VSIR_OP_CONTINUE: shader_glsl_continue(gen); break; + case VSIR_OP_COS: + shader_glsl_intrinsic(gen, ins, "cos"); + break; case VSIR_OP_DCL_INDEXABLE_TEMP: shader_glsl_dcl_indexable_temp(gen, ins); break; @@ -1495,6 +1513,12 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VSIR_OP_DP4: shader_glsl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); break; + case VSIR_OP_DSX: + shader_glsl_intrinsic(gen, ins, "dFdx"); + break; + case VSIR_OP_DSY: + shader_glsl_intrinsic(gen, ins, "dFdy"); + break; case VSIR_OP_ELSE: shader_glsl_else(gen, ins); break; @@ -1531,6 +1555,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, break; case VSIR_OP_GEO: case VSIR_OP_IGE: + case VSIR_OP_UGE: shader_glsl_relop(gen, ins, ">=", "greaterThanEqual"); break; case VSIR_OP_IF: @@ -1561,6 +1586,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, shader_glsl_relop(gen, ins, "!=", "notEqual"); break; case VSIR_OP_INEG: + case VSIR_OP_NEG: shader_glsl_unary_op(gen, ins, "-"); break; case VSIR_OP_ISHL: @@ -1620,6 +1646,12 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VSIR_OP_RSQ: shader_glsl_intrinsic(gen, ins, "inversesqrt"); break; + case VSIR_OP_SATURATE: + shader_glsl_saturate(gen, ins); + break; + case VSIR_OP_SIN: + shader_glsl_intrinsic(gen, ins, "sin"); + break; case VSIR_OP_SQRT: shader_glsl_intrinsic(gen, ins, "sqrt"); break; @@ -1629,6 +1661,12 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VSIR_OP_SWITCH: shader_glsl_switch(gen, ins); break; + case VSIR_OP_UDIV_SIMPLE: + shader_glsl_binop(gen, ins, "/"); + break; + case VSIR_OP_UREM: + shader_glsl_binop(gen, ins, "%"); + break; case VSIR_OP_XOR: shader_glsl_binop(gen, ins, "^"); break; @@ -2318,6 +2356,8 @@ static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) vkd3d_string_buffer_printf(buffer, "vec4 %s_in[%u];\n", gen->prefix, gen->limits.input_count); if (gen->limits.output_count) vkd3d_string_buffer_printf(buffer, "vec4 %s_out[%u];\n", gen->prefix, gen->limits.output_count); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_SAMPLEMASK)) + vkd3d_string_buffer_printf(gen->buffer, "float o_mask;\n"); if (program->temp_count) vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n", program->temp_count); vkd3d_string_buffer_printf(buffer, "\n"); @@ -2328,7 +2368,6 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc struct vkd3d_string_buffer *buffer = gen->buffer; struct vkd3d_shader_instruction *ins; struct vsir_program_iterator it; - void *code;
MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
@@ -2357,13 +2396,7 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc if (gen->failed) return VKD3D_ERROR_INVALID_SHADER;
- if ((code = vkd3d_malloc(buffer->buffer_size))) - { - memcpy(code, buffer->buffer, buffer->content_size); - out->size = buffer->content_size; - out->code = code; - } - else return VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_code_from_string_buffer(out, buffer);
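vkd3d_shader_code_from_string_buffer() replaces the open-coded copy above. A hedged sketch of its behaviour, inferred only from the code it replaces here; the actual helper in vkd3d-shader may differ, e.g. by transferring ownership of the buffer's allocation instead of copying:

    /* Hypothetical stand-in mirroring the removed open-coded path. */
    static int shader_code_from_string_buffer(struct vkd3d_shader_code *out,
            const struct vkd3d_string_buffer *buffer)
    {
        void *code;

        if (!(code = vkd3d_malloc(buffer->buffer_size)))
            return VKD3D_ERROR_OUT_OF_MEMORY;
        memcpy(code, buffer->buffer, buffer->content_size);
        out->code = code;
        out->size = buffer->content_size;
        return VKD3D_OK;
    }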
return VKD3D_OK; } @@ -2449,6 +2482,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags,
VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); VKD3D_ASSERT(program->has_descriptor_info); + VKD3D_ASSERT(program->has_no_modifiers);
vkd3d_glsl_generator_init(&generator, program, compile_info, combined_sampler_info, message_context); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 62335086e20..6bca2e1d1b2 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -153,6 +153,25 @@ void hlsl_free_state_block(struct hlsl_state_block *state_block) vkd3d_free(state_block); }
+void hlsl_free_default_value(struct hlsl_default_value *value) +{ + vkd3d_free((void *)value->string); +} + +void hlsl_free_default_values(struct hlsl_ir_var *decl) +{ + unsigned int component_count; + + if (!decl->default_values) + return; + + component_count = hlsl_type_component_count(decl->data_type); + for (size_t i = 0; i < component_count; ++i) + hlsl_free_default_value(&decl->default_values[i]); + vkd3d_free(decl->default_values); + decl->default_values = NULL; +} + void hlsl_free_var(struct hlsl_ir_var *decl) { unsigned int k, i; @@ -162,14 +181,7 @@ void hlsl_free_var(struct hlsl_ir_var *decl) for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) vkd3d_free((void *)decl->objects_usage[k]);
- if (decl->default_values) - { - unsigned int component_count = hlsl_type_component_count(decl->data_type); - - for (k = 0; k < component_count; ++k) - vkd3d_free((void *)decl->default_values[k].string); - vkd3d_free(decl->default_values); - } + hlsl_free_default_values(decl);
for (i = 0; i < decl->state_block_count; ++i) hlsl_free_state_block(decl->state_blocks[i]); @@ -393,6 +405,11 @@ bool hlsl_type_is_signed_integer(const struct hlsl_type *type) vkd3d_unreachable(); }
+bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type) +{ + return hlsl_type_is_integer(type) && !hlsl_type_is_signed_integer(type); +} + bool hlsl_type_is_integer(const struct hlsl_type *type) { VKD3D_ASSERT(hlsl_is_numeric_type(type)); @@ -1732,6 +1749,12 @@ struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *t return &c->node; }
+struct hlsl_ir_node *hlsl_block_add_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_type *type, const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc) +{ + return append_new_instr(ctx, block, hlsl_new_constant(ctx, type, value, loc)); +} + struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) { struct hlsl_constant_value value; @@ -1809,6 +1832,76 @@ struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct v return hlsl_new_constant(ctx, ctx->builtin_types.null, &value, loc); }
+bool hlsl_constant_is_zero(struct hlsl_ir_constant *c) +{ + struct hlsl_type *data_type = c->node.data_type; + unsigned int k; + + for (k = 0; k < data_type->e.numeric.dimx; ++k) + { + switch (data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (c->value.u[k].f != 0.0f) + return false; + break; + + case HLSL_TYPE_DOUBLE: + if (c->value.u[k].d != 0.0) + return false; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_INT: + case HLSL_TYPE_BOOL: + case HLSL_TYPE_MIN16UINT: + if (c->value.u[k].u != 0) + return false; + break; + } + } + + return true; +} + +bool hlsl_constant_is_one(struct hlsl_ir_constant *c) +{ + struct hlsl_type *data_type = c->node.data_type; + unsigned int k; + + for (k = 0; k < data_type->e.numeric.dimx; ++k) + { + switch (data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (c->value.u[k].f != 1.0f) + return false; + break; + + case HLSL_TYPE_DOUBLE: + if (c->value.u[k].d != 1.0) + return false; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_INT: + case HLSL_TYPE_MIN16UINT: + if (c->value.u[k].u != 1) + return false; + break; + + case HLSL_TYPE_BOOL: + if (c->value.u[k].u != ~0) + return false; + break; + } + } + + return true; +} + static struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *data_type, const struct vkd3d_shader_location *loc) @@ -1895,14 +1988,15 @@ static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx) return hlsl_new_expr(ctx, HLSL_OP0_ERROR, operands, ctx->builtin_types.error, &loc); }
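Note the HLSL_TYPE_BOOL arm of hlsl_constant_is_one(): vkd3d's HLSL IR stores "true" as an all-ones bit pattern, so "one" for a bool component means ~0, not 1. As a usage sketch, a hypothetical fold in the replace_ir() style introduced later in this patch (illustrative only; returning NULL leaves the instruction alone):

    /* Fold "x * 1" to "x" using the new predicate. */
    static struct hlsl_ir_node *fold_mul_by_one(struct hlsl_ctx *ctx,
            struct hlsl_ir_node *instr, struct hlsl_block *block)
    {
        struct hlsl_ir_expr *expr;
        struct hlsl_ir_node *rhs;

        if (instr->type != HLSL_IR_EXPR)
            return NULL;
        expr = hlsl_ir_expr(instr);
        if (expr->op != HLSL_OP2_MUL)
            return NULL;

        rhs = expr->operands[1].node;
        if (rhs->type == HLSL_IR_CONSTANT
                && hlsl_constant_is_one(hlsl_ir_constant(rhs)))
            return expr->operands[0].node;
        return NULL;
    }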
-struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, + struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc) { struct hlsl_ir_if *iff;
if (!(iff = hlsl_alloc(ctx, sizeof(*iff)))) return NULL; init_node(&iff->node, HLSL_IR_IF, NULL, loc); + iff->flatten_type = flatten_type; hlsl_src_from_node(&iff->condition, condition); hlsl_block_init(&iff->then_block); hlsl_block_add_block(&iff->then_block, then_block); @@ -1912,10 +2006,11 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond return &iff->node; }
-void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) +void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, + enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, else_block, loc); + struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, else_block, flatten_type, loc);
if (instr) { @@ -2104,9 +2199,15 @@ static struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_resource_load *load = hlsl_new_resource_load(ctx, params, loc); + struct hlsl_ir_resource_load *load; + + if (!(load = hlsl_new_resource_load(ctx, params, loc))) + { + block->value = ctx->error_instr; + return ctx->error_instr; + }
- if (load && load->sampling_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + if (load->sampling_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) hlsl_src_from_node(&load->byte_offset, hlsl_block_add_uint_constant(ctx, block, 0, loc));
return append_new_instr(ctx, block, &load->node); @@ -2130,11 +2231,11 @@ static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, return &store->node; }
-void hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, +struct hlsl_ir_node *hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_resource_store_type type, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, uint32_t writemask, const struct vkd3d_shader_location *loc) { - append_new_instr(ctx, block, hlsl_new_resource_store(ctx, type, resource, coords, value, writemask, loc)); + return append_new_instr(ctx, block, hlsl_new_resource_store(ctx, type, resource, coords, value, writemask, loc)); }
struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count, @@ -2593,7 +2694,8 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma return NULL; }
- if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc))) + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), + &then_block, &else_block, src->flatten_type, &src->node.loc))) { hlsl_block_cleanup(&then_block); hlsl_block_cleanup(&else_block); @@ -3654,8 +3756,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_BIT_NOT] = "~", [HLSL_OP1_CAST] = "cast", [HLSL_OP1_CEIL] = "ceil", + [HLSL_OP1_CLZ] = "clz", [HLSL_OP1_COS] = "cos", [HLSL_OP1_COS_REDUCED] = "cos_reduced", + [HLSL_OP1_COUNTBITS] = "countbits", + [HLSL_OP1_CTZ] = "ctz", [HLSL_OP1_DSX] = "dsx", [HLSL_OP1_DSX_COARSE] = "dsx_coarse", [HLSL_OP1_DSX_FINE] = "dsx_fine", @@ -3665,6 +3770,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_F16TOF32] = "f16tof32", [HLSL_OP1_F32TOF16] = "f32tof16", + [HLSL_OP1_FIND_MSB] = "find_msb", [HLSL_OP1_FLOOR] = "floor", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_ISINF] = "isinf", @@ -3718,8 +3824,9 @@ static void dump_ir_expr(struct vkd3d_string_buffer *buffer, const struct hlsl_i vkd3d_string_buffer_printf(buffer, "%s (", debug_hlsl_expr_op(expr->op)); for (i = 0; i < HLSL_MAX_OPERANDS && expr->operands[i].node; ++i) { + if (i) + vkd3d_string_buffer_printf(buffer, " "); dump_src(buffer, &expr->operands[i]); - vkd3d_string_buffer_printf(buffer, " "); } vkd3d_string_buffer_printf(buffer, ")"); } @@ -3763,6 +3870,13 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i vkd3d_string_buffer_printf(buffer, "unresolved_continue"); break; } + + if (jump->condition.node) + { + vkd3d_string_buffer_printf(buffer, " ("); + dump_src(buffer, &jump->condition); + vkd3d_string_buffer_printf(buffer, ")"); + } }
static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index d67f820fe8b..e12915f9fe6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -233,6 +233,9 @@ struct hlsl_type /* Offset where the type's description starts in the output bytecode, in bytes. */ size_t bytecode_offset;
+ /* Offset where the type's packed description starts in the output bytecode, in bytes. */ + size_t packed_bytecode_offset; + bool is_typedef;
uint32_t is_minimum_precision : 1; @@ -245,6 +248,7 @@ struct hlsl_semantic const char *name; uint32_t index; uint32_t stream_index; + uint32_t modifiers;
/* Name exactly as it appears in the sources. */ const char *raw_name; @@ -530,6 +534,8 @@ struct hlsl_ir_var struct { bool used; + bool uav_read; + bool uav_atomics; enum hlsl_sampler_dim sampler_dim; struct vkd3d_shader_location first_sampler_dim_loc; } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; @@ -655,12 +661,20 @@ struct hlsl_ir_call struct hlsl_ir_function_decl *decl; };
+enum hlsl_if_flatten_type +{ + HLSL_IF_FLATTEN_DEFAULT, + HLSL_IF_FORCE_FLATTEN, + HLSL_IF_FORCE_BRANCH +}; + struct hlsl_ir_if { struct hlsl_ir_node node; struct hlsl_src condition; struct hlsl_block then_block; struct hlsl_block else_block; + enum hlsl_if_flatten_type flatten_type; };
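For reference, the source-level attributes this enum models, shown as a hypothetical HLSL snippet embedded the way this codebase usually embeds shaders, as a C string: [flatten] requests evaluating both branches and selecting the result, [branch] requests real control flow, and the parser below now reports combining them as an error.

    static const char example_hlsl[] =
        "float4 main(float x : TEXCOORD0) : COLOR\n"
        "{\n"
        "    [branch] /* HLSL_IF_FORCE_BRANCH; [flatten] would give HLSL_IF_FORCE_FLATTEN. */\n"
        "    if (x > 0.0)\n"
        "        return float4(1.0, 0.0, 0.0, 1.0);\n"
        "    return float4(0.0, 1.0, 0.0, 1.0);\n"
        "}\n";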
enum hlsl_loop_unroll_type @@ -715,8 +729,11 @@ enum hlsl_ir_expr_op HLSL_OP1_BIT_NOT, HLSL_OP1_CAST, HLSL_OP1_CEIL, + HLSL_OP1_CLZ, HLSL_OP1_COS, HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */ + HLSL_OP1_COUNTBITS, + HLSL_OP1_CTZ, HLSL_OP1_DSX, HLSL_OP1_DSX_COARSE, HLSL_OP1_DSX_FINE, @@ -726,6 +743,7 @@ enum hlsl_ir_expr_op HLSL_OP1_EXP2, HLSL_OP1_F16TOF32, HLSL_OP1_F32TOF16, + HLSL_OP1_FIND_MSB, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, HLSL_OP1_ISINF, @@ -1184,8 +1202,8 @@ struct hlsl_ctx } constant_defs; /* 'c' registers where the constants expected by SM2 sincos are stored. */ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; - /* Number of allocated SSA IDs, used in translation to vsir. */ - unsigned int ssa_count; + /* Number of allocated registers, used in translation to vsir. */ + unsigned int ssa_count, temp_count, indexable_temp_count;
/* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ @@ -1541,6 +1559,12 @@ static inline bool hlsl_var_has_buffer_offset_register_reservation(struct hlsl_c return var->reg_reservation.reg_type == 'c' && var->buffer == ctx->globals_buffer; }
+static inline bool hlsl_is_comparison_op(enum hlsl_ir_expr_op op) +{ + return op == HLSL_OP2_EQUAL || op == HLSL_OP2_GEQUAL + || op == HLSL_OP2_LESS || op == HLSL_OP2_NEQUAL; +} + char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) VKD3D_PRINTF_FUNC(2, 3);
const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op); @@ -1563,13 +1587,16 @@ struct hlsl_ir_node *hlsl_block_add_binary_expr(struct hlsl_ctx *ctx, struct hls enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2); struct hlsl_ir_node *hlsl_block_add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *arg, struct hlsl_type *type, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_block_add_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_type *type, const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *data_type, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_float_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, float f, const struct vkd3d_shader_location *loc); -void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, + enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_index(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_int_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1585,7 +1612,7 @@ void hlsl_block_add_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); -void hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, +struct hlsl_ir_node *hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_resource_store_type type, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, uint32_t writemask, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_simple_load(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1648,6 +1675,8 @@ void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new);
void hlsl_free_attribute(struct hlsl_attribute *attr); +void hlsl_free_default_value(struct hlsl_default_value *value); +void hlsl_free_default_values(struct hlsl_ir_var *decl); void hlsl_free_instr(struct hlsl_ir_node *node); void hlsl_free_instr_list(struct list *list); void hlsl_free_state_block(struct hlsl_state_block *state_block); @@ -1686,13 +1715,16 @@ struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *no struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, + struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, enum hlsl_so_object_type so_type, struct hlsl_type *type); struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
+bool hlsl_constant_is_zero(struct hlsl_ir_constant *c); +bool hlsl_constant_is_one(struct hlsl_ir_constant *c); + void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, @@ -1779,10 +1811,10 @@ bool hlsl_type_is_integer(const struct hlsl_type *type); bool hlsl_type_is_floating_point(const struct hlsl_type *type); bool hlsl_type_is_row_major(const struct hlsl_type *type); bool hlsl_type_is_signed_integer(const struct hlsl_type *type); +bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type); unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); -bool hlsl_type_is_integer(const struct hlsl_type *type); bool hlsl_type_is_minimum_precision(const struct hlsl_type *type); bool hlsl_type_is_resource(const struct hlsl_type *type); bool hlsl_type_is_shader(const struct hlsl_type *type); @@ -1814,15 +1846,19 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); -bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); -bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); -bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); -bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +struct hlsl_ir_node *hlsl_fold_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block); +struct hlsl_ir_node *hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block); +struct hlsl_ir_node *hlsl_fold_constant_identities(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block); +struct hlsl_ir_node *hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block); bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context);
D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); -D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler); +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, + bool is_combined_sampler, enum hlsl_sampler_dim sampler_dim);
struct extern_resource { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 0cdebb8a657..c41d807cca1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -83,7 +83,6 @@ ComputeShader {return KW_COMPUTESHADER; } compile {return KW_COMPILE; } CompileShader {return KW_COMPILESHADER; } const {return KW_CONST; } -ConstructGSWithSO {return KW_CONSTRUCTGSWITHSO; } continue {return KW_CONTINUE; } DepthStencilState {return KW_DEPTHSTENCILSTATE; } DepthStencilView {return KW_DEPTHSTENCILVIEW; } @@ -346,7 +345,7 @@ while {return KW_WHILE; } <pp>{ANY} {}
{ANY} { - return yytext[0]; + return (unsigned char)yytext[0]; }
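The new cast matters for non-ASCII input bytes. A standalone illustration, not part of the patch:

    #include <stdio.h>

    int main(void)
    {
        /* Where plain "char" is signed (most ABIs), a byte such as 0xe2
         * sign-extends to a negative int; returned as a Bison token value
         * it can never match a terminal. The cast yields 226 regardless
         * of the signedness of "char". */
        char c = (char)0xe2;

        printf("as plain char: %d\n", (int)c);              /* typically -30 */
        printf("as unsigned char: %d\n", (unsigned char)c); /* 226 */
        return 0;
    }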
%% diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 024d96c5663..e349029521a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -471,7 +471,7 @@ static void append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co
hlsl_block_init(&then_block); hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &condition->loc); - hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, &condition->loc); + hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &condition->loc); }
static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) @@ -614,10 +614,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx if (!(ret.string = vkd3d_strdup(string->string))) return ret; } - else if (node->type == HLSL_IR_STRING_CONSTANT) - { - hlsl_fixme(ctx, &node->loc, "Evaluate string constants as static expressions."); - } else { hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, @@ -1082,7 +1078,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->loc = v->loc; field->name = v->name; field->semantic = v->semantic; - field->storage_modifiers = modifiers; + field->storage_modifiers = modifiers | v->semantic.modifiers; if (v->initializer.args_count) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); @@ -1211,16 +1207,17 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, struct parse_parameter *param, const struct vkd3d_shader_location *loc) { + uint32_t modifiers = param->modifiers | param->semantic.modifiers; struct hlsl_ir_var *var;
if (param->type->class == HLSL_CLASS_MATRIX) VKD3D_ASSERT(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
- if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) + if ((modifiers & HLSL_STORAGE_OUT) && (modifiers & HLSL_STORAGE_UNIFORM)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name);
- if ((param->modifiers & HLSL_STORAGE_OUT) && !(param->modifiers & HLSL_STORAGE_IN) + if ((modifiers & HLSL_STORAGE_OUT) && !(modifiers & HLSL_STORAGE_IN) && (param->type->modifiers & HLSL_MODIFIER_CONST)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Parameter '%s' is declared as both \"out\" and \"const\".", param->name); @@ -1234,14 +1231,14 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, "Missing default value for parameter '%s'.", param->name);
- if (param->initializer.args_count && (param->modifiers & HLSL_STORAGE_OUT)) + if (param->initializer.args_count && (modifiers & HLSL_STORAGE_OUT)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Output parameter '%s' has a default value.", param->name);
if (hlsl_get_stream_output_type(param->type)) check_invalid_stream_output_object(ctx, param->type, param->name, loc);
- if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, param->modifiers, + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, modifiers, &param->reg_reservation))) return false; var->is_param = 1; @@ -2249,18 +2246,14 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i
if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) { - if (hlsl_is_numeric_type(dst_comp_type)) + /* Default values are discarded if they contain an object + * literal expression for a numeric component. */ + if (hlsl_is_numeric_type(dst_comp_type) && dst->default_values) { - /* Default values are discarded if they contain an object - * literal expression for a numeric component. */ - if (dst->default_values) - { - hlsl_warning(ctx, &src->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE, - "Component %u in variable '%s' initializer is object literal. Default values discarded.", - k, dst->name); - vkd3d_free(dst->default_values); - dst->default_values = NULL; - } + hlsl_warning(ctx, &src->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE, + "Component %u in variable '%s' initializer is object literal. Default values discarded.", + k, dst->name); + hlsl_free_default_values(dst); } } else @@ -2271,6 +2264,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i
if (dst->default_values) dst->default_values[*store_index] = default_value; + else + hlsl_free_default_value(&default_value);
hlsl_block_cleanup(&block); } @@ -2408,10 +2403,10 @@ static void check_invalid_object_fields(struct hlsl_ctx *ctx, const struct hlsl_
static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) { + uint32_t modifiers = v->modifiers | v->semantic.modifiers; struct hlsl_type *basic_type = v->basic_type; struct hlsl_ir_function_decl *func; struct hlsl_semantic new_semantic; - uint32_t modifiers = v->modifiers; bool unbounded_res_array = false; bool constant_buffer = false; struct hlsl_ir_var *var; @@ -3041,10 +3036,7 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, if (!param->default_values[j].string) { value.u[0] = param->default_values[j].number; - if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) - return NULL; - hlsl_block_add_instr(args->instrs, comp); - + comp = hlsl_block_add_constant(ctx, args->instrs, type, &value, loc); hlsl_block_add_store_component(ctx, args->instrs, ¶m_deref, j, comp); } } @@ -3181,6 +3173,20 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, return true; }
+static bool elementwise_intrinsic_int_convert_args(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_INT, type->e.numeric.dimx, type->e.numeric.dimy); + + convert_args(ctx, params, type, loc); + return true; +} + static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3579,6 +3585,20 @@ static bool intrinsic_cosh(struct hlsl_ctx *ctx, return write_cosh_or_sinh(ctx, params, loc, false); }
+static bool intrinsic_countbits(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *type; + + if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) + return false; + type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_COUNTBITS, operands, type, loc); +} + static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3925,6 +3945,69 @@ static bool intrinsic_f32tof16(struct hlsl_ctx *ctx, return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc); }
+static bool intrinsic_firstbithigh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *type = params->args[0]->data_type; + struct hlsl_ir_node *c, *clz, *eq, *xor; + + if (hlsl_version_lt(ctx, 4, 0)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The 'firstbithigh' intrinsic requires shader model 4.0 or higher."); + + if (hlsl_type_is_unsigned_integer(type)) + { + if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) + return false; + } + else + { + if (!elementwise_intrinsic_int_convert_args(ctx, params, loc)) + return false; + } + type = convert_numeric_type(ctx, type, HLSL_TYPE_UINT); + + operands[0] = params->args[0]; + if (hlsl_version_lt(ctx, 5, 0)) + return add_expr(ctx, params->instrs, HLSL_OP1_FIND_MSB, operands, type, loc); + + c = hlsl_block_add_uint_constant(ctx, params->instrs, 0x1f, loc); + + if (!(clz = add_expr(ctx, params->instrs, HLSL_OP1_CLZ, operands, type, loc))) + return false; + if (!(xor = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_BIT_XOR, c, clz, loc))) + return false; + + c = hlsl_block_add_uint_constant(ctx, params->instrs, ~0u, loc); + + if (!(eq = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_EQUAL, clz, c, loc))) + return false; + + operands[0] = eq; + operands[1] = add_implicit_conversion(ctx, params->instrs, c, type, loc); + operands[2] = xor; + return add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, operands, type, loc); +} + +static bool intrinsic_firstbitlow(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *type; + + if (hlsl_version_lt(ctx, 4, 0)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The 'firstbitlow' intrinsic requires shader model 4.0 or higher."); + + if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) + return false; + type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_CTZ, operands, type, loc); +} + static bool intrinsic_floor(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3948,9 +4031,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) return false;
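The SM5 path computes firstbithigh() as 31 ^ clz(x), then uses the ternary to pass ~0u through for the no-bits-set case (in this IR, CLZ of zero apparently yields ~0u, given the comparison above). A standalone check, not part of the patch, that XOR with 0x1f equals 31 - clz for any in-range leading-zero count:

    #include <assert.h>
    #include <stdint.h>

    /* Naive leading-zero count for nonzero 32-bit values. */
    static unsigned int clz32(uint32_t x)
    {
        unsigned int n = 32;

        while (x)
        {
            --n;
            x >>= 1;
        }
        return n;
    }

    int main(void)
    {
        assert((0x1f ^ clz32(0x00000010u)) == 4);  /* bit 4 is the MSB */
        assert((0x1f ^ clz32(0x80000000u)) == 31); /* bit 31 is the MSB */
        assert((0x1f ^ clz32(0x00000001u)) == 0);  /* bit 0 is the MSB */
        return 0;
    }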
- if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); + zero = hlsl_block_add_constant(ctx, params->instrs, div->data_type, &zero_value, loc);
if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) return false; @@ -3983,6 +4064,53 @@ static bool intrinsic_frac(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, arg, loc); }
+static bool intrinsic_frexp(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type, *uint_dim_type, *int_dim_type, *bool_dim_type; + struct hlsl_ir_function_decl *func; + char *body; + + static const char template[] = + "%s frexp(%s x, out %s exp)\n" + "{\n" + /* If x is zero, always return zero for exp and mantissa. */ + " %s is_nonzero_mask = x != 0.0;\n" + " %s bits = asuint(x);\n" + /* Subtract 126, not 127, to increase the exponent */ + " %s exp_int = asint((bits & 0x7f800000u) >> 23) - 126;\n" + /* Clear the given exponent and replace it with the bit pattern + * for 2^-1 */ + " %s mantissa = asfloat((bits & 0x007fffffu) | 0x3f000000);\n" + " exp = is_nonzero_mask * %s(exp_int);\n" + " return is_nonzero_mask * mantissa;\n" + "}\n"; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; + + if (type->e.numeric.type == HLSL_TYPE_DOUBLE) + { + hlsl_fixme(ctx, loc, "frexp() on doubles."); + return false; + } + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); + uint_dim_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + int_dim_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_INT, type->e.numeric.dimx, type->e.numeric.dimy); + bool_dim_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); + + if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, + bool_dim_type->name, uint_dim_type->name, int_dim_type->name, type->name, type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "frexp", body); + vkd3d_free(body); + if (!func) + return false; + + return !!add_user_call(ctx, func, params, false, loc); +} + static bool intrinsic_fwidth(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4499,9 +4627,8 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy);
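A standalone C model, not part of the patch, of the bit manipulation in the template above, checked against the C library. HLSL's frexp() returns a mantissa in [0.5, 1.0), hence the bias of 126 rather than the usual 127, and 0x3f000000 is exactly the exponent field of 2^-1. Like the template, this model discards the sign bit and maps zero to zero.

    #include <assert.h>
    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static float hlsl_frexp(float x, int *exp)
    {
        uint32_t bits;
        float mantissa;

        memcpy(&bits, &x, sizeof(bits));
        if (!bits)
        {
            *exp = 0;
            return 0.0f;
        }
        /* Biased exponent minus 126; the fraction is given a fixed
         * exponent field of 126, i.e. scaled into [0.5, 1.0). */
        *exp = (int)((bits & 0x7f800000u) >> 23) - 126;
        bits = (bits & 0x007fffffu) | 0x3f000000u;
        memcpy(&mantissa, &bits, sizeof(mantissa));
        return mantissa;
    }

    int main(void)
    {
        int e1, e2;
        float m1 = hlsl_frexp(20.0f, &e1);
        float m2 = frexpf(20.0f, &e2);

        assert(m1 == m2 && e1 == e2); /* 20.0 == 0.625 * 2^5 */
        return 0;
    }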
- if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); + zero = hlsl_block_add_constant(ctx, params->instrs, + hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc);
/* Check if 0 < arg, cast bool to int */
@@ -4701,7 +4828,8 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * }
if (!strcmp(name, "tex2Dbias") - || !strcmp(name, "tex2Dlod")) + || !strcmp(name, "tex2Dlod") + || !strcmp(name, "texCUBEbias")) { struct hlsl_ir_node *lod, *c;
@@ -4853,6 +4981,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); }
+static bool intrinsic_texCUBEbias(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "texCUBEbias", HLSL_SAMPLER_DIM_CUBE); +} + static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -5184,6 +5318,23 @@ static bool intrinsic_AllMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, | VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc); }
+static bool intrinsic_ConstructGSWithSO(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *compile; + + if (params->args_count != 2 && params->args_count != 6) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to ConstructGSWithSO(): expected 2 or 6, but got %u.", params->args_count); + + if (!(compile = hlsl_new_compile(ctx, HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, + NULL, params->args, params->args_count, params->instrs, loc))) + return false; + + hlsl_block_add_instr(params->instrs, compile); + return true; +} + static bool intrinsic_DeviceMemoryBarrier(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -5233,6 +5384,7 @@ intrinsic_functions[] = /* Note: these entries should be kept in alphabetical order. */ {"AllMemoryBarrier", 0, true, intrinsic_AllMemoryBarrier}, {"AllMemoryBarrierWithGroupSync", 0, true, intrinsic_AllMemoryBarrierWithGroupSync}, + {"ConstructGSWithSO", -1, false, intrinsic_ConstructGSWithSO}, {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, {"DeviceMemoryBarrier", 0, true, intrinsic_DeviceMemoryBarrier}, {"DeviceMemoryBarrierWithGroupSync", 0, true, intrinsic_DeviceMemoryBarrierWithGroupSync}, @@ -5263,6 +5415,7 @@ intrinsic_functions[] = {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, {"cosh", 1, true, intrinsic_cosh}, + {"countbits", 1, true, intrinsic_countbits}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, @@ -5280,9 +5433,12 @@ intrinsic_functions[] = {"f16tof32", 1, true, intrinsic_f16tof32}, {"f32tof16", 1, true, intrinsic_f32tof16}, {"faceforward", 3, true, intrinsic_faceforward}, + {"firstbithigh", 1, true, intrinsic_firstbithigh}, + {"firstbitlow", 1, true, intrinsic_firstbitlow}, {"floor", 1, true, intrinsic_floor}, {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, + {"frexp", 2, true, intrinsic_frexp}, {"fwidth", 1, true, intrinsic_fwidth}, {"isinf", 1, true, intrinsic_isinf}, {"ldexp", 2, true, intrinsic_ldexp}, @@ -5327,6 +5483,7 @@ intrinsic_functions[] = {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, {"texCUBE", -1, false, intrinsic_texCUBE}, + {"texCUBEbias", 2, false, intrinsic_texCUBEbias}, {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, {"transpose", 1, true, intrinsic_transpose}, @@ -5471,36 +5628,6 @@ static struct hlsl_block *add_shader_compilation(struct hlsl_ctx *ctx, const cha return make_block(ctx, compile); }
-static struct hlsl_block *add_compile_variant(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, - struct parse_initializer *args, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *compile; - - switch (compile_type) - { - case HLSL_COMPILE_TYPE_COMPILE: - vkd3d_unreachable(); - - case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: - if (args->args_count != 2 && args->args_count != 6) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to ConstructGSWithSO: expected 2 or 6, but got %u.", - args->args_count); - } - break; - } - - if (!(compile = hlsl_new_compile(ctx, compile_type, NULL, args->args, args->args_count, args->instrs, loc))) - { - free_parse_initializer(args); - return NULL; - } - - free_parse_initializer(args); - return make_block(ctx, compile); -} - static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -6781,7 +6908,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_COMPILESHADER %token KW_COMPUTESHADER %token KW_CONST -%token KW_CONSTRUCTGSWITHSO %token KW_CONTINUE %token KW_DEFAULT %token KW_DEPTHSTENCILSTATE @@ -7690,15 +7816,28 @@ colon_attributes: semantic: ':' any_identifier { + static const char *centroid_suffix = "_centroid"; + uint32_t modifiers = 0; + size_t len; char *p;
if (!($$.raw_name = hlsl_strdup(ctx, $2))) YYABORT;
- for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) + len = strlen($2); + if (ascii_strncasecmp($2, "sv_", 3) + && len > strlen(centroid_suffix) + && !ascii_strcasecmp($2 + (len - strlen(centroid_suffix)), centroid_suffix)) + { + modifiers = HLSL_STORAGE_CENTROID; + len -= strlen(centroid_suffix); + } + + for (p = $2 + len; p > $2 && isdigit(p[-1]); --p) ; $$.name = $2; $$.index = atoi(p); + $$.modifiers = modifiers; $$.reported_missing = false; $$.reported_duplicated_output_next_index = 0; $$.reported_duplicated_input_incompatible_next_index = 0; @@ -8059,7 +8198,7 @@ resource_format: { uint32_t modifiers = $1;
- if (!($$ = apply_type_modifiers(ctx, $2, &modifiers, false, &@1))) + if (!($$ = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) YYABORT; }
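A worked example of the extended semantic rule above: for "TEXCOORD3_centroid" the suffix check (skipped for "sv_"-prefixed system values) records HLSL_STORAGE_CENTROID, and the index digits are then parsed from just before the suffix. A hypothetical standalone mirror of the logic, using POSIX strcasecmp()/strncasecmp() in place of vkd3d's ascii_ helpers:

    #include <assert.h>
    #include <ctype.h>
    #include <stdlib.h>
    #include <string.h>
    #include <strings.h>

    struct parsed_semantic
    {
        unsigned int index;
        int is_centroid;
    };

    static struct parsed_semantic parse_semantic(const char *s)
    {
        static const char suffix[] = "_centroid";
        struct parsed_semantic sem = {0};
        size_t len = strlen(s);
        const char *p;

        if (strncasecmp(s, "sv_", 3) && len > strlen(suffix)
                && !strcasecmp(s + len - strlen(suffix), suffix))
        {
            sem.is_centroid = 1;
            len -= strlen(suffix);
        }
        for (p = s + len; p > s && isdigit((unsigned char)p[-1]); --p)
            ;
        sem.index = atoi(p);
        return sem;
    }

    int main(void)
    {
        struct parsed_semantic sem = parse_semantic("TEXCOORD3_centroid");

        assert(sem.is_centroid && sem.index == 3);
        assert(!parse_semantic("SV_Position0").is_centroid);
        assert(parse_semantic("SV_Position0").index == 0);
        return 0;
    }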
@@ -8998,6 +9137,7 @@ selection_statement: { struct hlsl_ir_node *condition = node_from_block($4); const struct parse_attribute_list *attributes = &$1; + enum hlsl_if_flatten_type flatten_type = HLSL_IF_FLATTEN_DEFAULT; unsigned int i;
check_attribute_list_for_duplicates(ctx, attributes); @@ -9006,10 +9146,19 @@ selection_statement: { const struct hlsl_attribute *attr = attributes->attrs[i];
- if (!strcmp(attr->name, "branch") - || !strcmp(attr->name, "flatten")) + if (!strcmp(attr->name, "branch")) + { + if (flatten_type == HLSL_IF_FORCE_FLATTEN) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "The 'branch' and 'flatten' attributes are mutually exclusive."); + flatten_type = HLSL_IF_FORCE_BRANCH; + } + else if (!strcmp(attr->name, "flatten")) { - hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); + if (flatten_type == HLSL_IF_FORCE_BRANCH) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "The 'branch' and 'flatten' attributes are mutually exclusive."); + flatten_type = HLSL_IF_FORCE_FLATTEN; } else { @@ -9017,10 +9166,16 @@ selection_statement: } }
+ if (flatten_type == HLSL_IF_FORCE_BRANCH && hlsl_version_lt(ctx, 2, 1)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The 'branch' attribute requires shader model 2.1 or higher."); + } + check_condition_type(ctx, condition);
condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4); - hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, &@2); + hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, flatten_type, &@2);
destroy_block($6.then_block); destroy_block($6.else_block); @@ -9282,11 +9437,6 @@ primary_expr: vkd3d_free($3); vkd3d_free($5); } - | KW_CONSTRUCTGSWITHSO '(' func_arguments ')' - { - if (!($$ = add_compile_variant(ctx, HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, &$3, &@1))) - YYABORT; - } | var_identifier '(' func_arguments ')' { if (!($$ = add_call(ctx, $1, &$3, &@1))) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 0b3dee4d2ce..f1368b151aa 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -175,12 +175,29 @@ static unsigned int hlsl_type_get_packed_size(const struct hlsl_type *type) } }
+static unsigned int struct_field_get_packed_offset(const struct hlsl_type *record, unsigned int field_idx) +{ + unsigned int offset = 0; + + VKD3D_ASSERT(record->class == HLSL_CLASS_STRUCT); + VKD3D_ASSERT(field_idx < record->e.record.field_count); + + for (unsigned int i = 0; i < field_idx; ++i) + { + struct hlsl_struct_field *field = &record->e.record.fields[i]; + offset = align(offset, hlsl_type_get_packed_alignment(field->type)) + hlsl_type_get_packed_size(field->type); + } + + return align(offset, hlsl_type_get_packed_alignment(record->e.record.fields[field_idx].type)); +} + + static struct hlsl_ir_node *hlsl_block_add_packed_index_offset_append(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *prev_offset, struct hlsl_ir_node *idx, struct hlsl_type *type, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *idx_offset = NULL, *c; - unsigned int field_idx, offset, size, i; + unsigned int field_idx, offset, size;
switch (type->class) { @@ -203,15 +220,7 @@ static struct hlsl_ir_node *hlsl_block_add_packed_index_offset_append(struct hls
case HLSL_CLASS_STRUCT: field_idx = hlsl_ir_constant(idx)->value.u[0].u; - for (i = 0, offset = 0; i < field_idx; ++i) - { - struct hlsl_struct_field *field = &type->e.record.fields[i]; - - offset = align(offset, hlsl_type_get_packed_alignment(field->type)) - + hlsl_type_get_packed_size(field->type); - } - - offset = align(offset, hlsl_type_get_packed_alignment(type->e.record.fields[field_idx].type)); + offset = struct_field_get_packed_offset(type, field_idx); idx_offset = hlsl_block_add_uint_constant(ctx, block, offset, loc); break;
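The factored-out helper implements the usual packed-layout recurrence: align the running offset to the field's packed alignment, then advance by its packed size. A standalone illustration with hypothetical alignments and sizes (hlsl_type_get_packed_alignment() is not shown in this hunk; a 16-byte float3 alignment is assumed purely for the example):

    #include <assert.h>

    static unsigned int align_up(unsigned int value, unsigned int alignment)
    {
        return (value + alignment - 1) & ~(alignment - 1);
    }

    int main(void)
    {
        /* struct { float a; float3 b; float c; } under the assumed rules: */
        unsigned int offset = 0;

        offset = align_up(offset, 4) + 4;   /* a at 0 */
        assert(align_up(offset, 16) == 16); /* b at 16 */
        offset = align_up(offset, 16) + 12;
        assert(align_up(offset, 4) == 28);  /* c at 28 */
        return 0;
    }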
@@ -917,20 +926,20 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, return progress; }
-typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); +typedef struct hlsl_ir_node *(*PFN_replace_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *);
-static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool call_replace_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - PFN_lower_func func = context; + struct hlsl_ir_node *replacement; + PFN_replace_func func = context; struct hlsl_block block;
hlsl_block_init(&block); - if (func(ctx, instr, &block)) + if ((replacement = func(ctx, instr, &block))) { - struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); list_move_before(&instr->entry, &block.instrs); - hlsl_replace_node(instr, replacement); + if (replacement != instr) + hlsl_replace_node(instr, replacement); return true; } else
-/* Specific form of transform_ir() for passes which convert a single instruction - * to a block of one or more instructions. This helper takes care of setting up - * the block and calling hlsl_replace_node_with_block(). */ -static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) +/* Specific form of transform_ir() for passes which replace a single instruction + * with another instruction. This includes passes which lower an instruction + * to one or more new instructions, and passes which fold away a redundant + * instruction. + * + * New instructions should be added to "block", and the replacement instruction + * should be returned. If the instruction should be left alone, NULL should be + * returned instead. + * + * It is legal to return the same instruction from the replace function, in + * which case replace_ir() returns true but hlsl_replace_node() is skipped. */ +static bool replace_ir(struct hlsl_ctx *ctx, PFN_replace_func func, struct hlsl_block *block) { - return hlsl_transform_ir(ctx, call_lower_func, block, func); + return hlsl_transform_ir(ctx, call_replace_func, block, func); }
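A hedged sketch of a lowering pass written against this contract; the pass itself is hypothetical (abs(x) lowered as max(-x, x)), and HLSL_OP1_NEG/HLSL_OP2_MAX are assumed from the existing expression-op table, but the block helpers match the declarations in hlsl.h above:

    static struct hlsl_ir_node *lower_abs_example(struct hlsl_ctx *ctx,
            struct hlsl_ir_node *instr, struct hlsl_block *block)
    {
        struct hlsl_ir_node *arg, *neg;
        struct hlsl_ir_expr *expr;

        if (instr->type != HLSL_IR_EXPR)
            return NULL; /* Leave the instruction alone. */
        expr = hlsl_ir_expr(instr);
        if (expr->op != HLSL_OP1_ABS)
            return NULL;

        /* New instructions go into "block"; the returned node is what
         * replace_ir() rewires every use of "instr" to. */
        arg = expr->operands[0].node;
        neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc);
        return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MAX, neg, arg);
    }

Invoked as replace_ir(ctx, lower_abs_example, body).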
static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -1047,7 +1064,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx,
hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc);
- if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc))) + if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc))) return; list_add_after(&load->node.entry, &iff->entry); } @@ -1232,7 +1249,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun
load = hlsl_block_add_simple_load(ctx, block, func->early_return_var, &cf_instr->loc); not = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_LOGIC_NOT, load, &cf_instr->loc); - hlsl_block_add_if(ctx, block, not, &then_block, NULL, &cf_instr->loc); + hlsl_block_add_if(ctx, block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc); }
return has_early_return; @@ -1286,7 +1303,8 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h return hlsl_block_add_simple_load(ctx, block, coords, loc); }
-static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_complex_casts(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { unsigned int src_comp_count, dst_comp_count; struct hlsl_type *src_type, *dst_type; @@ -1297,17 +1315,17 @@ static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr unsigned int dst_idx;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL;
if (hlsl_ir_expr(instr)->op != HLSL_OP1_CAST) - return false; + return NULL;
arg = hlsl_ir_expr(instr)->operands[0].node; dst_type = instr->data_type; src_type = arg->data_type;
if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR) - return false; + return NULL;
src_comp_count = hlsl_type_component_count(src_type); dst_comp_count = hlsl_type_component_count(dst_type); @@ -1323,7 +1341,7 @@ static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr }
if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, &instr->loc))) - return false; + return NULL; hlsl_init_simple_deref_from_var(&var_deref, var);
for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) @@ -1354,8 +1372,7 @@ static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr hlsl_block_add_store_component(ctx, block, &var_deref, dst_idx, cast); }
- hlsl_block_add_simple_load(ctx, block, var, &instr->loc); - return true; + return hlsl_block_add_simple_load(ctx, block, var, &instr->loc); }
/* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that @@ -1363,7 +1380,8 @@ static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr * an assignment or as a value made from different components of the matrix. The former cases should * have already been split into several separate assignments, but the latter are lowered by this * pass. */ -static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_matrix_swizzles(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_swizzle *swizzle; struct hlsl_deref var_deref; @@ -1372,14 +1390,14 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins unsigned int k, i;
if (instr->type != HLSL_IR_SWIZZLE) - return false; + return NULL; swizzle = hlsl_ir_swizzle(instr); matrix_type = swizzle->val.node->data_type; if (matrix_type->class != HLSL_CLASS_MATRIX) - return false; + return NULL;
if (!(var = hlsl_new_synthetic_var(ctx, "matrix-swizzle", instr->data_type, &instr->loc))) - return false; + return NULL; hlsl_init_simple_deref_from_var(&var_deref, var);
for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) @@ -1392,8 +1410,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins hlsl_block_add_store_component(ctx, block, &var_deref, i, load); }
- hlsl_block_add_simple_load(ctx, block, var, &instr->loc); - return true; + return hlsl_block_add_simple_load(ctx, block, var, &instr->loc); }
/* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct @@ -1402,7 +1419,8 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins * For the latter case, this pass takes care of lowering hlsl_ir_indexes into individual * hlsl_ir_loads, or individual hlsl_ir_resource_loads, in case the indexing is a * resource access. */ -static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_index_loads(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_deref var_deref; struct hlsl_ir_index *index; @@ -1411,7 +1429,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_ir_var *var;
if (instr->type != HLSL_IR_INDEX) - return false; + return NULL; index = hlsl_ir_index(instr); val = index->val.node;
@@ -1426,14 +1444,13 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count);
if (!(coords = add_zero_mipmap_level(ctx, block, coords, &instr->loc))) - return false; + return NULL;
params.type = HLSL_RESOURCE_LOAD; params.resource = val; params.coords = coords; params.format = val->data_type->e.resource.format; - hlsl_block_add_resource_load(ctx, block, &params, &instr->loc); - return true; + return hlsl_block_add_resource_load(ctx, block, &params, &instr->loc); }
if (val->type == HLSL_IR_RESOURCE_LOAD) @@ -1450,7 +1467,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_deref row_deref;
if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) - return false; + return NULL; hlsl_init_simple_deref_from_var(&row_deref, var);
for (unsigned int i = 0; i < mat->data_type->e.numeric.dimx; ++i) @@ -1479,7 +1496,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, hlsl_block_add_store_component(ctx, block, &row_deref, i, &column_load->node); }
- hlsl_block_add_simple_load(ctx, block, var, &instr->loc); + return hlsl_block_add_simple_load(ctx, block, var, &instr->loc); } else { @@ -1497,14 +1514,13 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, appended_load->node.data_type = type;
hlsl_block_add_instr(block, &appended_load->node); + return &appended_load->node; } - - return true; } }
if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc))) - return false; + return NULL; hlsl_init_simple_deref_from_var(&var_deref, var);
hlsl_block_add_simple_store(ctx, block, var, val); @@ -1518,7 +1534,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, VKD3D_ASSERT(!hlsl_type_is_row_major(mat->data_type));
if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) - return false; + return NULL; hlsl_init_simple_deref_from_var(&row_deref, var);
for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) @@ -1528,37 +1544,34 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, c = hlsl_block_add_uint_constant(ctx, block, i, &instr->loc);
if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc))) - return false; + return NULL; hlsl_block_add_instr(block, &load->node);
if (!(load = hlsl_new_load_index(ctx, &load->src, index->idx.node, &instr->loc))) - return false; + return NULL; hlsl_block_add_instr(block, &load->node);
hlsl_block_add_store_index(ctx, block, &row_deref, c, &load->node, 0, &instr->loc); }
- hlsl_block_add_simple_load(ctx, block, var, &instr->loc); + return hlsl_block_add_simple_load(ctx, block, var, &instr->loc); } - else - { - hlsl_block_add_load_index(ctx, block, &var_deref, index->idx.node, &instr->loc); - } - return true; + + return hlsl_block_add_load_index(ctx, block, &var_deref, index->idx.node, &instr->loc); }
/* Lower casts from vec1 to vecN to swizzles. */ -static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { const struct hlsl_type *src_type, *dst_type; struct hlsl_type *dst_scalar_type; struct hlsl_ir_expr *cast;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; cast = hlsl_ir_expr(instr); if (cast->op != HLSL_OP1_CAST) - return false; + return NULL; src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
@@ -1572,17 +1585,17 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_scalar_type, &cast->node.loc);
if (dst_type->e.numeric.dimx != 1) - hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), + return hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), dst_type->e.numeric.dimx, new_cast, &cast->node.loc);
- return true; + return new_cast; }
- return false; + return NULL; }
/* Lowers loads from TGSMs to resource loads. */ -static bool lower_tgsm_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_tgsm_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_resource_load_params params = {.type = HLSL_RESOURCE_LOAD}; const struct vkd3d_shader_location *loc = &instr->loc; @@ -1590,29 +1603,28 @@ static bool lower_tgsm_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s struct hlsl_deref *deref;
if (instr->type != HLSL_IR_LOAD || !hlsl_is_numeric_type(instr->data_type)) - return false; + return NULL; load = hlsl_ir_load(instr); deref = &load->src;
if (!deref->var->is_tgsm) - return false; + return NULL;
if (deref->path_len) { hlsl_fixme(ctx, &instr->loc, "Load from indexed TGSM."); - return false; + return NULL; }
params.resource = hlsl_block_add_simple_load(ctx, block, deref->var, loc); params.format = instr->data_type; params.coords = hlsl_block_add_uint_constant(ctx, block, 0, &instr->loc); - hlsl_block_add_resource_load(ctx, block, &params, loc); - - return true; + return hlsl_block_add_resource_load(ctx, block, &params, loc); }
/* Lowers stores to TGSMs to resource stores. */ -static bool lower_tgsm_stores(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_tgsm_stores(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_store *store; struct hlsl_ir_node *coords; @@ -1620,26 +1632,24 @@ static bool lower_tgsm_stores(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_deref *deref;
if (instr->type != HLSL_IR_STORE) - return false; + return NULL; store = hlsl_ir_store(instr); deref = &store->lhs;
if (!deref->var->is_tgsm) - return false; + return NULL;
if (deref->path_len) { hlsl_fixme(ctx, &instr->loc, "Store to indexed TGSM."); - return false; + return NULL; }
hlsl_init_simple_deref_from_var(&res_deref, deref->var); coords = hlsl_block_add_uint_constant(ctx, block, 0, &instr->loc);
- hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &res_deref, + return hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &res_deref, coords, store->rhs.node, store->writemask, &instr->loc); - - return true; }
/* Allocate a unique, ordered index to each instruction, which will be used for @@ -3558,7 +3568,8 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins return false; }
-static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static struct hlsl_ir_node *fold_redundant_casts(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { if (instr->type == HLSL_IR_EXPR) { @@ -3567,20 +3578,17 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst const struct hlsl_type *src_type;
if (expr->op != HLSL_OP1_CAST) - return false; + return NULL;
src_type = expr->operands[0].node->data_type;
if (hlsl_types_are_equal(src_type, dst_type) || (src_type->e.numeric.type == dst_type->e.numeric.type && hlsl_is_vec1(src_type) && hlsl_is_vec1(dst_type))) - { - hlsl_replace_node(&expr->node, expr->operands[0].node); - return true; - } + return expr->operands[0].node; }
- return false; + return NULL; }
/* Copy an element of a complex variable. Helper for @@ -3815,17 +3823,18 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; }
-static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_narrowing_casts(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { const struct hlsl_type *src_type, *dst_type; struct hlsl_type *dst_vector_type; struct hlsl_ir_expr *cast;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; cast = hlsl_ir_expr(instr); if (cast->op != HLSL_OP1_CAST) - return false; + return NULL; src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
@@ -3838,65 +3847,58 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins /* We need to preserve the cast since it might be doing more than just * narrowing the vector. */ new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_vector_type, &cast->node.loc); - hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, Y, Z, W), + return hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, Y, Z, W), dst_type->e.numeric.dimx, new_cast, &cast->node.loc); - return true; }
- return false; + return NULL; }
-static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static struct hlsl_ir_node *fold_swizzle_chains(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_swizzle *swizzle; struct hlsl_ir_node *next_instr;
if (instr->type != HLSL_IR_SWIZZLE) - return false; + return NULL; swizzle = hlsl_ir_swizzle(instr);
next_instr = swizzle->val.node;
if (next_instr->type == HLSL_IR_SWIZZLE) { - struct hlsl_ir_node *new_swizzle; uint32_t combined_swizzle;
combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector, swizzle->u.vector, instr->data_type->e.numeric.dimx); next_instr = hlsl_ir_swizzle(next_instr)->val.node;
- if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, - instr->data_type->e.numeric.dimx, next_instr, &instr->loc))) - return false; - - list_add_before(&instr->entry, &new_swizzle->entry); - hlsl_replace_node(instr, new_swizzle); - return true; + return hlsl_block_add_swizzle(ctx, block, combined_swizzle, + instr->data_type->e.numeric.dimx, next_instr, &instr->loc); }
- return false; + return NULL; }
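/* Sketch of the swizzle folding above: two chained swizzles compose into
 * one, e.g. for a float4 v,
 *     v.zyxw.yx == v.yz
 * since component i of the outer swizzle selects component outer[i] of the
 * inner result; hlsl_combine_swizzles() performs that composition. */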
-static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static struct hlsl_ir_node *fold_trivial_swizzles(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_swizzle *swizzle; unsigned int i;
if (instr->type != HLSL_IR_SWIZZLE) - return false; + return NULL; swizzle = hlsl_ir_swizzle(instr);
if (instr->data_type->e.numeric.dimx != swizzle->val.node->data_type->e.numeric.dimx) - return false; + return NULL;
for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i) - return false; - - hlsl_replace_node(instr, swizzle->val.node); + return NULL;
- return true; + return swizzle->val.node; }
static bool remove_trivial_conditional_branches(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -3918,122 +3920,432 @@ static bool remove_trivial_conditional_branches(struct hlsl_ctx *ctx, struct hls return true; }
-static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool is_conditional_block_simple(const struct hlsl_block *cond_block) { - struct hlsl_ir_switch_case *c, *def = NULL; - bool missing_terminal_break = false; - struct hlsl_ir_node *node; - struct hlsl_ir_switch *s; - - if (instr->type != HLSL_IR_SWITCH) - return false; - s = hlsl_ir_switch(instr); + static const unsigned int max_cost = 10; + struct hlsl_ir_node *instr; + unsigned int cost = 0;
- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + LIST_FOR_EACH_ENTRY(instr, &cond_block->instrs, struct hlsl_ir_node, entry) { - bool terminal_break = false; - - if (list_empty(&c->body.instrs)) - { - terminal_break = !!list_next(&s->cases, &c->entry); - } - else + switch (instr->type) { - node = LIST_ENTRY(list_tail(&c->body.instrs), struct hlsl_ir_node, entry); - if (node->type == HLSL_IR_JUMP) - terminal_break = (hlsl_ir_jump(node)->type == HLSL_IR_JUMP_BREAK); - } + case HLSL_IR_CONSTANT: + case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SWIZZLE: + break;
- missing_terminal_break |= !terminal_break; + case HLSL_IR_EXPR: + ++cost; + break;
- if (!terminal_break) - { - if (c->is_default) - { - hlsl_error(ctx, &c->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "The 'default' case block is not terminated with 'break' or 'return'."); - } - else + case HLSL_IR_JUMP: { - hlsl_error(ctx, &c->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Switch case block '%u' is not terminated with 'break' or 'return'.", c->value); + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + if (jump->type != HLSL_IR_JUMP_DISCARD_NZ && jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + ++cost; + break; } + + case HLSL_IR_STORE: + if (hlsl_ir_store(instr)->lhs.var->is_tgsm) + return false; + ++cost; + break; + + case HLSL_IR_LOAD: + if (hlsl_ir_load(instr)->src.var->is_tgsm) + return false; + break; + + default: + return false; } + + if (cost > max_cost) + return false; }
- if (missing_terminal_break) - return true; + return true; +}
- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) +static bool can_flatten_conditional_block(struct hlsl_ctx *ctx, const struct hlsl_block *cond_block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &cond_block->instrs, struct hlsl_ir_node, entry) { - if (c->is_default) + switch (instr->type) { - def = c; + case HLSL_IR_CALL: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_INTERLOCKED: + case HLSL_IR_SYNC: + goto fail;
- /* Remove preceding empty cases. */ - while (list_prev(&s->cases, &def->entry)) + case HLSL_IR_JUMP: { - c = LIST_ENTRY(list_prev(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry); - if (!list_empty(&c->body.instrs)) - break; - hlsl_free_ir_switch_case(c); - } + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
- if (list_empty(&def->body.instrs)) - { - /* Remove following empty cases. */ - while (list_next(&s->cases, &def->entry)) + if (jump->type != HLSL_IR_JUMP_DISCARD_NZ && jump->type != HLSL_IR_JUMP_DISCARD_NEG) { - c = LIST_ENTRY(list_next(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry); - if (!list_empty(&c->body.instrs)) - break; - hlsl_free_ir_switch_case(c); + hlsl_fixme(ctx, &instr->loc, "Flattening conditional blocks with non-discard jump instructions."); + return false; } + break; + }
- /* Merge with the next case. */ - if (list_next(&s->cases, &def->entry)) + case HLSL_IR_STORE: + if (hlsl_ir_store(instr)->lhs.var->is_tgsm) + goto fail; + break; + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + if (!can_flatten_conditional_block(ctx, &iff->then_block) + || !can_flatten_conditional_block(ctx, &iff->else_block)) + return false; + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + if (!can_flatten_conditional_block(ctx, &loop->iter) + || !can_flatten_conditional_block(ctx, &loop->body)) + return false; + break; + } + + case HLSL_IR_SWITCH: + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - c = LIST_ENTRY(list_next(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry); - c->is_default = true; - hlsl_free_ir_switch_case(def); - def = c; + if (!can_flatten_conditional_block(ctx, &c->body)) + return false; } + break; }
- break; + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_INDEX: + case HLSL_IR_LOAD: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SWIZZLE: + case HLSL_IR_COMPILE: + case HLSL_IR_SAMPLER_STATE: + case HLSL_IR_STATEBLOCK_CONSTANT: + break; } }
- if (def) - { - list_remove(&def->entry); - } - else - { - if (!(def = hlsl_new_switch_case(ctx, 0, true, NULL, &s->node.loc))) - return true; - hlsl_block_add_jump(ctx, &def->body, HLSL_IR_JUMP_BREAK, NULL, &s->node.loc); - } - list_add_tail(&s->cases, &def->entry); - return true; + +fail: + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_CANNOT_FLATTEN, + "Conditional branches with side effects cannot be flattened."); + return false; }
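/* Illustration of the flattening performed below: a branch whose blocks pass
 * the checks above, e.g.
 *     if (c) x = a; else x = b;
 * is rewritten by predicating each store on the branch condition,
 *     x = c ? a : x;       // from the then-block
 *     x = c ? x : b;       // from the else-block
 * after which the HLSL_IR_IF node itself can be removed. */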
-static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static bool lower_conditional_block_stores(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_ir_node *cond, bool is_then) { - struct hlsl_ir_node *idx; - struct hlsl_deref *deref; + struct hlsl_ir_node *load, *new_val; + struct hlsl_ir_store *store; + struct hlsl_type *rhs_type; + struct hlsl_block block; + + if (instr->type != HLSL_IR_STORE) + return false; + store = hlsl_ir_store(instr); + rhs_type = store->rhs.node->data_type; + + VKD3D_ASSERT(rhs_type->class <= HLSL_CLASS_VECTOR); + VKD3D_ASSERT(cond->data_type->e.numeric.dimx == 1); + + hlsl_block_init(&block); + + load = hlsl_block_add_load_index(ctx, &block, &store->lhs, NULL, &store->node.loc); + + if (store->writemask && !hlsl_types_are_equal(rhs_type, load->data_type)) + load = hlsl_block_add_swizzle(ctx, &block, hlsl_swizzle_from_writemask(store->writemask), + rhs_type->e.numeric.dimx, load, &store->node.loc); + + if (rhs_type->e.numeric.dimx != 1) + cond = hlsl_block_add_swizzle(ctx, &block, HLSL_SWIZZLE(X, X, X, X), + rhs_type->e.numeric.dimx, cond, &store->node.loc); + + if (is_then) + new_val = hlsl_add_conditional(ctx, &block, cond, store->rhs.node, load); + else + new_val = hlsl_add_conditional(ctx, &block, cond, load, store->rhs.node); + + list_move_before(&store->node.entry, &block.instrs); + hlsl_src_remove(&store->rhs); + hlsl_src_from_node(&store->rhs, new_val); + return true; +} + +static bool lower_conditional_block_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_ir_node *cond, bool is_then) +{ + struct hlsl_ir_node *discard_cond, *new_cond = NULL; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + discard_cond = jump->condition.node; + + if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) + return false; + + VKD3D_ASSERT(ctx->profile->major_version >= 4); + VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL && cond->data_type->e.numeric.dimx == 1); + VKD3D_ASSERT(discard_cond->data_type->e.numeric.dimx == 1); + + hlsl_block_init(&block); + + if (!is_then) + cond = hlsl_block_add_unary_expr(ctx, &block, HLSL_OP1_LOGIC_NOT, cond, &instr->loc); + discard_cond = hlsl_block_add_cast(ctx, &block, discard_cond, cond->data_type, &instr->loc); + + /* discard_nz (cond && discard_cond) */ + new_cond = hlsl_block_add_binary_expr(ctx, &block, HLSL_OP2_LOGIC_AND, cond, discard_cond); + + list_move_before(&jump->node.entry, &block.instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, new_cond); + return true; +} + +static bool lower_conditional_block_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_ir_node *cond, bool is_then) +{ + struct hlsl_ir_node *discard_cond, *new_cond = NULL; + struct hlsl_constant_value zero_value = {0}; + struct hlsl_ir_node *zero; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + discard_cond = jump->condition.node; + + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + VKD3D_ASSERT(ctx->profile->major_version < 4); + VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL && cond->data_type->e.numeric.dimx == 1); + + hlsl_block_init(&block); + + if (!(zero = hlsl_new_constant(ctx, discard_cond->data_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(&block, 
zero); + + if (zero->data_type->e.numeric.dimx != 1) + cond = hlsl_block_add_swizzle(ctx, &block, HLSL_SWIZZLE(X, X, X, X), + zero->data_type->e.numeric.dimx, cond, &instr->loc); + + if (is_then) + new_cond = hlsl_add_conditional(ctx, &block, cond, discard_cond, zero); + else + new_cond = hlsl_add_conditional(ctx, &block, cond, zero, discard_cond); + + list_move_before(&jump->node.entry, &block.instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, new_cond); + return true; +} + +struct flatten_conditional_block_ctx +{ + struct hlsl_ir_node *cond; + bool is_then; +}; + +static bool lower_conditional_block_instrs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct flatten_conditional_block_ctx *flatten_ctx = context; + + return lower_conditional_block_stores(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then) + || lower_conditional_block_discard_nz(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then) + || lower_conditional_block_discard_neg(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then); +} + +static bool flatten_conditional_branches(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct flatten_conditional_block_ctx flatten_ctx; + struct hlsl_ir_if *iff; + bool force_flatten; + + if (instr->type != HLSL_IR_IF) + return false; + iff = hlsl_ir_if(instr); + + if (iff->flatten_type == HLSL_IF_FORCE_BRANCH) + return false; + + force_flatten = iff->flatten_type == HLSL_IF_FORCE_FLATTEN + || hlsl_version_lt(ctx, 2, 1); /* Always flatten branches for SM < 2.1. */ + + if (force_flatten) + { + if (!can_flatten_conditional_block(ctx, &iff->then_block) + || !can_flatten_conditional_block(ctx, &iff->else_block)) + return false; + } + else if (!is_conditional_block_simple(&iff->then_block) || !is_conditional_block_simple(&iff->else_block)) + { + /* Only flatten simple blocks by default. 
*/ + return false; + } + + flatten_ctx.cond = iff->condition.node; + + flatten_ctx.is_then = true; + hlsl_transform_ir(ctx, lower_conditional_block_instrs, &iff->then_block, &flatten_ctx); + + flatten_ctx.is_then = false; + hlsl_transform_ir(ctx, lower_conditional_block_instrs, &iff->else_block, &flatten_ctx); + + list_move_before(&instr->entry, &iff->then_block.instrs); + list_move_before(&instr->entry, &iff->else_block.instrs); + list_remove(&instr->entry); + hlsl_free_instr(instr); + return true; +} + +static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_switch_case *c, *def = NULL; + bool missing_terminal_break = false; + struct hlsl_ir_node *node; + struct hlsl_ir_switch *s; + + if (instr->type != HLSL_IR_SWITCH) + return false; + s = hlsl_ir_switch(instr); + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + bool terminal_break = false; + + if (list_empty(&c->body.instrs)) + { + terminal_break = !!list_next(&s->cases, &c->entry); + } + else + { + node = LIST_ENTRY(list_tail(&c->body.instrs), struct hlsl_ir_node, entry); + if (node->type == HLSL_IR_JUMP) + terminal_break = (hlsl_ir_jump(node)->type == HLSL_IR_JUMP_BREAK); + } + + missing_terminal_break |= !terminal_break; + + if (!terminal_break) + { + if (c->is_default) + { + hlsl_error(ctx, &c->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "The 'default' case block is not terminated with 'break' or 'return'."); + } + else + { + hlsl_error(ctx, &c->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Switch case block '%u' is not terminated with 'break' or 'return'.", c->value); + } + } + } + + if (missing_terminal_break) + return true; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + if (c->is_default) + { + def = c; + + /* Remove preceding empty cases. */ + while (list_prev(&s->cases, &def->entry)) + { + c = LIST_ENTRY(list_prev(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry); + if (!list_empty(&c->body.instrs)) + break; + hlsl_free_ir_switch_case(c); + } + + if (list_empty(&def->body.instrs)) + { + /* Remove following empty cases. */ + while (list_next(&s->cases, &def->entry)) + { + c = LIST_ENTRY(list_next(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry); + if (!list_empty(&c->body.instrs)) + break; + hlsl_free_ir_switch_case(c); + } + + /* Merge with the next case. */ + if (list_next(&s->cases, &def->entry)) + { + c = LIST_ENTRY(list_next(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry); + c->is_default = true; + hlsl_free_ir_switch_case(def); + def = c; + } + } + + break; + } + } + + if (def) + { + list_remove(&def->entry); + } + else + { + if (!(def = hlsl_new_switch_case(ctx, 0, true, NULL, &s->node.loc))) + return true; + hlsl_block_add_jump(ctx, &def->body, HLSL_IR_JUMP_BREAK, NULL, &s->node.loc); + } + list_add_tail(&s->cases, &def->entry); + + return true; +} + +static struct hlsl_ir_node *lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *idx; + struct hlsl_deref *deref; struct hlsl_type *type; unsigned int i;
if (instr->type != HLSL_IR_LOAD) - return false; + return NULL;
deref = &hlsl_ir_load(instr)->src; VKD3D_ASSERT(deref->var);
if (deref->path_len == 0) - return false; + return NULL;
type = deref->var->data_type; for (i = 0; i < deref->path_len - 1; ++i) @@ -4050,7 +4362,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir enum hlsl_ir_expr_op op;
if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) - return false; + return NULL; hlsl_block_add_instr(block, &vector_load->node);
swizzle = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc); @@ -4059,9 +4371,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir value.u[1].u = 1; value.u[2].u = 2; value.u[3].u = 3; - if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, width), &value, &instr->loc))) - return false; - hlsl_block_add_instr(block, c); + c = hlsl_block_add_constant(ctx, block, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, width), &value, &instr->loc);
operands[0] = swizzle; operands[1] = c; @@ -4077,14 +4387,14 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir * LOGIC_OR + LOGIC_AND. */ operands[0] = &vector_load->node; operands[1] = eq; - hlsl_block_add_expr(ctx, block, op, operands, instr->data_type, &instr->loc); - return true; + return hlsl_block_add_expr(ctx, block, op, operands, instr->data_type, &instr->loc); }
- return false; + return NULL; }
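/* Sketch of the non-constant vector indexing lowering above: for a float4 v
 * and a dynamic index i,
 *     v[i]
 * becomes a masked dot product, roughly
 *     dot(v, i.xxxx == uint4(0, 1, 2, 3))
 * with the equality mask cast to the element type; for bool vectors the DOT
 * is expressed with LOGIC_OR + LOGIC_AND instead. */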
-static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *idx; struct hlsl_deref *deref; @@ -4092,13 +4402,13 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc unsigned int i;
if (instr->type != HLSL_IR_STORE) - return false; + return NULL;
deref = &hlsl_ir_store(instr)->lhs; VKD3D_ASSERT(deref->var);
if (deref->path_len == 0) - return false; + return NULL;
type = deref->var->data_type; for (i = 0; i < deref->path_len - 1; ++i) @@ -4113,7 +4423,7 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc hlsl_fixme(ctx, &instr->loc, "Non-constant vector addressing on store. Unrolling may be missing."); }
- return false; + return NULL; }
static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) @@ -4127,8 +4437,8 @@ static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const s * This is achieved through a synthetic variable. The non-constant index is compared for equality * with every possible value it can have within the array bounds, and the ternary operator is used * to update the value of the synthetic var when the equality check passes. */ -static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_block *block) +static struct hlsl_ir_node *lower_nonconstant_array_loads(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_constant_value zero_value = {0}; struct hlsl_ir_node *cut_index, *zero; @@ -4140,15 +4450,15 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n bool row_major;
if (instr->type != HLSL_IR_LOAD) - return false; + return NULL; load = hlsl_ir_load(instr); deref = &load->src;
if (deref->path_len == 0) - return false; + return NULL;
if (deref_supports_sm1_indirect_addressing(ctx, deref)) - return false; + return NULL;
for (i = deref->path_len - 1; ; --i) { @@ -4159,7 +4469,7 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n }
if (i == 0) - return false; + return NULL; }
cut_index = deref->path[i_cut].node; @@ -4171,12 +4481,9 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n VKD3D_ASSERT(cut_type->class == HLSL_CLASS_ARRAY || row_major);
if (!(var = hlsl_new_synthetic_var(ctx, row_major ? "row_major-load" : "array-load", instr->data_type, &instr->loc))) - return false; - - if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, zero); + return NULL;
+ zero = hlsl_block_add_constant(ctx, block, instr->data_type, &zero_value, &instr->loc); hlsl_block_add_simple_store(ctx, block, var, zero);
TRACE("Lowering non-constant %s load on variable '%s'.\n", row_major ? "row_major" : "array", deref->var->name); @@ -4200,7 +4507,7 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n var_load = hlsl_block_add_simple_load(ctx, block, var, &cut_index->loc);
if (!hlsl_copy_deref(ctx, &deref_copy, deref)) - return false; + return NULL; hlsl_src_remove(&deref_copy.path[i_cut]); hlsl_src_from_node(&deref_copy.path[i_cut], const_i); specific_load = hlsl_block_add_load_index(ctx, block, &deref_copy, NULL, &cut_index->loc); @@ -4214,8 +4521,7 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n hlsl_block_add_simple_store(ctx, block, var, ternary); }
- hlsl_block_add_simple_load(ctx, block, var, &instr->loc); - return true; + return hlsl_block_add_simple_load(ctx, block, var, &instr->loc); }
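/* Sketch of the non-constant array load lowering above, per the comment:
 *     x = arr[i];
 * becomes a chain of equality-guarded selects through a synthetic variable,
 *     tmp = 0;
 *     tmp = (i == 0) ? arr[0] : tmp;
 *     tmp = (i == 1) ? arr[1] : tmp;
 *     ...
 *     x = tmp;
 * with one step per valid index within the array bounds. */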
static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type) @@ -4486,31 +4792,30 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) }
/* Turn CAST to int or uint into TRUNC + REINTERPRET */ -static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_casts_to_int(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; struct hlsl_ir_node *arg, *trunc; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP1_CAST) - return false; + return NULL;
arg = expr->operands[0].node; if (!hlsl_type_is_integer(instr->data_type) || instr->data_type->e.numeric.type == HLSL_TYPE_BOOL) - return false; + return NULL; if (!hlsl_type_is_floating_point(arg->data_type)) - return false; + return NULL;
trunc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_TRUNC, arg, &instr->loc);
memset(operands, 0, sizeof(operands)); operands[0] = trunc; - hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); - - return true; + return hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); }
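/* Scalar model of the cast-to-int lowering above (illustration only, assumes
 * <math.h>); the REINTERPRET only retypes the truncated value: */
static float cast_to_int_model(float x)
{
    return truncf(x); /* TRUNC; REINTERPRET changes the IR type, not the value. */
}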
/* Turn TRUNC into: @@ -4524,16 +4829,16 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, * where the comparisons in the extra term are performed using CMP or SLT * depending on whether this is a pixel or vertex shader, respectively. */ -static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg, *res; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP1_TRUNC) - return false; + return NULL;
arg = expr->operands[0].node;
@@ -4543,31 +4848,27 @@ static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct struct hlsl_constant_value zero_value, one_value;
memset(&zero_value, 0, sizeof(zero_value)); - if (!(zero = hlsl_new_constant(ctx, arg->data_type, &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, zero); + zero = hlsl_block_add_constant(ctx, block, arg->data_type, &zero_value, &instr->loc);
one_value.u[0].f = 1.0; one_value.u[1].f = 1.0; one_value.u[2].f = 1.0; one_value.u[3].f = 1.0; - if (!(one = hlsl_new_constant(ctx, arg->data_type, &one_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, one); + one = hlsl_block_add_constant(ctx, block, arg->data_type, &one_value, &instr->loc);
fract = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, arg, &instr->loc); neg_fract = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, fract, &instr->loc);
if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one))) - return false; + return NULL; hlsl_block_add_instr(block, has_fract);
if (!(extra = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, arg, zero, has_fract))) - return false; + return NULL; hlsl_block_add_instr(block, extra);
floor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg, neg_fract); - res = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, floor, extra); + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, floor, extra); } else { @@ -4581,11 +4882,10 @@ static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct floor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg, neg_fract);
if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor))) - return false; + return NULL; hlsl_block_add_instr(block, res); + return res; } - - return true; }
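/* Scalar model of the CMP-based (pixel shader) TRUNC path above, illustrative
 * only, assuming <math.h>; CMP(a, b, c) selects b when a >= 0 and c otherwise.
 * The vertex-shader path computes the same correction with SLT and a MAD. */
static float trunc_model(float x)
{
    float frc = x - floorf(x);                      /* FRC, in [0, 1) */
    float has_frc = -frc >= 0.0f ? 0.0f : 1.0f;     /* CMP(-frc, 0, 1) */
    float extra = x >= 0.0f ? 0.0f : has_frc;       /* CMP(x, 0, has_frc) */
    return (x + -frc) + extra;                      /* floor(x) + extra */
}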
/* Lower modulus using: @@ -4593,7 +4893,8 @@ static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct * mod(x, y) = x - trunc(x / y) * y; * */ -static bool lower_int_modulus_sm1(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_int_modulus_sm1(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *div, *trunc, *mul, *neg, *operands[2], *ret; struct hlsl_type *float_type; @@ -4601,15 +4902,15 @@ static bool lower_int_modulus_sm1(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins bool is_float;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP2_MOD) - return false; + return NULL;
is_float = instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT || instr->data_type->e.numeric.type == HLSL_TYPE_HALF; if (is_float) - return false; + return NULL; float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
for (unsigned int i = 0; i < 2; ++i) @@ -4622,13 +4923,11 @@ static bool lower_int_modulus_sm1(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins mul = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, trunc, operands[1]); neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul, &instr->loc); ret = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, operands[0], neg); - hlsl_block_add_cast(ctx, block, ret, instr->data_type, &instr->loc); - - return true; + return hlsl_block_add_cast(ctx, block, ret, instr->data_type, &instr->loc); }
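/* Scalar model of the SM1 integer modulus above (illustrative only, assumes
 * <math.h>); both operands have already been cast to float: */
static float int_mod_model(float x, float y)
{
    float q = truncf(x / y); /* DIV (RCP + MUL) followed by TRUNC */
    return x + -(q * y);     /* MUL, NEG, ADD: x - trunc(x / y) * y */
}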
/* Lower DIV to RCP + MUL. */ -static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *rcp, *ret, *operands[2]; struct hlsl_type *float_type; @@ -4636,10 +4935,10 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, str bool is_float;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP2_DIV) - return false; + return NULL;
is_float = instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT || instr->data_type->e.numeric.type == HLSL_TYPE_HALF; @@ -4656,42 +4955,40 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, str ret = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, operands[0], rcp); if (!is_float) ret = hlsl_block_add_cast(ctx, block, ret, instr->data_type, &instr->loc); - - return true; + return ret; }
/* Lower SQRT to RSQ + RCP. */ -static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_expr *expr; struct hlsl_ir_node *rsq;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP1_SQRT) - return false; + return NULL;
rsq = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_RSQ, expr->operands[0].node, &instr->loc); - hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_RCP, rsq, &instr->loc); - return true; + return hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_RCP, rsq, &instr->loc); }
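/* Scalar model of the SQRT lowering above (illustrative only, assumes
 * <math.h>): RSQ computes 1 / sqrt(x), and RCP inverts it back. */
static float sqrt_model(float x)
{
    float rsq = 1.0f / sqrtf(x); /* RSQ */
    return 1.0f / rsq;           /* RCP */
}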
/* Lower DP2 to MUL + ADD */ -static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg2, *mul, *add_x, *add_y; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DOT) - return false; + return NULL; if (arg1->data_type->e.numeric.dimx != 2) - return false; + return NULL;
if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) { @@ -4701,7 +4998,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h operands[1] = arg2; operands[2] = hlsl_block_add_float_constant(ctx, block, 0.0f, &expr->node.loc);
- hlsl_block_add_expr(ctx, block, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc); + return hlsl_block_add_expr(ctx, block, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc); } else { @@ -4711,32 +5008,29 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h instr->data_type->e.numeric.dimx, mul, &expr->node.loc); add_y = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->e.numeric.dimx, mul, &expr->node.loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, add_x, add_y); + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, add_x, add_y); } - - return true; }
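/* Scalar model of the two-component dot lowering above (illustrative only):
 * the vertex-shader path multiplies once and adds the two lanes; the
 * pixel-shader path folds the add into DP2ADD with a zero addend. */
static float dp2_model(const float a[2], const float b[2])
{
    float xx = a[0] * b[0]; /* MUL, lane .x */
    float yy = a[1] * b[1]; /* MUL, lane .y */
    return xx + yy;         /* ADD */
}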
/* Lower ABS to MAX */ -static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg, *neg; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); arg = expr->operands[0].node; if (expr->op != HLSL_OP1_ABS) - return false; + return NULL;
neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MAX, neg, arg); - return true; + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MAX, neg, arg); }
/* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). */ -static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg, *neg, *sum, *frc, *half; struct hlsl_type *type = instr->data_type; @@ -4745,69 +5039,64 @@ static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL;
expr = hlsl_ir_expr(instr); arg = expr->operands[0].node; if (expr->op != HLSL_OP1_ROUND) - return false; + return NULL;
component_count = hlsl_type_component_count(type); for (i = 0; i < component_count; ++i) half_value.u[i].f = 0.5f; - if (!(half = hlsl_new_constant(ctx, type, &half_value, &expr->node.loc))) - return false; - hlsl_block_add_instr(block, half); + half = hlsl_block_add_constant(ctx, block, type, &half_value, &expr->node.loc);
sum = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg, half); frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, sum, &instr->loc); neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, frc, &instr->loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, sum, neg); - return true; + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, sum, neg); }
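/* Scalar model of the FRC-based ROUND above (illustrative only, assumes
 * <math.h>): ROUND(x) = (x + 0.5) - FRC(x + 0.5) = floor(x + 0.5). */
static float round_model(float x)
{
    float sum = x + 0.5f;
    float frc = sum - floorf(sum); /* FRC */
    return sum + -frc;             /* ADD of the negated fraction */
}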
/* Lower CEIL to FRC */ -static bool lower_ceil(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_ceil(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg, *neg, *frc; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL;
expr = hlsl_ir_expr(instr); arg = expr->operands[0].node; if (expr->op != HLSL_OP1_CEIL) - return false; + return NULL;
neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc); frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, neg, &instr->loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, frc, arg); - return true; + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, frc, arg); }
/* Lower FLOOR to FRC */ -static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg, *neg, *frc; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL;
expr = hlsl_ir_expr(instr); arg = expr->operands[0].node; if (expr->op != HLSL_OP1_FLOOR) - return false; + return NULL;
frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, arg, &instr->loc); neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, frc, &instr->loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, neg, arg); - return true; + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, neg, arg); }
/* Lower SIN/COS to SINCOS for SM1. */ -static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg, *half, *two_pi, *reciprocal_two_pi, *neg_pi; struct hlsl_constant_value half_value, two_pi_value, reciprocal_two_pi_value, neg_pi_value; @@ -4819,7 +5108,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct int i;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr);
if (expr->op == HLSL_OP1_SIN) @@ -4827,7 +5116,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct else if (expr->op == HLSL_OP1_COS) op = HLSL_OP1_COS_REDUCED; else - return false; + return NULL;
arg = expr->operands[0].node; type = arg->data_type; @@ -4841,27 +5130,22 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct neg_pi_value.u[i].f = -M_PI; }
- if (!(half = hlsl_new_constant(ctx, type, &half_value, &instr->loc)) - || !(two_pi = hlsl_new_constant(ctx, type, &two_pi_value, &instr->loc)) - || !(reciprocal_two_pi = hlsl_new_constant(ctx, type, &reciprocal_two_pi_value, &instr->loc)) - || !(neg_pi = hlsl_new_constant(ctx, type, &neg_pi_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, half); - hlsl_block_add_instr(block, two_pi); - hlsl_block_add_instr(block, reciprocal_two_pi); - hlsl_block_add_instr(block, neg_pi); + half = hlsl_block_add_constant(ctx, block, type, &half_value, &instr->loc); + two_pi = hlsl_block_add_constant(ctx, block, type, &two_pi_value, &instr->loc); + reciprocal_two_pi = hlsl_block_add_constant(ctx, block, type, &reciprocal_two_pi_value, &instr->loc); + neg_pi = hlsl_block_add_constant(ctx, block, type, &neg_pi_value, &instr->loc);
if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half))) - return false; + return NULL; hlsl_block_add_instr(block, mad); frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, mad, &instr->loc); if (!(reduced = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, frc, two_pi, neg_pi))) - return false; + return NULL; hlsl_block_add_instr(block, reduced);
if (type->e.numeric.dimx == 1) { - sincos = hlsl_block_add_unary_expr(ctx, block, op, reduced, &instr->loc); + return hlsl_block_add_unary_expr(ctx, block, op, reduced, &instr->loc); } else { @@ -4877,7 +5161,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct }
if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc))) - return false; + return NULL; hlsl_init_simple_deref_from_var(&var_deref, var);
for (i = 0; i < type->e.numeric.dimx; ++i) @@ -4886,13 +5170,11 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block_add_store_component(ctx, block, &var_deref, i, sincos); }
- hlsl_block_add_load_index(ctx, block, &var_deref, NULL, &instr->loc); + return hlsl_block_add_load_index(ctx, block, &var_deref, NULL, &instr->loc); } - - return true; }
-static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg, *arg_cast, *neg, *one, *sub; struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; @@ -4901,10 +5183,10 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP1_LOGIC_NOT) - return false; + return NULL;
arg = expr->operands[0].node; float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx); @@ -4920,20 +5202,16 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st one_value.u[1].f = 1.0; one_value.u[2].f = 1.0; one_value.u[3].f = 1.0; - if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, one); - + one = hlsl_block_add_constant(ctx, block, float_type, &one_value, &instr->loc); sub = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, one, neg);
memset(operands, 0, sizeof(operands)); operands[0] = sub; - hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); - return true; + return hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); }
/* Lower TERNARY to CMP for SM1. */ -static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; @@ -4941,11 +5219,11 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru struct hlsl_type *type;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL;
expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP3_TERNARY) - return false; + return NULL;
cond = expr->operands[0].node; first = expr->operands[1].node; @@ -4954,7 +5232,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru if (cond->data_type->class > HLSL_CLASS_VECTOR || instr->data_type->class > HLSL_CLASS_VECTOR) { hlsl_fixme(ctx, &instr->loc, "Lower ternary of type other than scalar or vector."); - return false; + return NULL; }
VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); @@ -4968,56 +5246,43 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru operands[0] = neg; operands[1] = second; operands[2] = first; - hlsl_block_add_expr(ctx, block, HLSL_OP3_CMP, operands, first->data_type, &instr->loc); - return true; + return hlsl_block_add_expr(ctx, block, HLSL_OP3_CMP, operands, first->data_type, &instr->loc); }
-static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static struct hlsl_ir_node *lower_resource_load_bias(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *swizzle, *store; + struct hlsl_ir_node *swizzle, *tmp_load; struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *tmp_load; struct hlsl_ir_var *tmp_var; - struct hlsl_deref deref;
if (instr->type != HLSL_IR_RESOURCE_LOAD) - return false; + return NULL; load = hlsl_ir_resource_load(instr); if (load->load_type != HLSL_RESOURCE_SAMPLE_LOD && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) - return false; + return NULL;
if (!load->lod.node) - return false; + return NULL;
if (!(tmp_var = hlsl_new_synthetic_var(ctx, "coords-with-lod", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), &instr->loc))) - return false; - - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), 4, load->lod.node, &load->lod.node->loc))) - return false; - list_add_before(&instr->entry, &swizzle->entry); + return NULL;
- if (!(store = hlsl_new_simple_store(ctx, tmp_var, swizzle))) - return false; - list_add_before(&instr->entry, &store->entry); + swizzle = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), 4, load->lod.node, &load->lod.node->loc); + hlsl_block_add_simple_store(ctx, block, tmp_var, swizzle); + hlsl_block_add_simple_store(ctx, block, tmp_var, load->coords.node);
- hlsl_init_simple_deref_from_var(&deref, tmp_var); - if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load->coords.node, 0, &instr->loc))) - return false; - list_add_before(&instr->entry, &store->entry); - - if (!(tmp_load = hlsl_new_var_load(ctx, tmp_var, &instr->loc))) - return false; - list_add_before(&instr->entry, &tmp_load->node.entry); + tmp_load = hlsl_block_add_simple_load(ctx, block, tmp_var, &instr->loc);
hlsl_src_remove(&load->coords); - hlsl_src_from_node(&load->coords, &tmp_load->node); + hlsl_src_from_node(&load->coords, tmp_load); hlsl_src_remove(&load->lod); - return true; + return &load->node; }
-static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +static struct hlsl_ir_node *lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res; @@ -5027,11 +5292,10 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node bool negate = false;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); - if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS - && expr->op != HLSL_OP2_GEQUAL) - return false; + if (!hlsl_is_comparison_op(expr->op)) + return NULL;
arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; @@ -5087,10 +5351,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node one_value.u[1].f = 1.0; one_value.u[2].f = 1.0; one_value.u[3].f = 1.0; - if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, one); - + one = hlsl_block_add_constant(ctx, block, float_type, &one_value, &instr->loc); slt_neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, slt, &instr->loc); res = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, one, slt_neg); } @@ -5103,8 +5364,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node * and casts to BOOL have already been lowered to "!= 0". */ memset(operands, 0, sizeof(operands)); operands[0] = res; - hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); - return true; + return hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); }
/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to @@ -5115,7 +5375,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node * = ((x - y) >= 0) ? 0.0 : 1.0 * = CMP(x - y, 0.0, 1.0) */ -static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; struct hlsl_constant_value zero_value, one_value; @@ -5123,10 +5383,10 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP2_SLT) - return false; + return NULL;
arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; @@ -5138,23 +5398,18 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h sub = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg1_cast, neg);
memset(&zero_value, 0, sizeof(zero_value)); - if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, zero); + zero = hlsl_block_add_constant(ctx, block, float_type, &zero_value, &instr->loc);
one_value.u[0].f = 1.0; one_value.u[1].f = 1.0; one_value.u[2].f = 1.0; one_value.u[3].f = 1.0; - if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, one); + one = hlsl_block_add_constant(ctx, block, float_type, &one_value, &instr->loc);
if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) - return false; + return NULL; hlsl_block_add_instr(block, cmp); - - return true; + return cmp; }
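/* Scalar model of the SLT lowering above (illustrative only; CMP(a, b, c)
 * selects b when a >= 0 and c otherwise): */
static float slt_model(float x, float y)
{
    float diff = x + -y;               /* x - y */
    return diff >= 0.0f ? 0.0f : 1.0f; /* CMP(x - y, 0.0, 1.0) */
}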
/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to @@ -5165,7 +5420,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0) * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) */ -static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2; struct hlsl_constant_value zero_value, one_value; @@ -5174,10 +5429,10 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h unsigned int i;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP3_CMP) - return false; + return NULL;
float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
@@ -5188,28 +5443,24 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h }
memset(&zero_value, 0, sizeof(zero_value)); - if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, zero); + zero = hlsl_block_add_constant(ctx, block, float_type, &zero_value, &instr->loc);
one_value.u[0].f = 1.0; one_value.u[1].f = 1.0; one_value.u[2].f = 1.0; one_value.u[3].f = 1.0; - if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, one); + one = hlsl_block_add_constant(ctx, block, float_type, &one_value, &instr->loc);
slt = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, args_cast[0], zero); mul1 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, args_cast[2], slt); neg_slt = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, slt, &instr->loc); sub = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, one, neg_slt); mul2 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, args_cast[1], sub); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, mul1, mul2); - return true; + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, mul1, mul2); }
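/* Scalar model of the CMP lowering above (illustrative only; SLT(a, b)
 * yields 1.0 when a < b and 0.0 otherwise): */
static float cmp_model(float x, float y, float z)
{
    float slt = x < 0.0f ? 1.0f : 0.0f; /* SLT(x, 0.0) */
    return z * slt + y * (1.0f - slt);  /* z if x < 0, else y */
}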
-static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_casts_to_bool(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_type *type = instr->data_type, *arg_type; static const struct hlsl_constant_value zero_value; @@ -5217,37 +5468,36 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); if (expr->op != HLSL_OP1_CAST) - return false; + return NULL; arg_type = expr->operands[0].node->data_type; if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) - return false; + return NULL; if (type->e.numeric.type != HLSL_TYPE_BOOL) - return false; + return NULL;
/* Narrowing casts should have already been lowered. */ VKD3D_ASSERT(type->e.numeric.dimx == arg_type->e.numeric.dimx);
- zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); - if (!zero) - return false; - hlsl_block_add_instr(block, zero); - + zero = hlsl_block_add_constant(ctx, block, arg_type, &zero_value, &instr->loc); neq = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_NEQUAL, expr->operands[0].node, zero); neq->data_type = expr->node.data_type; - - return true; + return neq; }
struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { + struct hlsl_type *false_type = if_false->data_type; struct hlsl_type *cond_type = condition->data_type; struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_type *true_type = if_true->data_type;
- VKD3D_ASSERT(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + VKD3D_ASSERT(hlsl_types_are_equal(true_type, false_type) + || (hlsl_is_vec1(true_type) && hlsl_is_vec1(false_type) + && true_type->e.numeric.type == false_type->e.numeric.type));
if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) { @@ -5259,10 +5509,11 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc operands[0] = condition; operands[1] = if_true; operands[2] = if_false; - return hlsl_block_add_expr(ctx, instrs, HLSL_OP3_TERNARY, operands, if_true->data_type, &condition->loc); + return hlsl_block_add_expr(ctx, instrs, HLSL_OP3_TERNARY, operands, true_type, &condition->loc); }
-static bool lower_int_division_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_int_division_sm4(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = instr->data_type, *utype; @@ -5271,26 +5522,23 @@ static bool lower_int_division_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *in unsigned int i;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DIV) - return false; + return NULL; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; + return NULL; if (type->e.numeric.type != HLSL_TYPE_INT) - return false; + return NULL; utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
xor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_BIT_XOR, arg1, arg2);
for (i = 0; i < type->e.numeric.dimx; ++i) high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, high_bit); - + high_bit = hlsl_block_add_constant(ctx, block, type, &high_bit_value, &instr->loc); and = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_BIT_AND, xor, high_bit); abs1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, arg1, &instr->loc); cast1 = hlsl_block_add_cast(ctx, block, abs1, utype, &instr->loc); @@ -5302,7 +5550,8 @@ static bool lower_int_division_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *in return hlsl_add_conditional(ctx, block, and, neg, cast3); }
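/* Scalar model of the SM4 signed division above (illustrative only, assumes
 * <stdint.h> and <stdlib.h>; INT32_MIN inputs are out of scope, since abs()
 * is undefined there): */
static int32_t idiv_model(int32_t a, int32_t b)
{
    uint32_t sign = ((uint32_t)a ^ (uint32_t)b) & 0x80000000u; /* XOR, AND */
    uint32_t q = (uint32_t)abs(a) / (uint32_t)abs(b);          /* unsigned DIV */
    return sign ? -(int32_t)q : (int32_t)q;                    /* conditional NEG */
}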
-static bool lower_int_modulus_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_int_modulus_sm4(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = instr->data_type, *utype; @@ -5311,24 +5560,21 @@ static bool lower_int_modulus_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins unsigned int i;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) - return false; + return NULL; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; + return NULL; if (type->e.numeric.type != HLSL_TYPE_INT) - return false; + return NULL; utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
for (i = 0; i < type->e.numeric.dimx; ++i) high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, high_bit); - + high_bit = hlsl_block_add_constant(ctx, block, type, &high_bit_value, &instr->loc); and = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_BIT_AND, arg1, high_bit); abs1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, arg1, &instr->loc); cast1 = hlsl_block_add_cast(ctx, block, abs1, utype, &instr->loc); @@ -5340,31 +5586,30 @@ static bool lower_int_modulus_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins return hlsl_add_conditional(ctx, block, and, neg, cast3); }
-static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_type *type = instr->data_type; struct hlsl_ir_node *arg, *neg; struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr);
if (expr->op != HLSL_OP1_ABS) - return false; + return NULL; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; + return NULL; if (type->e.numeric.type != HLSL_TYPE_INT) - return false; + return NULL;
arg = expr->operands[0].node;
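+ /* There is no integer abs instruction to map this to; |x| is computed as max(x, -x), e.g. |-3| = max(-3, 3) = 3. */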
neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MAX, arg, neg); - return true; + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MAX, arg, neg); }
-static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg2, *mult, *comps[4] = {0}, *res; struct hlsl_type *type = instr->data_type; @@ -5373,11 +5618,11 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru bool is_bool;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr);
if (expr->op != HLSL_OP2_DOT) - return false; + return NULL;
if (hlsl_type_is_integer(type)) { @@ -5398,15 +5643,15 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
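+ /* Expand the dot product into per-component multiplies, reduced with ADD, or with logical OR when the operands are bool. */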
res = comps[0]; for (i = 1; i < dimx; ++i) - res = hlsl_block_add_binary_expr(ctx, block, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]); - - return true; + res = hlsl_block_add_binary_expr(ctx, block, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]); + return res; }
- return false; + return NULL; }
-static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +static struct hlsl_ir_node *lower_float_modulus(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; struct hlsl_type *type = instr->data_type, *btype; @@ -5415,16 +5660,16 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr unsigned int i;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr); arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) - return false; + return NULL; if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; + return NULL; if (type->e.numeric.type != HLSL_TYPE_FLOAT) - return false; + return NULL; btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy);
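+ /* fmod(x, y) becomes frac(x / c) * c, where c = (x * y >= 0) ? y : -y carries y's magnitude and x's sign; e.g. fmod(7.5, 2.0) = frac(3.75) * 2.0 = 1.5. */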
mul1 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, arg2, arg1); @@ -5438,15 +5683,11 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
for (i = 0; i < type->e.numeric.dimx; ++i) one_value.u[i].f = 1.0f; - if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, one); - + one = hlsl_block_add_constant(ctx, block, type, &one_value, &instr->loc); div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, one, cond); mul2 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, div, arg1); frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, mul2, &instr->loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, frc, cond); - return true; + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, frc, cond); }
static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -5468,9 +5709,7 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, hlsl_block_init(&block);
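+ /* Rebuild the condition as "any component of cond is negative" by ORing the per-component results of cond < 0. */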
arg_type = jump->condition.node->data_type; - if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(&block, zero); + zero = hlsl_block_add_constant(ctx, &block, arg_type, &zero_value, &instr->loc);
operands[0] = jump->condition.node; operands[1] = zero; @@ -5478,9 +5717,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg_type->e.numeric.dimx, arg_type->e.numeric.dimy); cmp = hlsl_block_add_expr(ctx, &block, HLSL_OP2_LESS, operands, cmp_type, &instr->loc);
- if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(&block, bool_false); + bool_false = hlsl_block_add_constant(ctx, &block, + hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc);
or = bool_false;
@@ -5499,32 +5737,56 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return true; }
-static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static struct hlsl_ir_node *lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *cond, *cond_cast, *abs, *neg; struct hlsl_type *float_type; struct hlsl_ir_jump *jump; - struct hlsl_block block;
if (instr->type != HLSL_IR_JUMP) - return false; + return NULL; jump = hlsl_ir_jump(instr); if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) - return false; + return NULL;
cond = jump->condition.node; float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->e.numeric.dimx);
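+ /* texkill discards when a component is negative, so "discard if cond != 0" is rewritten as "discard if -|cond| < 0". */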
- hlsl_block_init(&block); - - cond_cast = hlsl_block_add_cast(ctx, &block, cond, float_type, &instr->loc); - abs = hlsl_block_add_unary_expr(ctx, &block, HLSL_OP1_ABS, cond_cast, &instr->loc); - neg = hlsl_block_add_unary_expr(ctx, &block, HLSL_OP1_NEG, abs, &instr->loc); + cond_cast = hlsl_block_add_cast(ctx, block, cond, float_type, &instr->loc); + abs = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, cond_cast, &instr->loc); + neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, abs, &instr->loc);
- list_move_tail(&instr->entry, &block.instrs); hlsl_src_remove(&jump->condition); hlsl_src_from_node(&jump->condition, neg); jump->type = HLSL_IR_JUMP_DISCARD_NEG; + return &jump->node; +} + +static bool cast_discard_neg_conditions_to_vec4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *swizzle; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + unsigned int dimx; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + dimx = jump->condition.node->data_type->e.numeric.dimx; + if (dimx == 4) + return false; + + hlsl_block_init(&block); + + swizzle = hlsl_block_add_swizzle(ctx, &block, hlsl_swizzle_from_writemask((1 << dimx) - 1), 4, + jump->condition.node, &instr->loc); + + list_move_before(&instr->entry, &block.instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, swizzle);
return true; } @@ -5906,11 +6168,13 @@ static void mark_vars_usage(struct hlsl_ctx *ctx)
struct register_allocator { + /* Type of registers we are allocating (not counting indexable temps). */ + enum vkd3d_shader_register_type type; + struct allocation { uint32_t reg; unsigned int writemask; - unsigned int first_write, last_read;
/* Two allocations with different modes can't share the same register. */ int mode; @@ -5920,11 +6184,7 @@ struct register_allocator
- /* Indexable temps are allocated separately and always keep their index regardless of their - * lifetime. */ - uint32_t indexable_count; - - /* Total number of registers allocated so far. Used to declare sm4 temp count. */ + /* Total number of registers allocated so far. */ uint32_t reg_count;
/* Special flag so allocations that can share registers prioritize those @@ -5936,7 +6196,7 @@ struct register_allocator };
static unsigned int get_available_writemask(const struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip) + uint32_t reg_idx, int mode, bool vip) { unsigned int writemask = VKD3DSP_WRITEMASK_ALL; size_t i; @@ -5945,12 +6205,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all { const struct allocation *allocation = &allocator->allocations[i];
- /* We do not overlap if first write == last read: - * this is the case where we are allocating the result of that - * expression, e.g. "add r0, r0, r1". */ - - if (allocation->reg == reg_idx - && first_write < allocation->last_read && last_read > allocation->first_write) + if (allocation->reg == reg_idx) { writemask &= ~allocation->writemask; if (allocation->mode != mode) @@ -5967,7 +6222,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all }
static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, - unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip) + unsigned int writemask, int mode, bool vip) { struct allocation *allocation;
@@ -5978,8 +6233,6 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a allocation = &allocator->allocations[allocator->count++]; allocation->reg = reg_idx; allocation->writemask = writemask; - allocation->first_write = first_write; - allocation->last_read = last_read; allocation->mode = mode; allocation->vip = vip;
@@ -5998,8 +6251,7 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a * 'vip' can be used so that no new allocations can be made in the given register * unless they are 'vip' as well. */ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, - unsigned int component_count, int mode, bool force_align, bool vip) + unsigned int reg_size, unsigned int component_count, int mode, bool force_align, bool vip) { struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; unsigned int required_size = force_align ? 4 : reg_size; @@ -6012,62 +6264,34 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a { for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) { - unsigned int available_writemask = get_available_writemask(allocator, - first_write, last_read, reg_idx, mode, vip); + unsigned int available_writemask = get_available_writemask(allocator, reg_idx, mode, vip);
if (vkd3d_popcount(available_writemask) >= pref) { unsigned int writemask = hlsl_combine_writemasks(available_writemask, vkd3d_write_mask_from_component_count(reg_size));
- ret.type = VKD3DSPR_TEMP; + ret.type = allocator->type; ret.id = reg_idx; ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count));
- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip); + record_allocation(ctx, allocator, reg_idx, writemask, mode, vip); return ret; } } }
- ret.type = VKD3DSPR_TEMP; + ret.type = allocator->type; ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, - vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip); - return ret; -} - -/* Allocate a register with writemask, while reserving reg_writemask. */ -static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, - struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, - uint32_t reg_writemask, uint32_t writemask, int mode, bool vip) -{ - struct hlsl_reg ret = {0}; - uint32_t reg_idx; - - VKD3D_ASSERT((reg_writemask & writemask) == writemask); - - for (reg_idx = 0;; ++reg_idx) - { - if ((get_available_writemask(allocator, first_write, last_read, - reg_idx, mode, vip) & reg_writemask) == reg_writemask) - break; - } - - record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); - - ret.type = VKD3DSPR_TEMP; - ret.id = reg_idx; - ret.allocation_size = 1; - ret.writemask = writemask; - ret.allocated = true; + vkd3d_write_mask_from_component_count(reg_size), mode, vip); return ret; }
-static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, - unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) +static bool is_range_available(const struct register_allocator *allocator, + uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) { unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; unsigned int writemask; @@ -6075,18 +6299,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig
for (i = 0; i < (reg_size / 4); ++i) { - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip); + writemask = get_available_writemask(allocator, reg_idx + i, mode, vip); if (writemask != VKD3DSP_WRITEMASK_ALL) return false; } - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip); + writemask = get_available_writemask(allocator, reg_idx + (reg_size / 4), mode, vip); if ((writemask & last_reg_mask) != last_reg_mask) return false; return true; }
-static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip) +static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, + struct register_allocator *allocator, unsigned int reg_size, int mode, bool vip) { struct hlsl_reg ret = {0}; uint32_t reg_idx; @@ -6094,35 +6318,33 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo
for (reg_idx = 0;; ++reg_idx) { - if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip)) + if (is_range_available(allocator, reg_idx, reg_size, mode, vip)) break; }
for (i = 0; i < reg_size / 4; ++i) - record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip); + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, mode, vip); if (reg_size % 4) - record_allocation(ctx, allocator, reg_idx + (reg_size / 4), - (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); + record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, mode, vip);
- ret.type = VKD3DSPR_TEMP; + ret.type = allocator->type; ret.id = reg_idx; ret.allocation_size = align(reg_size, 4) / 4; ret.allocated = true; return ret; }
-static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, const struct hlsl_type *type) +static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + struct register_allocator *allocator, const struct hlsl_type *type) { unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
/* FIXME: We could potentially pack structs or arrays more efficiently... */
if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, first_write, last_read, - type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false); + return allocate_register(ctx, allocator, type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false); else - return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); + return allocate_range(ctx, allocator, reg_size, 0, false); }
static const char *debug_register(struct hlsl_reg reg, const struct hlsl_type *type) @@ -6199,7 +6421,7 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls return false; }
-static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) +static void register_deref_usage(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { struct hlsl_ir_var *var = deref->var; enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); @@ -6247,18 +6469,43 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in }
case HLSL_IR_RESOURCE_LOAD: - register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->resource); - if (hlsl_ir_resource_load(instr)->sampler.var) - register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->sampler); + { + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + + register_deref_usage(ctx, &load->resource); + if (load->sampler.var) + register_deref_usage(ctx, &load->sampler); + + if (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS) + { + unsigned int index; + + hlsl_regset_index_from_deref(ctx, &load->resource, HLSL_REGSET_UAVS, &index); + load->resource.var->objects_usage[HLSL_REGSET_UAVS][index].uav_read = true; + } break; + }
case HLSL_IR_RESOURCE_STORE: register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); break;
case HLSL_IR_INTERLOCKED: - register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst); + { + const struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); + + register_deref_usage(ctx, &interlocked->dst); + + if (hlsl_deref_get_regset(ctx, &interlocked->dst) == HLSL_REGSET_UAVS) + { + unsigned int index; + + hlsl_regset_index_from_deref(ctx, &interlocked->dst, HLSL_REGSET_UAVS, &index); + interlocked->dst.var->objects_usage[HLSL_REGSET_UAVS][index].uav_read = true; + interlocked->dst.var->objects_usage[HLSL_REGSET_UAVS][index].uav_atomics = true; + } break; + }
default: break; @@ -6287,10 +6534,9 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) } }
-static void allocate_instr_temp_register(struct hlsl_ctx *ctx, - struct hlsl_ir_node *instr, struct register_allocator *allocator) +static void allocate_instr_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) { - unsigned int reg_writemask = 0, dst_writemask = 0; + unsigned int dst_writemask = 0; bool is_per_component = false;
if (instr->reg.allocated || !instr->last_read) @@ -6302,12 +6548,10 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, { case HLSL_OP1_COS_REDUCED: dst_writemask = VKD3DSP_WRITEMASK_0; - reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_0; break;
case HLSL_OP1_SIN_REDUCED: dst_writemask = VKD3DSP_WRITEMASK_1; - reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_1; break;
case HLSL_OP1_EXP2: @@ -6329,12 +6573,22 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx,
VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR);
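+ /* Each anonymous expression result takes a fresh temp register id (or an SSA id in the default case); no component packing is attempted here. */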
- if (reg_writemask) - instr->reg = allocate_register_with_masks(ctx, allocator, - instr->index, instr->last_read, reg_writemask, dst_writemask, 0, false); + if (dst_writemask) + { + instr->reg.writemask = dst_writemask; + instr->reg.allocation_size = 1; + instr->reg.allocated = true; + instr->reg.type = VKD3DSPR_TEMP; + instr->reg.id = ctx->temp_count++; + } else if (is_per_component) - instr->reg = allocate_numeric_registers_for_type(ctx, allocator, - instr->index, instr->last_read, instr->data_type); + { + instr->reg.writemask = vkd3d_write_mask_from_component_count(instr->data_type->e.numeric.dimx); + instr->reg.allocation_size = 1; + instr->reg.allocated = true; + instr->reg.type = VKD3DSPR_TEMP; + instr->reg.id = ctx->temp_count++; + } else { instr->reg.writemask = vkd3d_write_mask_from_component_count(instr->data_type->e.numeric.dimx); @@ -6344,40 +6598,46 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, instr->reg.id = ctx->ssa_count++; }
- TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, - debug_register(instr->reg, instr->data_type), instr->index, instr->last_read); + TRACE("Allocated anonymous expression @%u to %s.\n", instr->index, + debug_register(instr->reg, instr->data_type)); }
-static void allocate_variable_temp_register(struct hlsl_ctx *ctx, - struct hlsl_ir_var *var, struct register_allocator *allocator) +static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) { + struct hlsl_reg *reg = &var->regs[HLSL_REGSET_NUMERIC]; + if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) return;
- if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) + if (!reg->allocated && var->last_read) { if (var->indexable) { - var->regs[HLSL_REGSET_NUMERIC].id = allocator->indexable_count++; - var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; - var->regs[HLSL_REGSET_NUMERIC].writemask = 0; - var->regs[HLSL_REGSET_NUMERIC].allocated = true; + reg->id = ctx->indexable_temp_count++; + reg->allocation_size = 1; + reg->writemask = 0; + reg->allocated = true;
- TRACE("Allocated %s to x%u[].\n", var->name, var->regs[HLSL_REGSET_NUMERIC].id); + TRACE("Allocated %s to x%u[].\n", var->name, reg->id); } else { - var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, - var->first_write, var->last_read, var->data_type); + reg->type = VKD3DSPR_TEMP; + reg->id = ctx->temp_count; + reg->allocation_size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + if (var->data_type->class <= HLSL_CLASS_VECTOR) + reg->writemask = vkd3d_write_mask_from_component_count(var->data_type->e.numeric.dimx); + reg->allocated = true; + + ctx->temp_count += reg->allocation_size;
- TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, - debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); + TRACE("Allocated %s to %s.\n", var->name, + debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } } }
-static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, - struct hlsl_block *block, struct register_allocator *allocator) +static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block) { struct hlsl_ir_node *instr;
@@ -6387,15 +6647,15 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) continue;
- allocate_instr_temp_register(ctx, instr, allocator); + allocate_instr_temp_register(ctx, instr);
switch (instr->type) { case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_temp_registers_recurse(ctx, &iff->then_block, allocator); - allocate_temp_registers_recurse(ctx, &iff->else_block, allocator); + allocate_temp_registers_recurse(ctx, &iff->then_block); + allocate_temp_registers_recurse(ctx, &iff->else_block); break; }
@@ -6404,21 +6664,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_load *load = hlsl_ir_load(instr); /* We need to at least allocate a variable for undefs. * FIXME: We should probably find a way to remove them instead. */ - allocate_variable_temp_register(ctx, load->src.var, allocator); + allocate_variable_temp_register(ctx, load->src.var); break; }
case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_temp_registers_recurse(ctx, &loop->body, allocator); + allocate_temp_registers_recurse(ctx, &loop->body); break; }
case HLSL_IR_STORE: { struct hlsl_ir_store *store = hlsl_ir_store(instr); - allocate_variable_temp_register(ctx, store->lhs.var, allocator); + allocate_variable_temp_register(ctx, store->lhs.var); break; }
@@ -6429,7 +6689,7 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx,
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - allocate_temp_registers_recurse(ctx, &c->body, allocator); + allocate_temp_registers_recurse(ctx, &c->body); } break; } @@ -6553,8 +6813,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, break; }
- constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - constant->reg.type = VKD3DSPR_CONST; + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register(constant->reg, type));
for (unsigned int x = 0, i = 0; x < 4; ++x) @@ -6651,16 +6910,14 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl { type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4);
- ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - ctx->d3dsincosconst1.type = VKD3DSPR_CONST; + ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, type); TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register(ctx->d3dsincosconst1, type)); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f, &instr->loc);
- ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - ctx->d3dsincosconst2.type = VKD3DSPR_CONST; + ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, type); TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register(ctx->d3dsincosconst2, type)); record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc); @@ -6674,8 +6931,7 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl
static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *body) { - struct register_allocator allocator_used = {0}; - struct register_allocator allocator = {0}; + struct register_allocator allocator = {.type = VKD3DSPR_CONST}, allocator_used = {.type = VKD3DSPR_CONST}; struct hlsl_ir_var *var;
sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC); @@ -6698,15 +6954,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *bo { if (i < bind_count) { - if (get_available_writemask(&allocator_used, 1, UINT_MAX, - reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) + if (get_available_writemask(&allocator_used, reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Overlapping register() reservations on 'c%u'.", reg_idx + i); } - record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); + record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 0, false); } - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 0, false); }
var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST; @@ -6730,8 +6985,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *bo
if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); - var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST; + var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, alloc_size, 0, false); TRACE("Allocated %s to %s.\n", var->name, debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } @@ -6748,12 +7002,13 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *bo * index to all (simultaneously live) variables or intermediate values. Agnostic * as to how many registers are actually available for the current backend, and * does not handle constants. */ -static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_block *body, struct list *semantic_vars) +static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_block *body, struct list *semantic_vars) { - struct register_allocator allocator = {0}; struct hlsl_scope *scope; struct hlsl_ir_var *var;
+ ctx->indexable_temp_count = 0; + /* Reset variable temp register allocations. */ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) { @@ -6771,29 +7026,17 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_block { if (var->is_output_semantic) { - record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, - var->first_write, UINT_MAX, 0, false); + ctx->temp_count = 1; break; } } }
- allocate_temp_registers_recurse(ctx, body, &allocator); - vkd3d_free(allocator.allocations); - - if (allocator.indexable_count) - TRACE("Declaration of %s function required %u temp registers, and %u indexable temps.\n", - ctx->is_patch_constant_func ? "patch constant" : "main", - allocator.reg_count, allocator.indexable_count); - else - TRACE("Declaration of %s function required %u temp registers.\n", - ctx->is_patch_constant_func ? "patch constant" : "main", allocator.reg_count); - - return allocator.reg_count; + allocate_temp_registers_recurse(ctx, body); }
-static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, - unsigned int storage_modifiers) +static enum vkd3d_shader_interpolation_mode get_interpolation_mode(const struct vkd3d_shader_version *version, + struct hlsl_type *type, unsigned int storage_modifiers) { unsigned int i;
@@ -6815,6 +7058,9 @@ static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hl
VKD3D_ASSERT(hlsl_is_numeric_type(type));
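+ /* Only the linear and centroid interpolation modifiers are honoured before SM4; the others are ignored. */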
+ if (version->major < 4) + storage_modifiers &= HLSL_STORAGE_LINEAR | HLSL_STORAGE_CENTROID; + if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT) return VKD3DSIM_CONSTANT; @@ -6921,16 +7167,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { unsigned int component_count = is_primitive ? var->data_type->e.array.type->e.numeric.dimx : var->data_type->e.numeric.dimx; - int mode = (ctx->profile->major_version < 4) - ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); unsigned int reg_size = optimize ? component_count : 4; + int mode = VKD3DSIM_NONE;
- if (special_interpolation) - mode = VKD3DSIM_NONE; + if (version.major >= 4 && !special_interpolation) + mode = get_interpolation_mode(&version, var->data_type, var->storage_modifiers);
- var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, + var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, reg_size, component_count, mode, var->force_align, vip_allocation); - var->regs[HLSL_REGSET_NUMERIC].type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT;
TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); @@ -6945,11 +7189,17 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct list *seman bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; struct hlsl_ir_var *var;
+ in_prim_allocator.type = VKD3DSPR_INPUT; in_prim_allocator.prioritize_smaller_writemasks = true; + patch_constant_out_patch_allocator.type = VKD3DSPR_INPUT; patch_constant_out_patch_allocator.prioritize_smaller_writemasks = true; + input_allocator.type = VKD3DSPR_INPUT; input_allocator.prioritize_smaller_writemasks = true; for (unsigned int i = 0; i < ARRAY_SIZE(output_allocators); ++i) + { + output_allocators[i].type = VKD3DSPR_OUTPUT; output_allocators[i].prioritize_smaller_writemasks = true; + }
LIST_FOR_EACH_ENTRY(var, semantic_vars, struct hlsl_ir_var, extern_entry) { @@ -8263,9 +8513,233 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod
void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) { - lower_ir(ctx, lower_index_loads, body); + replace_ir(ctx, lower_index_loads, body); +} + +static enum hlsl_ir_expr_op invert_comparison_op(enum hlsl_ir_expr_op op) +{ + switch (op) + { + case HLSL_OP2_EQUAL: + return HLSL_OP2_NEQUAL; + + case HLSL_OP2_GEQUAL: + return HLSL_OP2_LESS; + + case HLSL_OP2_LESS: + return HLSL_OP2_GEQUAL; + + case HLSL_OP2_NEQUAL: + return HLSL_OP2_EQUAL; + + default: + vkd3d_unreachable(); + } +} + +static struct hlsl_ir_node *fold_unary_identities(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_expr *expr, *x; + + if (instr->type != HLSL_IR_EXPR) + return NULL; + + if (instr->data_type->class > HLSL_CLASS_VECTOR) + return NULL; + + expr = hlsl_ir_expr(instr); + if (!expr->operands[0].node) + return NULL; + + if (expr->operands[0].node->type != HLSL_IR_EXPR) + return NULL; + x = hlsl_ir_expr(expr->operands[0].node); + + switch (expr->op) + { + case HLSL_OP1_ABS: + /* ||x|| -> |x| */ + if (x->op == HLSL_OP1_ABS) + return &x->node; + + /* |-x| -> |x| */ + if (x->op == HLSL_OP1_NEG) + { + hlsl_src_remove(&expr->operands[0]); + hlsl_src_from_node(&expr->operands[0], x->operands[0].node); + return &expr->node; + } + break; + + case HLSL_OP1_BIT_NOT: + /* ~(~x) -> x */ + if (x->op == HLSL_OP1_BIT_NOT) + return x->operands[0].node; + break; + + case HLSL_OP1_CEIL: + case HLSL_OP1_FLOOR: + /* f(g(x)) -> g(x), where f(), g() are floor() or ceil() functions. */ + if (x->op == HLSL_OP1_CEIL || x->op == HLSL_OP1_FLOOR) + return &x->node; + break; + + case HLSL_OP1_NEG: + /* -(-x) -> x */ + if (x->op == HLSL_OP1_NEG) + return x->operands[0].node; + break; + + case HLSL_OP1_LOGIC_NOT: + /* !!x -> x */ + if (x->op == HLSL_OP1_LOGIC_NOT) + return x->operands[0].node; + + if (hlsl_is_comparison_op(x->op) + && hlsl_base_type_is_integer(x->operands[0].node->data_type->e.numeric.type) + && hlsl_base_type_is_integer(x->operands[1].node->data_type->e.numeric.type)) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {x->operands[0].node, x->operands[1].node}; + + /* !(x == y) -> x != y, !(x < y) -> x >= y, etc. */ + return hlsl_block_add_expr(ctx, block, invert_comparison_op(x->op), + operands, instr->data_type, &instr->loc); + } + + break; + + default: + break; + } + + return NULL; +} + +static bool nodes_are_equivalent(const struct hlsl_ir_node *c1, const struct hlsl_ir_node *c2) +{ + if (c1 == c2) + return true; + + if (c1->type == HLSL_IR_SWIZZLE && c2->type == HLSL_IR_SWIZZLE + && hlsl_types_are_equal(c1->data_type, c2->data_type)) + { + const struct hlsl_ir_swizzle *s1 = hlsl_ir_swizzle(c1), *s2 = hlsl_ir_swizzle(c2); + + VKD3D_ASSERT(c1->data_type->class <= HLSL_CLASS_VECTOR); + + if (s1->val.node == s2->val.node && s1->u.vector == s2->u.vector) + return true; + } + + return false; +} + +/* Replaces all conditionals in an expression chain of the form (cond ? x : y) + * with x or y, assuming cond = cond_value. 
*/ +static struct hlsl_ir_node *evaluate_conditionals_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, const struct hlsl_ir_node *cond, bool cond_value, + struct hlsl_ir_node *instr, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_expr *expr; + struct hlsl_ir_node *res; + bool progress = false; + unsigned int i; + + if (instr->type != HLSL_IR_EXPR) + return NULL; + expr = hlsl_ir_expr(instr); + + if (expr->op == HLSL_OP3_TERNARY && nodes_are_equivalent(cond, expr->operands[0].node)) + { + struct hlsl_ir_node *x = cond_value ? expr->operands[1].node : expr->operands[2].node; + + res = evaluate_conditionals_recurse(ctx, block, cond, cond_value, x, loc); + return res ? res : x; + } + + for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + { + if (!expr->operands[i].node) + break; + + operands[i] = evaluate_conditionals_recurse(ctx, block, cond, cond_value, expr->operands[i].node, loc); + + if (operands[i]) + progress = true; + else + operands[i] = expr->operands[i].node; + } + + if (progress) + return hlsl_block_add_expr(ctx, block, expr->op, operands, expr->node.data_type, loc); + + return NULL; }
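+/* Fold ternary identities: equal arms, constant conditions, boolean constant arms, negated conditions, and conditions that reappear inside an arm. */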
+static struct hlsl_ir_node *fold_conditional_identities(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *c, *x, *y, *res_x, *res_y; + struct hlsl_ir_expr *expr, *ec; + + if (instr->type != HLSL_IR_EXPR) + return NULL; + + if (instr->data_type->class > HLSL_CLASS_VECTOR) + return NULL; + + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP3_TERNARY) + return NULL; + + c = expr->operands[0].node; + x = expr->operands[1].node; + y = expr->operands[2].node; + + VKD3D_ASSERT(c->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + /* c ? x : x -> x */ + if (nodes_are_equivalent(x, y)) + return x; + + if (c->type == HLSL_IR_CONSTANT) + { + /* false ? x : y -> y */ + if (hlsl_constant_is_zero(hlsl_ir_constant(c))) + return y; + + /* true ? x : y -> x */ + if (hlsl_constant_is_one(hlsl_ir_constant(c))) + return x; + } + + if (x->type == HLSL_IR_CONSTANT && y->type == HLSL_IR_CONSTANT + && hlsl_types_are_equal(c->data_type, x->data_type) + && hlsl_types_are_equal(c->data_type, y->data_type)) + { + /* c ? true : false -> c */ + if (hlsl_constant_is_one(hlsl_ir_constant(x)) && hlsl_constant_is_zero(hlsl_ir_constant(y))) + return c; + + /* c ? false : true -> !c */ + if (hlsl_constant_is_zero(hlsl_ir_constant(x)) && hlsl_constant_is_one(hlsl_ir_constant(y))) + return hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_LOGIC_NOT, c, &instr->loc); + } + + /* !c ? x : y -> c ? y : x */ + ec = c->type == HLSL_IR_EXPR ? hlsl_ir_expr(c) : NULL; + if (ec && ec->op == HLSL_OP1_LOGIC_NOT) + return hlsl_add_conditional(ctx, block, ec->operands[0].node, y, x); + + res_x = evaluate_conditionals_recurse(ctx, block, c, true, x, &instr->loc); + res_y = evaluate_conditionals_recurse(ctx, block, c, false, y, &instr->loc); + if (res_x || res_y) + return hlsl_add_conditional(ctx, block, c, res_x ? res_x : x, res_y ? res_y : y); + + return NULL; +}
static bool simplify_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block) { @@ -8273,10 +8747,12 @@ static bool simplify_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block)
do { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, block, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); + progress = replace_ir(ctx, hlsl_fold_constant_exprs, block); + progress |= replace_ir(ctx, hlsl_fold_binary_exprs, block); + progress |= replace_ir(ctx, fold_unary_identities, block); + progress |= replace_ir(ctx, fold_conditional_identities, block); + progress |= replace_ir(ctx, hlsl_fold_constant_identities, block); + progress |= replace_ir(ctx, hlsl_fold_constant_swizzles, block);
any_progress |= progress; } while (progress); @@ -8288,27 +8764,28 @@ static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *bod { bool progress;
- hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + replace_ir(ctx, fold_redundant_casts, body); do { progress = simplify_exprs(ctx, body); progress |= hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= replace_ir(ctx, fold_swizzle_chains, body); + progress |= replace_ir(ctx, fold_trivial_swizzles, body); progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); + progress |= hlsl_transform_ir(ctx, flatten_conditional_branches, body, NULL); } while (progress); - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + replace_ir(ctx, fold_redundant_casts, body); }
void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { bool progress;
- lower_ir(ctx, lower_complex_casts, body); - lower_ir(ctx, lower_matrix_swizzles, body); + replace_ir(ctx, lower_complex_casts, body); + replace_ir(ctx, lower_matrix_swizzles, body);
- lower_ir(ctx, lower_broadcasts, body); - while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); + replace_ir(ctx, lower_broadcasts, body); + while (replace_ir(ctx, fold_redundant_casts, body)); do { progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); @@ -8317,16 +8794,16 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) while (progress); hlsl_transform_ir(ctx, split_matrix_copies, body, NULL);
- lower_ir(ctx, lower_narrowing_casts, body); - lower_ir(ctx, lower_int_dot, body); + replace_ir(ctx, lower_narrowing_casts, body); + replace_ir(ctx, lower_int_dot, body); if (hlsl_version_ge(ctx, 4, 0)) { - lower_ir(ctx, lower_int_modulus_sm4, body); - lower_ir(ctx, lower_int_division_sm4, body); + replace_ir(ctx, lower_int_modulus_sm4, body); + replace_ir(ctx, lower_int_division_sm4, body); } - lower_ir(ctx, lower_int_abs, body); - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_float_modulus, body); + replace_ir(ctx, lower_int_abs, body); + replace_ir(ctx, lower_casts_to_bool, body); + replace_ir(ctx, lower_float_modulus, body);
hlsl_run_folding_passes(ctx, body); } @@ -8335,6 +8812,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog struct shader_signature *signature, bool output, struct hlsl_ir_var *var) { enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID; + enum vkd3d_shader_interpolation_mode interpolation_mode = VKD3DSIM_NONE; bool is_primitive = hlsl_type_is_primitive_array(var->data_type); enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; unsigned int register_index, mask, use_mask; @@ -8342,6 +8820,9 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog enum vkd3d_shader_register_type type; struct signature_element *element;
+ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + interpolation_mode = get_interpolation_mode(&program->shader_version, var->data_type, var->storage_modifiers); + if (hlsl_version_ge(ctx, 4, 0)) { struct vkd3d_string_buffer *string; @@ -8421,6 +8902,13 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog || (type == VKD3DSPR_INPUT && program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL)) register_index += SM1_COLOR_REGISTER_OFFSET; } + + if (interpolation_mode == VKD3DSIM_LINEAR_CENTROID + && (vkd3d_shader_ver_ge(&program->shader_version, 3, 0) || type != VKD3DSPR_TEXTURE)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "The centroid interpolation mode is not supported by the '%s' semantic.", var->semantic.name); + } } else { @@ -8448,6 +8936,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) { + program->has_point_size = true; if (var->data_type->e.numeric.dimx > 1) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "PSIZE output must have only 1 component in this shader model."); @@ -8487,13 +8976,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog element->register_count = 1; element->mask = mask; element->used_mask = use_mask; - if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) - { - if (program->shader_version.major >= 4) - element->interpolation_mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - else - element->interpolation_mode = VKD3DSIM_LINEAR; - } + element->interpolation_mode = interpolation_mode;
switch (var->data_type->e.numeric.type) { @@ -8570,9 +9053,8 @@ static enum vsir_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, c case HLSL_TYPE_DOUBLE: return VSIR_DATA_F64; case HLSL_TYPE_FLOAT: - return VSIR_DATA_F32; case HLSL_TYPE_HALF: - return VSIR_DATA_F16; + return VSIR_DATA_F32; case HLSL_TYPE_INT: return VSIR_DATA_I32; case HLSL_TYPE_UINT: @@ -8703,7 +9185,7 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, semantic->resource_type = resource_type;
dst_param = &semantic->resource.reg; - vsir_register_init(&dst_param->reg, VKD3DSPR_SAMPLER, VSIR_DATA_F32, 1); + vsir_register_init(&dst_param->reg, VKD3DSPR_COMBINED_SAMPLER, VSIR_DATA_F32, 1); dst_param->reg.dimension = VSIR_DIMENSION_NONE; dst_param->reg.idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index + i; dst_param->write_mask = 0; @@ -8784,6 +9266,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src }
src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; for (i = 0, j = 0; i < 4; ++i) { if ((map_writemask & (1u << i)) && (j < width)) @@ -8885,6 +9368,8 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p reg->type = VKD3DSPR_RESOURCE; reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + if (vkd3d_shader_ver_le(version, 5, 0)) + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; reg->idx[1].offset += hlsl_offset_from_deref_safe(ctx, deref); reg->idx_count = 2; @@ -8896,6 +9381,8 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p reg->type = VKD3DSPR_UAV; reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + if (vkd3d_shader_ver_le(version, 5, 0)) + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; reg->idx[1].offset += hlsl_offset_from_deref_safe(ctx, deref); reg->idx_count = 2; @@ -8907,6 +9394,8 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p reg->type = VKD3DSPR_SAMPLER; reg->dimension = VSIR_DIMENSION_NONE; reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + if (vkd3d_shader_ver_le(version, 5, 0)) + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; reg->idx[1].offset += hlsl_offset_from_deref_safe(ctx, deref); reg->idx_count = 2; @@ -9467,8 +9956,6 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break;
case HLSL_OP3_CMP: - if (!hlsl_type_is_floating_point(type)) - goto err; generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_CMP, 0, 0, true); break;
@@ -9530,7 +10017,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, else writemask = (1u << deref->var->data_type->e.numeric.dimx) - 1;
- if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") + if (version.type == VKD3D_SHADER_TYPE_VERTEX && (!ascii_strcasecmp(semantic_name, "PSIZE") || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) { /* These are always 1-component, but for some reason are written @@ -9817,6 +10304,8 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) { + VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 4); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_TEXKILL, 0, 1))) return;
@@ -9837,11 +10326,9 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program struct hlsl_ir_node *instr = &iff->node; struct vkd3d_shader_instruction *ins;
- if (hlsl_version_lt(ctx, 2, 1)) - { - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - return; - } + /* Conditional branches should have already been flattened for SM < 2.1. */ + VKD3D_ASSERT(hlsl_version_ge(ctx, 2, 1)); + VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1);
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_IFC, 0, 2))) @@ -9933,7 +10420,8 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_co struct hlsl_block block;
program->ssa_count = 0; - program->temp_count = allocate_temp_registers(ctx, body, semantic_vars); + program->temp_count = 0; + allocate_temp_registers(ctx, body, semantic_vars); if (ctx->result) return;
@@ -9945,6 +10433,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_co sm1_generate_vsir_block(ctx, body, program);
program->ssa_count = ctx->ssa_count; + program->temp_count = ctx->temp_count;
if (ctx->result) return; @@ -10000,7 +10489,8 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) vkd3d_unreachable(); }
-D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler) +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, + bool is_combined_sampler, enum hlsl_sampler_dim sampler_dim) { enum hlsl_type_class class = type->class;
@@ -10039,7 +10529,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_comb break;
case HLSL_CLASS_SAMPLER: - switch (type->sampler_dim) + switch (sampler_dim) { case HLSL_SAMPLER_DIM_1D: return D3DXPT_SAMPLER1D; @@ -10077,7 +10567,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_comb break;
case HLSL_CLASS_ARRAY: - return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler); + return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler, sampler_dim);
case HLSL_CLASS_STRUCT: return D3DXPT_VOID; @@ -10115,15 +10605,19 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_comb vkd3d_unreachable(); }
-static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, - struct hlsl_type *type, bool is_combined_sampler, unsigned int ctab_start) +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, + bool is_combined_sampler, enum hlsl_sampler_dim sampler_dim, unsigned int ctab_start) { const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); unsigned int array_size = hlsl_get_multiarray_size(type); struct hlsl_struct_field *field; size_t i;
- if (type->bytecode_offset) + /* Native deduplicates types, but emits the correct dimension for generic + * samplers. Apparently it deals with this by never deduplicating any + * sampler types. This is not very efficient, but we may as well do the + * same. */ + if (type->bytecode_offset && array_type->class != HLSL_CLASS_SAMPLER) return;
if (array_type->class == HLSL_CLASS_STRUCT) @@ -10135,7 +10629,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, { field = &array_type->e.record.fields[i]; field->name_bytecode_offset = put_string(buffer, field->name); - write_sm1_type(buffer, field->type, false, ctab_start); + write_sm1_type(buffer, field->type, false, HLSL_SAMPLER_DIM_GENERIC, ctab_start); }
fields_offset = bytecode_align(buffer) - ctab_start; @@ -10155,9 +10649,11 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, else { type->bytecode_offset = put_u32(buffer, - vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); + vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler, sampler_dim))); if (hlsl_is_numeric_type(array_type)) put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx)); + else if (is_combined_sampler) + put_u32(buffer, vkd3d_make_u32(1, 4)); else put_u32(buffer, vkd3d_make_u32(1, 1)); put_u32(buffer, vkd3d_make_u32(array_size, 0)); @@ -10211,7 +10707,9 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe
++uniform_count;
- if (var->is_param && var->is_uniform) + /* Not var->is_uniform. The $ prefix is only added if the variable + * is actually declared with a 'uniform' modifier. */ + if (var->is_param && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { char *new_name;
@@ -10268,17 +10766,33 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe { for (r = 0; r <= HLSL_REGSET_LAST; ++r) { + enum hlsl_sampler_dim sampler_dim = HLSL_SAMPLER_DIM_GENERIC; size_t var_offset, name_offset;
if (var->semantic.name || !var->regs[r].allocated || !var->last_read) continue;
+ /* Arrays can be used with multiple different dimensions. + * The dimension written into the CTAB is the dimension of the + * first usage, which is not really that sensible... */ + if (r == HLSL_REGSET_SAMPLERS) + { + for (unsigned int i = 0; i < var->bind_count[r]; ++i) + { + if (var->objects_usage[r][i].sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + sampler_dim = var->objects_usage[r][i].sampler_dim; + break; + } + } + } + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t));
name_offset = put_string(buffer, var->name); set_u32(buffer, var_offset, name_offset - ctab_start);
- write_sm1_type(buffer, var->data_type, var->is_combined_sampler, ctab_start); + write_sm1_type(buffer, var->data_type, var->is_combined_sampler, sampler_dim, ctab_start); set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start);
if (var->default_values) @@ -10539,7 +11053,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs dst_param->write_mask = write_mask;
if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) - ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); + ins->flags = get_interpolation_mode(version, var->data_type, var->storage_modifiers); }
static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, @@ -10795,11 +11309,32 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_ROUND_PI, 0, 0, true); return true;
+ case HLSL_OP1_CLZ: + VKD3D_ASSERT(hlsl_type_is_integer(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + if (hlsl_type_is_signed_integer(src_type)) + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_SHI, 0, 0, true); + else + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_HI, 0, 0, true); + return true; + case HLSL_OP1_COS: VKD3D_ASSERT(type_is_float(dst_type)); sm4_generate_vsir_expr_with_two_destinations(ctx, program, VSIR_OP_SINCOS, expr, 1); return true;
+ case HLSL_OP1_COUNTBITS: + VKD3D_ASSERT(hlsl_type_is_integer(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_COUNTBITS, 0, 0, true); + return true; + + case HLSL_OP1_CTZ: + VKD3D_ASSERT(hlsl_type_is_integer(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_LO, 0, 0, true); + return true; + case HLSL_OP1_DSX: VKD3D_ASSERT(type_is_float(dst_type)); generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_DSX, 0, 0, true); @@ -11437,6 +11972,7 @@ static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, const struct vkd3d_shader_version *version = &program->shader_version; const struct hlsl_ir_node *sample_index = load->sample_index.node; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *byte_offset = load->byte_offset.node; const struct hlsl_ir_node *coords = load->coords.node; unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; const struct hlsl_deref *resource = &load->resource; @@ -11444,20 +11980,15 @@ static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim = load->sampling_dim; bool tgsm = load->resource.var->is_tgsm; struct vkd3d_shader_instruction *ins; + bool multisampled, raw, structured; enum vkd3d_shader_opcode opcode; - bool multisampled, raw;
VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD);
- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - hlsl_fixme(ctx, &load->node.loc, "Structured buffer loads."); - return false; - } - multisampled = resource_type->class == HLSL_CLASS_TEXTURE && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + structured = resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER;
if (!tgsm) { @@ -11468,15 +11999,19 @@ static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, hlsl_fixme(ctx, &load->node.loc, "Load from structured TGSM."); return false; } + VKD3D_ASSERT(!(structured && multisampled));
- if (uav) + if (structured) + opcode = VSIR_OP_LD_STRUCTURED; + else if (uav) opcode = VSIR_OP_LD_UAV_TYPED; else if (raw) opcode = VSIR_OP_LD_RAW; else opcode = multisampled ? VSIR_OP_LD2DMS : VSIR_OP_LD;
- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled))) + if (!(ins = generate_vsir_add_program_instruction(ctx, program, + &instr->loc, opcode, 1, 2 + multisampled + structured))) return false;
if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) @@ -11504,10 +12039,15 @@ static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask);
if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, - &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + &ins->src[structured ? 2 : 1], resource, ins->dst[0].write_mask, &instr->loc)) return false;
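+ /* ld_structured reads (structure index, byte offset, resource); the index has already been stored in src[0] as the coordinate. */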
- if (multisampled) + if (structured) + { + VKD3D_ASSERT(byte_offset); + vsir_src_from_hlsl_node(&ins->src[1], ctx, byte_offset, VKD3DSP_WRITEMASK_ALL); + } + else if (multisampled) { if (sample_index->type == HLSL_IR_CONSTANT) vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, @@ -12093,16 +12633,15 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, struct list *se struct hlsl_block block = {0}; struct hlsl_scope *scope; struct hlsl_ir_var *var; - uint32_t temp_count;
ctx->is_patch_constant_func = func == ctx->patch_constant_func;
compute_liveness(ctx, body); mark_indexable_vars(ctx, body); - temp_count = allocate_temp_registers(ctx, body, semantic_vars); + allocate_temp_registers(ctx, body, semantic_vars); if (ctx->result) return; - program->temp_count = max(program->temp_count, temp_count); + program->temp_count = max(program->temp_count, ctx->temp_count);
hlsl_block_init(&block);
@@ -12113,8 +12652,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, struct list *se sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, &block, &var->loc); }
- if (temp_count) - sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); + if (ctx->temp_count) + sm4_generate_vsir_instr_dcl_temps(ctx, program, ctx->temp_count, &block, &func->loc);
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) { @@ -12243,7 +12782,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un extern_resources[*count].component_type = component_type;
extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].id = var->regs[regset].id + regset_offset; extern_resources[*count].space = var->regs[regset].space; extern_resources[*count].index = var->regs[regset].index + regset_offset; extern_resources[*count].bind_count = 1; @@ -12427,7 +12966,7 @@ static void sm4_generate_vsir_add_dcl_constant_buffer(struct hlsl_ctx *ctx, return; }
- ins->declaration.cb.size = cbuffer->size; + ins->declaration.cb.size = align(cbuffer->size, 4) * sizeof(float);
src_param = &ins->declaration.cb.src; vsir_src_param_init(src_param, VKD3DSPR_CONSTBUFFER, VSIR_DATA_F32, 0); @@ -12478,16 +13017,16 @@ static void sm4_generate_vsir_add_dcl_sampler(struct hlsl_ctx *ctx, ins->declaration.sampler.range.last = array_last; ins->declaration.sampler.range.space = resource->space;
- src_param->reg.idx[0].offset = resource->id; + src_param->reg.idx[0].offset = resource->id + i; src_param->reg.idx[1].offset = array_first; src_param->reg.idx[2].offset = array_last; src_param->reg.idx_count = 3; } }
-static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const struct hlsl_type *type) +static enum vkd3d_shader_resource_type get_vsir_resource_type(enum hlsl_sampler_dim sampler_dim) { - switch (type->sampler_dim) + switch (sampler_dim) { case HLSL_SAMPLER_DIM_1D: return VKD3D_SHADER_RESOURCE_TEXTURE_1D; @@ -12516,7 +13055,7 @@ static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const } }
-static enum vsir_data_type sm4_generate_vsir_get_format_type(const struct hlsl_type *type) +static enum vsir_data_type get_vsir_resource_data_type(const struct hlsl_type *type) { const struct hlsl_type *format = type->e.resource.format;
@@ -12592,6 +13131,9 @@ static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, { switch (component_type->sampler_dim) { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + opcode = VSIR_OP_DCL_RESOURCE_STRUCTURED; + break; case HLSL_SAMPLER_DIM_RAW_BUFFER: opcode = VSIR_OP_DCL_RESOURCE_RAW; break; @@ -12633,13 +13175,11 @@ static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, vsir_resource->range.last = array_last; vsir_resource->range.space = resource->space;
- vsir_resource->reg.reg.idx[0].offset = resource->id; + vsir_resource->reg.reg.idx[0].offset = resource->id + i; vsir_resource->reg.reg.idx[1].offset = array_first; vsir_resource->reg.reg.idx[2].offset = array_last; vsir_resource->reg.reg.idx_count = 3;
- ins->resource_type = sm4_generate_vsir_get_resource_type(resource->component_type); - if (component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) { ins->raw = true; @@ -12647,13 +13187,15 @@ static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, else if (component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) { ins->structured = true; - ins->resource_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; + ins->resource_stride = hlsl_type_get_packed_size(component_type->e.resource.format); ins->declaration.structured_resource.byte_stride = ins->resource_stride; } else { + ins->declaration.semantic.resource_type = get_vsir_resource_type(resource->component_type->sampler_dim); + for (unsigned int j = 0; j < 4; ++j) - ins->declaration.semantic.resource_data_type[j] = sm4_generate_vsir_get_format_type(component_type); + ins->declaration.semantic.resource_data_type[j] = get_vsir_resource_data_type(component_type);
if (multisampled) ins->declaration.semantic.sample_count = component_type->sample_count; @@ -12784,6 +13326,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, }
program->ssa_count = 0; + program->temp_count = 0;
if (version->type == VKD3D_SHADER_TYPE_HULL) generate_vsir_add_program_instruction(ctx, program, @@ -12801,6 +13344,141 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, generate_vsir_scan_global_flags(ctx, program, semantic_vars, func);
 program->ssa_count = ctx->ssa_count;
+    program->temp_count = ctx->temp_count;
+}
+
+static void generate_vsir_descriptors_for_var(struct hlsl_ctx *ctx, struct vsir_program *program,
+        const struct hlsl_ir_var *var, enum hlsl_regset r, enum vkd3d_shader_descriptor_type type)
+{
+    unsigned int component_count = hlsl_type_component_count(var->data_type);
+
+    for (unsigned int k = 0; k < component_count; ++k)
+    {
+        const struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k);
+        struct vkd3d_shader_register_range range;
+        struct vkd3d_shader_descriptor_info1 *d;
+        unsigned int regset_offset;
+        enum hlsl_regset regset;
+        uint32_t id;
+
+        if (!hlsl_type_is_resource(component_type))
+            continue;
+        regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset);
+        if (regset != r)
+            continue;
+        if (regset_offset > var->regs[r].allocation_size)
+            continue;
+
+        if (!var->objects_usage[r][regset_offset].used)
+            continue;
+
+        id = var->regs[r].id + regset_offset;
+        range.space = var->regs[r].space;
+        range.first = var->regs[r].index + regset_offset;
+        /* FIXME: 5.1 arrays. */
+        range.last = var->regs[r].index + regset_offset;
+
+        if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER)
+        {
+            if (!(d = vsir_program_add_descriptor(program, type, id,
+                    &range, VKD3D_SHADER_RESOURCE_NONE, VSIR_DATA_UNUSED)))
+                return;
+            if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON)
+                d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE;
+        }
+        else
+        {
+            if (component_type->class == HLSL_CLASS_SAMPLER)
+                d = vsir_program_add_descriptor(program, type, id, &range,
+                        get_vsir_resource_type(var->objects_usage[r][regset_offset].sampler_dim), VSIR_DATA_F32);
+            else
+                d = vsir_program_add_descriptor(program, type, id, &range,
+                        get_vsir_resource_type(component_type->sampler_dim),
+                        get_vsir_resource_data_type(component_type));
+            if (!d)
+                return;
+
+            if (component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
+                d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER;
+            else if (component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+                d->structure_stride = hlsl_type_get_packed_size(component_type->e.resource.format);
+            else if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
+                    || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY)
+                d->sample_count = component_type->sample_count;
+
+            if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV && component_type->e.resource.rasteriser_ordered)
+                d->uav_flags |= VKD3DSUF_RASTERISER_ORDERED_VIEW;
+
+            if (var->objects_usage[r][regset_offset].uav_read)
+                d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ;
+            if (var->objects_usage[r][regset_offset].uav_atomics)
+                d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS;
+        }
+    }
+}
+
+static void generate_vsir_descriptors(struct hlsl_ctx *ctx, struct vsir_program *program)
+{
+    struct vkd3d_shader_register_range range;
+    struct vkd3d_shader_descriptor_info1 *d;
+    const struct hlsl_ir_var *var;
+
+    if (program->shader_version.major < 4)
+    {
+        uint32_t flat_constant_count = 0;
+
+        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+        {
+            const struct hlsl_reg *reg = &var->regs[HLSL_REGSET_NUMERIC];
+
+            if (var->is_uniform && reg->allocation_size)
+                flat_constant_count = max(flat_constant_count, reg->id + reg->allocation_size);
+
+            generate_vsir_descriptors_for_var(ctx, program, var,
+                    HLSL_REGSET_SAMPLERS, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER);
+            generate_vsir_descriptors_for_var(ctx, program, var,
+                    HLSL_REGSET_SAMPLERS, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV);
+        }
+
+        if (flat_constant_count)
+        {
+            range.space = 0;
+            range.first = range.last = VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER;
+            if ((d = vsir_program_add_descriptor(program, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV,
+                    range.first, &range, VKD3D_SHADER_RESOURCE_BUFFER, VSIR_DATA_U32)))
+                d->buffer_size = flat_constant_count * 16;
+        }
+    }
+    else
+    {
+        struct hlsl_buffer *buffer;
+
+        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+        {
+            generate_vsir_descriptors_for_var(ctx, program, var,
+                    HLSL_REGSET_SAMPLERS, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER);
+            generate_vsir_descriptors_for_var(ctx, program, var,
+                    HLSL_REGSET_TEXTURES, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV);
+            generate_vsir_descriptors_for_var(ctx, program, var,
+                    HLSL_REGSET_UAVS, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV);
+        }
+
+        LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry)
+        {
+            if (!buffer->reg.allocated)
+                continue;
+
+            range.space = buffer->reg.space;
+            range.first = buffer->reg.index;
+            /* FIXME: 5.1 arrays. */
+            range.last = buffer->reg.index;
+            if ((d = vsir_program_add_descriptor(program, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV,
+                    buffer->reg.id, &range, VKD3D_SHADER_RESOURCE_BUFFER, VSIR_DATA_U32)))
+                d->buffer_size = align(buffer->size, 4) * sizeof(float);
+        }
+    }
+
+    program->has_descriptor_info = true;
 }
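
A note on the two buffer-size computations above, as standalone C (illustrative only; align() here is a local stand-in for vkd3d's helper, and treating buffer sizes as counts of float components is my assumption):

    #include <stdio.h>
    #include <stdint.h>

    /* Local stand-in for vkd3d's align() helper. */
    #define align(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        /* SM1: the flat constant table is reported as a single CBV;
         * each c# register is one vec4, i.e. 16 bytes. */
        uint32_t flat_constant_count = 7; /* highest c# used, plus one */
        printf("SM1 constant table: %u bytes\n", flat_constant_count * 16);

        /* SM4: a cbuffer of 9 float components is rounded up to 3 full
         * vec4 registers before converting to bytes. */
        uint32_t size_in_floats = 9;
        printf("SM4 cbuffer: %zu bytes\n", (size_t)align(size_in_floats, 4) * sizeof(float));
        return 0;
    }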
/* For some reason, for matrices, values from default value initializers end @@ -12915,14 +13593,16 @@ static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type)
static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { + bool structured = type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; + switch (type->class) { case HLSL_CLASS_SAMPLER: return D3D_SIT_SAMPLER; case HLSL_CLASS_TEXTURE: - return D3D_SIT_TEXTURE; + return structured ? D3D_SIT_STRUCTURED : D3D_SIT_TEXTURE; case HLSL_CLASS_UAV: - return D3D_SIT_UAV_RWTYPED; + return structured ? D3D_SIT_UAV_RWSTRUCTURED : D3D_SIT_UAV_RWTYPED; default: break; } @@ -12998,7 +13678,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) vkd3d_unreachable(); }
-static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + struct hlsl_type *type, bool structured) { const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); const char *name = array_type->name ? array_type->name : "<unnamed>"; @@ -13007,7 +13688,10 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b size_t name_offset = 0; size_t i;
- if (type->bytecode_offset) + if (!structured && type->bytecode_offset) + return; + + if (structured && type->packed_bytecode_offset) return;
if (profile->major_version >= 5) @@ -13029,7 +13713,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b continue;
field->name_bytecode_offset = put_string(buffer, field->name); - write_sm4_type(ctx, buffer, field->type); + write_sm4_type(ctx, buffer, field->type, structured); ++field_count; }
@@ -13038,15 +13722,29 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b for (i = 0; i < array_type->e.record.field_count; ++i) { struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + unsigned int field_type_offset, offset;
if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) continue;
put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); + + if (!structured) + field_type_offset = field->type->bytecode_offset; + else + field_type_offset = field->type->packed_bytecode_offset; + put_u32(buffer, field_type_offset); + + if (!structured) + offset = field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float); + else + offset = struct_field_get_packed_offset(array_type, i); + put_u32(buffer, offset); } - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); + if (!structured) + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); + else + type->packed_bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); put_u32(buffer, vkd3d_make_u32(array_size, field_count)); put_u32(buffer, fields_offset); @@ -13054,7 +13752,11 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b else { VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); + if (!structured) + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); + else + type->packed_bytecode_offset = put_u32(buffer, + vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx)); put_u32(buffer, vkd3d_make_u32(array_size, 0)); put_u32(buffer, 1); @@ -13073,9 +13775,9 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) { uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; - unsigned int cbuffer_count = 0, extern_resources_count, i, j; + size_t buffers_offset, resources_offset, creator_offset, string_offset; size_t cbuffer_position, resource_position, creator_position; + unsigned int buffer_count = 0, extern_resources_count, i, j; const struct hlsl_profile_info *profile = ctx->profile; struct vkd3d_bytecode_buffer buffer = {0}; struct extern_resource *extern_resources; @@ -13097,10 +13799,20 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - ++cbuffer_count; + ++buffer_count; + } + + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + + if (resource->buffer || resource->component_type->sampler_dim != HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + continue; + + ++buffer_count; }
- put_u32(&buffer, cbuffer_count); + put_u32(&buffer, buffer_count); cbuffer_position = put_u32(&buffer, 0); put_u32(&buffer, extern_resources_count); resource_position = put_u32(&buffer, 0); @@ -13141,12 +13853,19 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd put_u32(&buffer, sm4_resource_type(resource->component_type)); if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) { + bool structured = resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; unsigned int dimx = resource->component_type->e.resource.format->e.numeric.dimx;
put_u32(&buffer, sm4_data_type(resource->component_type)); put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + + if (structured) + put_u32(&buffer, hlsl_type_get_packed_size(resource->component_type->e.resource.format)); + else + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + + if (!structured) + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } else { @@ -13175,8 +13894,8 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd
/* Buffers. */
- cbuffers_offset = bytecode_align(&buffer); - set_u32(&buffer, cbuffer_position, cbuffers_offset); + buffers_offset = bytecode_align(&buffer); + set_u32(&buffer, cbuffer_position, buffers_offset); LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { unsigned int var_count = 0; @@ -13198,6 +13917,24 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER); }
+ for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + struct hlsl_type *resource_type; + + if (resource->buffer || resource->component_type->sampler_dim != HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + continue; + + resource_type = resource->component_type->e.resource.format; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, 1); /* var count */ + put_u32(&buffer, 0); /* variable offset */ + put_u32(&buffer, hlsl_type_get_packed_size(resource_type)); /* size */ + put_u32(&buffer, 0); /* FIXME: flags */ + put_u32(&buffer, D3D_CT_RESOURCE_BIND_INFO); + } + i = 0; LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { @@ -13205,7 +13942,18 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd continue;
string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + set_u32(&buffer, buffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + } + + for (j = 0; j < extern_resources_count; ++j) + { + const struct extern_resource *resource = &extern_resources[j]; + + if (resource->buffer || resource->component_type->sampler_dim != HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + continue; + + string_offset = put_string(&buffer, resource->name); + set_u32(&buffer, buffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); }
i = 0; @@ -13216,7 +13964,7 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd if (!cbuffer->reg.allocated) continue;
- set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + set_u32(&buffer, buffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start);
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -13255,7 +14003,7 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd
string_offset = put_string(&buffer, var->name); set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); + write_sm4_type(ctx, &buffer, var->data_type, false); set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset);
if (var->default_values) @@ -13298,6 +14046,42 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd } }
+ for (j = 0; j < extern_resources_count; ++j) + { + const struct extern_resource *resource = &extern_resources[j]; + struct hlsl_type *resource_type; + size_t vars_start; + + if (resource->buffer || resource->component_type->sampler_dim != HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + continue; + + resource_type = resource->component_type->e.resource.format; + + vars_start = bytecode_align(&buffer); + + set_u32(&buffer, buffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, 0); /* offset */ + put_u32(&buffer, hlsl_type_get_packed_size(resource_type)); + put_u32(&buffer, D3D_SVF_USED); + put_u32(&buffer, 0); /* type */ + put_u32(&buffer, 0); /* default value */ + + if (profile->major_version >= 5) + { + put_u32(&buffer, ~0u); /* texture start */ + put_u32(&buffer, 0); /* texture count */ + put_u32(&buffer, ~0u); /* sampler start */ + put_u32(&buffer, 0); /* sampler count */ + } + + string_offset = put_string(&buffer, "$Element"); + set_u32(&buffer, vars_start, string_offset); + write_sm4_type(ctx, &buffer, resource_type, true); + set_u32(&buffer, vars_start + 4 * sizeof(uint32_t), resource_type->packed_bytecode_offset); + } + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); set_u32(&buffer, creator_position, creator_offset);
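
The $Element entries above describe the packed layout of a structured buffer element, which hlsl_type_get_packed_size()/struct_field_get_packed_offset() compute instead of the cbuffer register layout. A standalone sketch of the difference, under the assumption that the packed layout simply appends fields while the register layout forbids straddling a 16-byte register:

    #include <stdio.h>

    int main(void)
    {
        /* Field sizes for a hypothetical struct { float3 a; float3 b; }. */
        unsigned int sizes[] = {12, 12};
        unsigned int packed = 0, reg = 0;

        for (unsigned int i = 0; i < 2; ++i)
        {
            /* Packed (structured buffer): fields are simply appended. */
            printf("field %u: packed offset %2u, ", i, packed);
            packed += sizes[i];

            /* cbuffer register layout: a field must not straddle a
             * 16-byte register boundary. */
            if (reg / 16 != (reg + sizes[i] - 1) / 16)
                reg = (reg + 15) & ~15u;
            printf("register offset %2u\n", reg);
            reg += sizes[i];
        }
        printf("packed size %u, register size %u\n", packed, reg);
        return 0;
    }

Here field b sits at packed offset 12 but register offset 16, giving strides of 24 and 28 bytes respectively.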
@@ -13384,7 +14168,7 @@ static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx load = hlsl_block_add_simple_load(ctx, dst, var, loc); cond = hlsl_block_add_unary_expr(ctx, dst, HLSL_OP1_LOGIC_NOT, load, loc);
- if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc))) + if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, loc))) return NULL; hlsl_block_add_instr(dst, iff);
@@ -13466,8 +14250,8 @@ static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *blo current_index = index_instructions(block, *index); progress |= copy_propagation_transform_block(ctx, block, state);
- progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL); + progress |= replace_ir(ctx, fold_swizzle_chains, block); + progress |= replace_ir(ctx, fold_trivial_swizzles, block); progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL); } while (progress);
@@ -13720,7 +14504,99 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc hlsl_transform_ir(ctx, resolve_loops, block, NULL); }
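
The next two passes lower countbits() and the CTZ intrinsic to bit-twiddling HLSL templates. As a standalone sanity check (plain C, not part of the patch), the same arithmetic verified against a naive bit scan:

    #include <assert.h>
    #include <stdint.h>

    /* C rendition of the SWAR popcount used by the countbits() template
     * below (its comment says it mirrors vkd3d_popcount()). */
    static uint32_t popcount(uint32_t v)
    {
        v -= (v >> 1) & 0x55555555;
        v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
        return (((v + (v >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;
    }

    /* C rendition of the ctz() template below; returns ~0u for 0. */
    static uint32_t ctz(uint32_t v)
    {
        uint32_t c = 31;

        v &= -v; /* Isolate the lowest set bit. */
        c = (v & 0x0000ffff) ? c - 16 : c;
        c = (v & 0x00ff00ff) ? c - 8 : c;
        c = (v & 0x0f0f0f0f) ? c - 4 : c;
        c = (v & 0x33333333) ? c - 2 : c;
        c = (v & 0x55555555) ? c - 1 : c;
        return v ? c : ~0u;
    }

    int main(void)
    {
        for (uint32_t v = 0; v < 1u << 16; ++v)
        {
            uint32_t bits = 0, low = ~0u;

            for (unsigned int i = 0; i < 32; ++i)
            {
                if (!(v & (1u << i)))
                    continue;
                ++bits;
                if (low == ~0u)
                    low = i;
            }
            assert(popcount(v) == bits);
            assert(ctz(v) == low);
        }
        return 0;
    }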
-static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+static struct hlsl_ir_node *lower_countbits(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_ir_node *call, *rhs;
+    struct hlsl_ir_expr *expr;
+    struct hlsl_ir_var *lhs;
+    char *body;
+
+    /* Like vkd3d_popcount(). */
+    static const char template[] =
+            "typedef uint%u uintX;\n"
+            "uintX countbits(uintX v)\n"
+            "{\n"
+            "    v -= (v >> 1) & 0x55555555;\n"
+            "    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);\n"
+            "    return (((v + (v >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;\n"
+            "}\n";
+
+    if (node->type != HLSL_IR_EXPR)
+        return NULL;
+
+    expr = hlsl_ir_expr(node);
+    if (expr->op != HLSL_OP1_COUNTBITS)
+        return NULL;
+
+    rhs = expr->operands[0].node;
+    if (!(body = hlsl_sprintf_alloc(ctx, template, hlsl_type_component_count(rhs->data_type))))
+        return NULL;
+    func = hlsl_compile_internal_function(ctx, "countbits", body);
+    vkd3d_free(body);
+    if (!func)
+        return NULL;
+
+    lhs = func->parameters.vars[0];
+    hlsl_block_add_simple_store(ctx, block, lhs, rhs);
+
+    if (!(call = hlsl_new_call(ctx, func, &node->loc)))
+        return NULL;
+    hlsl_block_add_instr(block, call);
+
+    return hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
+}
+
+static struct hlsl_ir_node *lower_ctz(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_ir_node *call, *rhs;
+    struct hlsl_ir_expr *expr;
+    struct hlsl_ir_var *lhs;
+    char *body;
+
+    /* ctz() returns the bit number of the least significant 1-bit.
+     * Bit numbers count from the least significant bit. */
+    static const char template[] =
+            "typedef uint%u uintX;\n"
+            "uintX ctz(uintX v)\n"
+            "{\n"
+            "    uintX c = 31;\n"
+            "    v &= -v;\n"
+            "    c = (v & 0x0000ffff) ? c - 16 : c;\n"
+            "    c = (v & 0x00ff00ff) ? c - 8 : c;\n"
+            "    c = (v & 0x0f0f0f0f) ? c - 4 : c;\n"
+            "    c = (v & 0x33333333) ? c - 2 : c;\n"
+            "    c = (v & 0x55555555) ? c - 1 : c;\n"
+            "    return v ? c : -1;\n"
+            "}\n";
+
+    if (node->type != HLSL_IR_EXPR)
+        return NULL;
+
+    expr = hlsl_ir_expr(node);
+    if (expr->op != HLSL_OP1_CTZ)
+        return NULL;
+
+    rhs = expr->operands[0].node;
+    if (!(body = hlsl_sprintf_alloc(ctx, template, hlsl_type_component_count(rhs->data_type))))
+        return NULL;
+    func = hlsl_compile_internal_function(ctx, "ctz", body);
+    vkd3d_free(body);
+    if (!func)
+        return NULL;
+
+    lhs = func->parameters.vars[0];
+    hlsl_block_add_simple_store(ctx, block, lhs, rhs);
+
+    if (!(call = hlsl_new_call(ctx, func, &node->loc)))
+        return NULL;
+    hlsl_block_add_instr(block, call);
+
+    return hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
+}
+
+static struct hlsl_ir_node *lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
 {
     struct hlsl_ir_function_decl *func;
     struct hlsl_ir_node *call, *rhs;
@@ -13775,29 +14651,28 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru
     expr = hlsl_ir_expr(node);
if (expr->op != HLSL_OP1_F16TOF32) - return false; + return NULL;
rhs = expr->operands[0].node; component_count = hlsl_type_component_count(rhs->data_type);
if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) - return false; + return NULL;
if (!(func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body))) - return false; + return NULL;
lhs = func->parameters.vars[0]; hlsl_block_add_simple_store(ctx, block, lhs, rhs);
if (!(call = hlsl_new_call(ctx, func, &node->loc))) - return false; + return NULL; hlsl_block_add_instr(block, call);
- hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); - return true; + return hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); }
-static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +static struct hlsl_ir_node *lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) { struct hlsl_ir_function_decl *func; struct hlsl_ir_node *call, *rhs; @@ -13835,34 +14710,94 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru "}\n";
if (node->type != HLSL_IR_EXPR) - return false; + return NULL;
expr = hlsl_ir_expr(node);
if (expr->op != HLSL_OP1_F32TOF16) - return false; + return NULL;
rhs = expr->operands[0].node; component_count = hlsl_type_component_count(rhs->data_type);
if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) - return false; + return NULL;
if (!(func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body))) - return false; + return NULL;
lhs = func->parameters.vars[0]; hlsl_block_add_simple_store(ctx, block, lhs, rhs);
if (!(call = hlsl_new_call(ctx, func, &node->loc))) - return false; + return NULL; hlsl_block_add_instr(block, call);
- hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); - return true; + return hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); +} + +static struct hlsl_ir_node *lower_find_msb(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *call, *rhs; + struct hlsl_ir_expr *expr; + struct hlsl_ir_var *lhs; + char *body; + + /* For positive numbers, find_msb() returns the bit number of the most + * significant 1-bit. For negative numbers, it returns the bit number of + * the most significant 0-bit. Bit numbers count from the least + * significant bit. */ + static const char template[] = + "typedef %s intX;\n" + "uint%u find_msb(intX v)\n" + "{\n" + " intX c, mask;\n" + " v = v < 0 ? ~v : v;\n" + " mask = v & 0xffff0000;\n" + " v = mask ? mask : v;\n" + " c = mask ? 16 : v ? 0 : -1;\n" + " mask = v & 0xff00ff00;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 8 : c;\n" + " mask = v & 0xf0f0f0f0;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 4 : c;\n" + " mask = v & 0xcccccccc;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 2 : c;\n" + " mask = v & 0xaaaaaaaa;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 1 : c;\n" + " return c;\n" + "}\n"; + + if (node->type != HLSL_IR_EXPR) + return NULL; + + expr = hlsl_ir_expr(node); + if (expr->op != HLSL_OP1_FIND_MSB) + return NULL; + + rhs = expr->operands[0].node; + if (!(body = hlsl_sprintf_alloc(ctx, template, rhs->data_type->name, hlsl_type_component_count(rhs->data_type)))) + return NULL; + func = hlsl_compile_internal_function(ctx, "find_msb", body); + vkd3d_free(body); + if (!func) + return NULL; + + lhs = func->parameters.vars[0]; + hlsl_block_add_simple_store(ctx, block, lhs, rhs); + + if (!(call = hlsl_new_call(ctx, func, &node->loc))) + return NULL; + hlsl_block_add_instr(block, call); + + return hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); }
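
For reference, the branchless MSB search in the find_msb() template above, transcribed to standalone C (not part of the patch) and checked against a simple scan:

    #include <assert.h>
    #include <stdint.h>

    /* C transcription of the find_msb() template above: negative inputs are
     * complemented first, so 0 and -1 both yield ~0u, matching FIRSTBIT_SHI. */
    static uint32_t find_msb(int32_t s)
    {
        uint32_t v = s < 0 ? ~(uint32_t)s : (uint32_t)s;
        uint32_t c, mask;

        mask = v & 0xffff0000;
        v = mask ? mask : v;
        c = mask ? 16 : v ? 0 : ~0u;
        mask = v & 0xff00ff00;
        v = mask ? mask : v;
        c = mask ? c + 8 : c;
        mask = v & 0xf0f0f0f0;
        v = mask ? mask : v;
        c = mask ? c + 4 : c;
        mask = v & 0xcccccccc;
        v = mask ? mask : v;
        c = mask ? c + 2 : c;
        mask = v & 0xaaaaaaaa;
        c = mask ? c + 1 : c;
        return c;
    }

    int main(void)
    {
        for (int64_t s = -0x10000; s <= 0x10000; ++s)
        {
            uint32_t v = s < 0 ? ~(uint32_t)(int32_t)s : (uint32_t)(int32_t)s;
            uint32_t expect = ~0u;

            for (int i = 31; i >= 0; --i)
            {
                if (v & (1u << i))
                {
                    expect = i;
                    break;
                }
            }
            assert(find_msb((int32_t)s) == expect);
        }
        assert(find_msb(INT32_MIN) == 30);
        return 0;
    }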
-static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +static struct hlsl_ir_node *lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) { struct hlsl_ir_function_decl *func; struct hlsl_ir_node *call, *rhs; @@ -13907,12 +14842,12 @@ static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct "}";
if (node->type != HLSL_IR_EXPR) - return false; + return NULL;
expr = hlsl_ir_expr(node);
if (expr->op != HLSL_OP1_ISINF) - return false; + return NULL;
rhs = expr->operands[0].node;
@@ -13927,19 +14862,18 @@ static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct
component_count = hlsl_type_component_count(rhs->data_type); if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) - return false; + return NULL;
if (!(func = hlsl_compile_internal_function(ctx, "isinf", body))) - return false; + return NULL;
hlsl_block_add_simple_store(ctx, block, func->parameters.vars[0], rhs);
if (!(call = hlsl_new_call(ctx, func, &node->loc))) - return false; + return NULL; hlsl_block_add_instr(block, call);
- hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); - return true; + return hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); }
static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_vars, struct hlsl_block *body, @@ -13978,22 +14912,25 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v
if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) { - lower_ir(ctx, lower_f16tof32, body); - lower_ir(ctx, lower_f32tof16, body); + replace_ir(ctx, lower_countbits, body); + replace_ir(ctx, lower_ctz, body); + replace_ir(ctx, lower_f16tof32, body); + replace_ir(ctx, lower_f32tof16, body); + replace_ir(ctx, lower_find_msb, body); }
- lower_ir(ctx, lower_isinf, body); + replace_ir(ctx, lower_isinf, body);
lower_return(ctx, entry_func, body, false);
while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
- lower_ir(ctx, lower_complex_casts, body); - lower_ir(ctx, lower_matrix_swizzles, body); - lower_ir(ctx, lower_index_loads, body); + replace_ir(ctx, lower_complex_casts, body); + replace_ir(ctx, lower_matrix_swizzles, body); + replace_ir(ctx, lower_index_loads, body);
- lower_ir(ctx, lower_tgsm_loads, body); - lower_ir(ctx, lower_tgsm_stores, body); + replace_ir(ctx, lower_tgsm_loads, body); + replace_ir(ctx, lower_tgsm_stores, body);
if (entry_func->return_var) { @@ -14153,8 +15090,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v } else { - hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); - hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); + replace_ir(ctx, lower_discard_nz, body); + replace_ir(ctx, lower_resource_load_bias, body); }
compute_liveness(ctx, body); @@ -14166,9 +15103,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL);
- lower_ir(ctx, lower_nonconstant_vector_derefs, body); - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); + replace_ir(ctx, lower_nonconstant_vector_derefs, body); + replace_ir(ctx, lower_casts_to_bool, body); + replace_ir(ctx, lower_int_dot, body);
if (hlsl_version_lt(ctx, 4, 0)) hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); @@ -14180,8 +15117,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v progress = vectorize_exprs(ctx, body); compute_liveness(ctx, body); progress |= hlsl_transform_ir(ctx, dce, body, NULL); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= replace_ir(ctx, fold_swizzle_chains, body); + progress |= replace_ir(ctx, fold_trivial_swizzles, body); progress |= vectorize_stores(ctx, body); } while (progress);
@@ -14209,40 +15146,50 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v
if (profile->major_version < 4) { - while (lower_ir(ctx, lower_nonconstant_array_loads, body)); + while (replace_ir(ctx, lower_nonconstant_array_loads, body));
- lower_ir(ctx, lower_ternary, body); - lower_ir(ctx, lower_int_modulus_sm1, body); - lower_ir(ctx, lower_division, body); + hlsl_transform_ir(ctx, cast_discard_neg_conditions_to_vec4, body, NULL); + + replace_ir(ctx, lower_ternary, body); + replace_ir(ctx, lower_int_modulus_sm1, body); + replace_ir(ctx, lower_division, body); /* Constants casted to float must be folded, and new casts to bool also need to be lowered. */ - hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - lower_ir(ctx, lower_casts_to_bool, body); - - lower_ir(ctx, lower_casts_to_int, body); - lower_ir(ctx, lower_trunc, body); - lower_ir(ctx, lower_sqrt, body); - lower_ir(ctx, lower_dot, body); - lower_ir(ctx, lower_round, body); - lower_ir(ctx, lower_ceil, body); - lower_ir(ctx, lower_floor, body); - lower_ir(ctx, lower_trig, body); - lower_ir(ctx, lower_comparison_operators, body); - lower_ir(ctx, lower_logic_not, body); + replace_ir(ctx, hlsl_fold_constant_exprs, body); + replace_ir(ctx, lower_casts_to_bool, body); + + replace_ir(ctx, lower_casts_to_int, body); + replace_ir(ctx, lower_trunc, body); + replace_ir(ctx, lower_sqrt, body); + replace_ir(ctx, lower_dot, body); + replace_ir(ctx, lower_round, body); + replace_ir(ctx, lower_ceil, body); + replace_ir(ctx, lower_floor, body); + replace_ir(ctx, lower_trig, body); + replace_ir(ctx, lower_comparison_operators, body); + replace_ir(ctx, lower_logic_not, body); if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - lower_ir(ctx, lower_slt, body); + replace_ir(ctx, lower_slt, body); else - lower_ir(ctx, lower_cmp, body); + replace_ir(ctx, lower_cmp, body); }
if (profile->major_version < 2) { - lower_ir(ctx, lower_abs, body); + replace_ir(ctx, lower_abs, body); }
- lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); + replace_ir(ctx, validate_nonconstant_vector_store_derefs, body);
hlsl_run_folding_passes(ctx, body);
+    if (profile->major_version < 4)
+    {
+        /* Ternary operations may be introduced by hlsl_run_folding_passes(). */
+        replace_ir(ctx, lower_ternary, body);
+        if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL)
+            replace_ir(ctx, lower_cmp, body);
+    }
+
     do
         compute_liveness(ctx, body);
     while (hlsl_transform_ir(ctx, dce, body, NULL));
@@ -14357,6 +15304,8 @@ int hlsl_emit_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info
     if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL)
         generate_vsir_signature(ctx, program, ctx->patch_constant_func, &patch_semantic_vars);
+ generate_vsir_descriptors(ctx, program); + if (program->shader_version.major < 4) sm1_generate_ctab(ctx, reflection_data); else diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index d339a06e6c7..627418165bc 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -250,6 +250,118 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; }
+static bool fold_clz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = src->node.data_type->e.numeric.type; + unsigned int k, v; + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_INT: + v = src->value.u[k].i < 0 ? ~src->value.u[k].u : src->value.u[k].u; + break; + + case HLSL_TYPE_UINT: + v = src->value.u[k].u; + break; + + default: + FIXME("Fold 'clz' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + + dst->u[k].u = v ? vkd3d_log2i(v) ^ 0x1f : ~0u; + } + + return true; +} + +static bool fold_cos(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = cosf(src->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = cos(src->value.u[k].d); + break; + + default: + FIXME("Fold 'cos' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + +static bool fold_countbits(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_UINT: + dst->u[k].u = vkd3d_popcount(src->value.u[k].u); + break; + + default: + FIXME("Fold 'countbits' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + +static bool fold_ctz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_UINT: + if (!src->value.u[k].u) + dst->u[k].u = ~0u; + else + dst->u[k].u = vkd3d_ctz(src->value.u[k].u); + break; + + default: + FIXME("Fold 'ctz' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { @@ -478,6 +590,48 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
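
fold_clz() above relies on the identity that, for non-zero v, floor(log2(v)) is 31 minus the leading-zero count, so XOR with 0x1f recovers that count. A standalone check, with a loop-based stand-in for vkd3d_log2i():

    #include <assert.h>
    #include <stdint.h>

    /* Loop-based stand-in for vkd3d_log2i(): floor(log2(v)) for v != 0. */
    static unsigned int log2i(uint32_t v)
    {
        unsigned int r = 0;

        while (v >>= 1)
            ++r;
        return r;
    }

    /* Reference count of leading zeros, for v != 0. */
    static unsigned int clz_ref(uint32_t v)
    {
        unsigned int c = 0;

        while (!(v & 0x80000000u))
        {
            v <<= 1;
            ++c;
        }
        return c;
    }

    int main(void)
    {
        for (uint32_t v = 1; v < 1u << 20; ++v)
            assert((log2i(v) ^ 0x1f) == clz_ref(v));
        assert((log2i(0x80000000u) ^ 0x1f) == 0);
        return 0;
    }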
+static bool fold_reinterpret(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + unsigned int k; + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + dst->u[k] = src->value.u[k]; + } + + return true; +} + +static bool fold_round(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + /* Somewhat unfortunately, constant folded round() rounds + * halfway cases towards positive infinity, as opposed to + * nearest even like vsir/TPF round_ne. */ + dst->u[k].f = floorf(src->value.u[k].f + 0.5f); + break; + + default: + FIXME("Fold 'round' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) { @@ -544,6 +698,36 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
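
The comment in fold_round() above is easy to demonstrate: floorf(x + 0.5f) and round-to-nearest-even differ exactly on halfway cases (standalone C; link with -lm):

    #include <assert.h>
    #include <math.h>

    int main(void)
    {
        /* floor(x + 0.5f): ties round towards positive infinity. */
        assert(floorf(0.5f + 0.5f) == 1.0f);
        assert(floorf(2.5f + 0.5f) == 3.0f);
        assert(floorf(-0.5f + 0.5f) == 0.0f);

        /* round_ne: ties round to the nearest even value, like rintf()
         * in the default round-to-nearest mode. */
        assert(rintf(0.5f) == 0.0f);
        assert(rintf(2.5f) == 2.0f);
        assert(rintf(-0.5f) == 0.0f);
        return 0;
    }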
+static bool fold_sin(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = sinf(src->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = sin(src->value.u[k].d); + break; + + default: + FIXME("Fold 'sin' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) { @@ -974,6 +1158,44 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c return true; }
+static bool fold_mad(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fmaf(src1->value.u[k].f, src2->value.u[k].f, src3->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fma(src1->value.u[k].d, src2->value.u[k].d, src3->value.u[k].d); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_MIN16UINT: + case HLSL_TYPE_UINT: + dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u + src3->value.u[k].u; + break; + + default: + FIXME("Fold 'mad' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { @@ -1212,11 +1434,11 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c return true; }
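
fold_mad() above performs the INT, MIN16UINT, and UINT cases in unsigned arithmetic; a plausible reason (my inference, not stated in the patch) is that unsigned overflow wraps in C while signed overflow is undefined, and two's-complement wrapping still yields the correct signed result:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int32_t a = -3, b = 5, c = 7;
        uint32_t u;

        /* Unsigned multiply-add wraps modulo 2^32... */
        u = (uint32_t)a * (uint32_t)b + (uint32_t)c;

        /* ...and reinterpreting the result recovers the signed answer. */
        assert((int32_t)u == -8);
        return 0;
    }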
-bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +struct hlsl_ir_node *hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; struct hlsl_constant_value res = {0}; - struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; unsigned int i; bool success; @@ -1263,6 +1485,22 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_ceil(ctx, &res, instr->data_type, arg1); break;
+ case HLSL_OP1_CLZ: + success = fold_clz(ctx, &res, instr->data_type, arg1); + break; + + case HLSL_OP1_COS: + success = fold_cos(ctx, &res, instr->data_type, arg1); + break; + + case HLSL_OP1_COUNTBITS: + success = fold_countbits(ctx, &res, instr->data_type, arg1); + break; + + case HLSL_OP1_CTZ: + success = fold_ctz(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_EXP2: success = fold_exp2(ctx, &res, instr->data_type, arg1); break; @@ -1291,6 +1529,14 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); break;
+ case HLSL_OP1_REINTERPRET: + success = fold_reinterpret(ctx, &res, instr->data_type, arg1); + break; + + case HLSL_OP1_ROUND: + success = fold_round(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_RSQ: success = fold_rsq(ctx, &res, instr->data_type, arg1, &instr->loc); break; @@ -1299,6 +1545,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_sat(ctx, &res, instr->data_type, arg1); break;
+ case HLSL_OP1_SIN: + success = fold_sin(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_SQRT: success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); break; @@ -1373,6 +1623,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); break;
+ case HLSL_OP3_MAD: + success = fold_mad(ctx, &res, instr->data_type, arg1, arg2, arg3); + break; + case HLSL_OP3_TERNARY: success = fold_ternary(ctx, &res, instr->data_type, arg1, arg2, arg3); break; @@ -1384,103 +1638,32 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, }
if (success) - { - if (!(res_node = hlsl_new_constant(ctx, instr->data_type, &res, &instr->loc))) - return false; - list_add_before(&expr->node.entry, &res_node->entry); - hlsl_replace_node(&expr->node, res_node); - } - return success; -} - -static bool constant_is_zero(struct hlsl_ir_constant *const_arg) -{ - struct hlsl_type *data_type = const_arg->node.data_type; - unsigned int k; - - for (k = 0; k < data_type->e.numeric.dimx; ++k) - { - switch (data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - if (const_arg->value.u[k].f != 0.0f) - return false; - break; + return hlsl_block_add_constant(ctx, block, instr->data_type, &res, &instr->loc);
- case HLSL_TYPE_DOUBLE: - if (const_arg->value.u[k].d != 0.0) - return false; - break; - - case HLSL_TYPE_UINT: - case HLSL_TYPE_INT: - case HLSL_TYPE_BOOL: - case HLSL_TYPE_MIN16UINT: - if (const_arg->value.u[k].u != 0) - return false; - break; - } - } - return true; + return NULL; }
-static bool constant_is_one(struct hlsl_ir_constant *const_arg) -{ - struct hlsl_type *data_type = const_arg->node.data_type; - unsigned int k; - - for (k = 0; k < data_type->e.numeric.dimx; ++k) - { - switch (data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - if (const_arg->value.u[k].f != 1.0f) - return false; - break; - - case HLSL_TYPE_DOUBLE: - if (const_arg->value.u[k].d != 1.0) - return false; - break; - - case HLSL_TYPE_UINT: - case HLSL_TYPE_INT: - case HLSL_TYPE_MIN16UINT: - if (const_arg->value.u[k].u != 1) - return false; - break; - - case HLSL_TYPE_BOOL: - if (const_arg->value.u[k].u != ~0) - return false; - break; - } - } - return true; -} - -bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +struct hlsl_ir_node *hlsl_fold_constant_identities(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { + static const struct hlsl_constant_value zero; struct hlsl_ir_constant *const_arg = NULL; struct hlsl_ir_node *mut_arg = NULL; - struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; unsigned int i;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr);
if (instr->data_type->class > HLSL_CLASS_VECTOR) - return false; + return NULL;
/* Verify that the expression has two operands. */ for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) { if (!!expr->operands[i].node != (i < 2)) - return false; + return NULL; }
if (expr->operands[0].node->type == HLSL_IR_CONSTANT) @@ -1495,46 +1678,48 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in } else { - return false; + return NULL; }
- res_node = NULL; switch (expr->op) { case HLSL_OP2_ADD: - if (constant_is_zero(const_arg)) - res_node = mut_arg; + if (hlsl_constant_is_zero(const_arg)) + return mut_arg; break;
case HLSL_OP2_MUL: - if (constant_is_one(const_arg)) - res_node = mut_arg; + if (hlsl_constant_is_one(const_arg)) + return mut_arg; break;
case HLSL_OP2_LOGIC_AND: - if (constant_is_zero(const_arg)) - res_node = &const_arg->node; - else if (constant_is_one(const_arg)) - res_node = mut_arg; + if (hlsl_constant_is_zero(const_arg)) + return &const_arg->node; + else if (hlsl_constant_is_one(const_arg)) + return mut_arg; break;
case HLSL_OP2_LOGIC_OR: - if (constant_is_zero(const_arg)) - res_node = mut_arg; - else if (constant_is_one(const_arg)) - res_node = &const_arg->node; + if (hlsl_constant_is_zero(const_arg)) + return mut_arg; + else if (hlsl_constant_is_one(const_arg)) + return &const_arg->node; break;
+        case HLSL_OP2_LESS:
+            /* x < 0 is always false when x is unsigned. */
+            if (!hlsl_type_is_unsigned_integer(expr->operands[0].node->data_type)
+                    || expr->operands[1].node->type != HLSL_IR_CONSTANT
+                    || !hlsl_constant_is_zero(hlsl_ir_constant(expr->operands[1].node)))
+                break;
+            return hlsl_block_add_constant(ctx, block, instr->data_type, &zero, &instr->loc);
+
         default:
             break;
     }
- if (res_node) - { - hlsl_replace_node(&expr->node, res_node); - return true; - } - return false; + return NULL; }
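
The new HLSL_OP2_LESS identity matches what C compilers do with a tautological unsigned comparison:

    #include <assert.h>

    int main(void)
    {
        unsigned int x = 0xffffffffu;

        /* An unsigned value is never less than zero, so the comparison
         * folds to false regardless of x (GCC and Clang warn about it
         * with -Wtype-limits / -Wtautological-compare). */
        assert(!(x < 0u));
        return 0;
    }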
static bool is_op_associative(enum hlsl_ir_expr_op op, enum hlsl_base_type type) @@ -1633,23 +1818,23 @@ static struct hlsl_ir_node *collect_exprs(struct hlsl_ctx *ctx, struct hlsl_bloc return hlsl_block_add_expr(ctx, block, opl, operands, instr->data_type, &instr->loc); }
-bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +struct hlsl_ir_node *hlsl_fold_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *arg1, *arg2, *tmp; struct hlsl_ir_expr *expr; enum hlsl_base_type type; enum hlsl_ir_expr_op op; - struct hlsl_block block; bool progress = false;
if (instr->type != HLSL_IR_EXPR) - return false; + return NULL; expr = hlsl_ir_expr(instr);
if (instr->data_type->class > HLSL_CLASS_VECTOR) - return false; + return NULL;
- hlsl_block_init(&block); + if (expr->operands[2].node) + return NULL;
arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; @@ -1657,15 +1842,11 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst op = expr->op;
if (!arg1 || !arg2) - return false; + return NULL;
- if ((tmp = collect_exprs(ctx, &block, instr, op, arg1, arg2))) - { - /* (x OPL a) OPR (x OPL b) -> x OPL (a OPR b) */ - list_move_before(&instr->entry, &block.instrs); - hlsl_replace_node(instr, tmp); - return true; - } + /* (x OPL a) OPR (x OPL b) -> x OPL (a OPR b) */ + if ((tmp = collect_exprs(ctx, block, instr, op, arg1, arg2))) + return tmp;
if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT) { @@ -1688,13 +1869,13 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst { /* (x OP a) OP b -> x OP (a OP b) */ arg1 = e1->operands[0].node; - arg2 = hlsl_block_add_binary_expr(ctx, &block, op, e1->operands[1].node, arg2); + arg2 = hlsl_block_add_binary_expr(ctx, block, op, e1->operands[1].node, arg2); progress = true; } else if (is_op_commutative(op)) { /* (x OP a) OP y -> (x OP y) OP a */ - arg1 = hlsl_block_add_binary_expr(ctx, &block, op, e1->operands[0].node, arg2); + arg1 = hlsl_block_add_binary_expr(ctx, block, op, e1->operands[0].node, arg2); arg2 = e1->operands[1].node; progress = true; } @@ -1704,13 +1885,13 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT) { /* x OP (y OP a) -> (x OP y) OP a */ - arg1 = hlsl_block_add_binary_expr(ctx, &block, op, arg1, e2->operands[0].node); + arg1 = hlsl_block_add_binary_expr(ctx, block, op, arg1, e2->operands[0].node); arg2 = e2->operands[1].node; progress = true; }
if (!progress && e1 && e1->op == op - && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[1].node, arg2))) + && (tmp = collect_exprs(ctx, block, instr, op, e1->operands[1].node, arg2))) { /* (y OPR (x OPL a)) OPR (x OPL b) -> y OPR (x OPL (a OPR b)) */ arg1 = e1->operands[0].node; @@ -1719,7 +1900,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst }
if (!progress && is_op_commutative(op) && e1 && e1->op == op - && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[0].node, arg2))) + && (tmp = collect_exprs(ctx, block, instr, op, e1->operands[0].node, arg2))) { /* ((x OPL a) OPR y) OPR (x OPL b) -> (x OPL (a OPR b)) OPR y */ arg1 = tmp; @@ -1728,7 +1909,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst }
if (!progress && e2 && e2->op == op - && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[0].node))) + && (tmp = collect_exprs(ctx, block, instr, op, arg1, e2->operands[0].node))) { /* (x OPL a) OPR ((x OPL b) OPR y) -> (x OPL (a OPR b)) OPR y */ arg1 = tmp; @@ -1737,7 +1918,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst }
if (!progress && is_op_commutative(op) && e2 && e2->op == op - && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[1].node))) + && (tmp = collect_exprs(ctx, block, instr, op, arg1, e2->operands[1].node))) { /* (x OPL a) OPR (y OPR (x OPL b)) -> (x OPL (a OPR b)) OPR y */ arg1 = tmp; @@ -1749,39 +1930,30 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst if (progress) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; - struct hlsl_ir_node *res;
- res = hlsl_block_add_expr(ctx, &block, op, operands, instr->data_type, &instr->loc); - - list_move_before(&instr->entry, &block.instrs); - hlsl_replace_node(instr, res); + return hlsl_block_add_expr(ctx, block, op, operands, instr->data_type, &instr->loc); }
- return progress; + return NULL; }
-bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +struct hlsl_ir_node *hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_constant_value value; struct hlsl_ir_swizzle *swizzle; struct hlsl_ir_constant *src; - struct hlsl_ir_node *dst; unsigned int i;
if (instr->type != HLSL_IR_SWIZZLE) - return false; + return NULL; swizzle = hlsl_ir_swizzle(instr); if (swizzle->val.node->type != HLSL_IR_CONSTANT) - return false; + return NULL; src = hlsl_ir_constant(swizzle->val.node);
for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i) value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)];
- if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) - return false; - - list_add_before(&swizzle->node.entry, &dst->entry); - hlsl_replace_node(&swizzle->node, dst); - return true; + return hlsl_block_add_constant(ctx, block, instr->data_type, &value, &instr->loc); } diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 23e059a3490..6a1c5303eb4 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -27,8 +27,55 @@ struct vsir_transformation_context uint64_t config_flags; const struct vkd3d_shader_compile_info *compile_info; struct vkd3d_shader_message_context *message_context; + struct vkd3d_shader_location null_location; + bool progress; +}; + +static void vsir_transformation_context_init(struct vsir_transformation_context *ctx, + struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) +{ + *ctx = (struct vsir_transformation_context) + { + .result = VKD3D_OK, + .program = program, + .config_flags = config_flags, + .compile_info = compile_info, + .message_context = message_context, + .null_location = {.source_name = compile_info->source_name}, + }; };
+const char *vsir_data_type_get_name(enum vsir_data_type t, const char *error) +{ + static const char * const names[] = + { + [VSIR_DATA_BOOL ] = "bool", + [VSIR_DATA_F16 ] = "f16", + [VSIR_DATA_F32 ] = "f32", + [VSIR_DATA_F64 ] = "f64", + [VSIR_DATA_I8 ] = "i8", + [VSIR_DATA_I16 ] = "i16", + [VSIR_DATA_I32 ] = "i32", + [VSIR_DATA_I64 ] = "i64", + [VSIR_DATA_U8 ] = "u8", + [VSIR_DATA_U16 ] = "u16", + [VSIR_DATA_U32 ] = "u32", + [VSIR_DATA_U64 ] = "u64", + [VSIR_DATA_SNORM ] = "snorm", + [VSIR_DATA_UNORM ] = "unorm", + [VSIR_DATA_OPAQUE ] = "opaque", + [VSIR_DATA_MIXED ] = "mixed", + [VSIR_DATA_CONTINUED] = "<continued>", + [VSIR_DATA_UNUSED ] = "<unused>", + }; + + if ((size_t)t < ARRAY_SIZE(names)) + return names[t] ? names[t] : error; + + return error; +} + const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) { static const char * const names[] = @@ -252,6 +299,7 @@ const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) [VSIR_OP_MOVC ] = "movc", [VSIR_OP_MSAD ] = "msad", [VSIR_OP_MUL ] = "mul", + [VSIR_OP_NEG ] = "neg", [VSIR_OP_NEO ] = "ne_ord", [VSIR_OP_NEU ] = "ne", [VSIR_OP_NOP ] = "nop", @@ -290,6 +338,7 @@ const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) [VSIR_OP_SAMPLE_LOD ] = "sample_l", [VSIR_OP_SAMPLE_LOD_S ] = "sample_l_s", [VSIR_OP_SAMPLE_POS ] = "sample_pos", + [VSIR_OP_SATURATE ] = "saturate", [VSIR_OP_SETP ] = "setp", [VSIR_OP_SGE ] = "sge", [VSIR_OP_SGN ] = "sgn", @@ -373,6 +422,177 @@ const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) return error; }
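
Both name tables above use the same sparse designated-initialiser pattern: gaps default to NULL and fall back to the caller's error string. The same shape in miniature, with a hypothetical enum:

    #include <stdio.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    enum colour {COLOUR_RED = 1, COLOUR_BLUE = 5};

    static const char *colour_get_name(enum colour c, const char *error)
    {
        static const char * const names[] =
        {
            [COLOUR_RED]  = "red",
            [COLOUR_BLUE] = "blue",
        };

        /* Out-of-range values and gaps (e.g. 2..4) both map to "error". */
        if ((size_t)c < ARRAY_SIZE(names))
            return names[c] ? names[c] : error;
        return error;
    }

    int main(void)
    {
        printf("%s %s %s\n", colour_get_name(COLOUR_RED, "<unknown>"),
                colour_get_name((enum colour)3, "<unknown>"),
                colour_get_name((enum colour)99, "<unknown>"));
        return 0;
    }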
+static struct vkd3d_shader_param_node *shader_param_allocator_node_create( + struct vkd3d_shader_param_allocator *allocator) +{ + struct vkd3d_shader_param_node *node; + + if (!(node = vkd3d_malloc(offsetof(struct vkd3d_shader_param_node, param[allocator->count * allocator->stride])))) + return NULL; + node->next = NULL; + + return node; +} + +static void shader_param_allocator_init(struct vkd3d_shader_param_allocator *allocator, size_t count, size_t stride) +{ + allocator->count = max(count, MAX_REG_OUTPUT); + allocator->stride = stride; + allocator->head = NULL; + allocator->current = NULL; + allocator->index = allocator->count; +} + +static void shader_param_allocator_destroy(struct vkd3d_shader_param_allocator *allocator) +{ + struct vkd3d_shader_param_node *current = allocator->head; + + while (current) + { + struct vkd3d_shader_param_node *next = current->next; + vkd3d_free(current); + current = next; + } +} + +void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, size_t count) +{ + void *params; + + if (!allocator->current || count > allocator->count - allocator->index) + { + struct vkd3d_shader_param_node *next; + + allocator->count = max(allocator->count, count); + if (!(next = shader_param_allocator_node_create(allocator))) + return NULL; + if (allocator->current) + allocator->current->next = next; + else + allocator->head = next; + allocator->current = next; + allocator->index = 0; + } + + params = &allocator->current->param[allocator->index * allocator->stride]; + allocator->index += count; + + return params; +} + +static bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *array, size_t reserve) +{ + if (!vkd3d_array_reserve((void **)&array->elements, &array->capacity, reserve, sizeof(*array->elements))) + { + ERR("Failed to allocate instructions.\n"); + return false; + } + + return true; +} + +bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *array, size_t idx, size_t count) +{ + VKD3D_ASSERT(idx <= array->count); + + if (!shader_instruction_array_reserve(array, array->count + count)) + return false; + + memmove(&array->elements[idx + count], &array->elements[idx], (array->count - idx) * sizeof(*array->elements)); + memset(&array->elements[idx], 0, count * sizeof(*array->elements)); + array->count += count; + + return true; +} + +struct vkd3d_shader_instruction *shader_instruction_array_append(struct vkd3d_shader_instruction_array *array) +{ + if (!shader_instruction_array_insert_at(array, array->count, 1)) + return NULL; + + return &array->elements[array->count - 1]; +} + +bool vsir_program_add_icb(struct vsir_program *program, struct vkd3d_shader_immediate_constant_buffer *icb) +{ + if (!vkd3d_array_reserve((void **)&program->icbs, &program->icb_capacity, + program->icb_count + 1, sizeof(*program->icbs))) + return false; + + program->icbs[program->icb_count++] = icb; + + return true; +} + +static struct vkd3d_shader_src_param *vsir_program_clone_src_params( + struct vsir_program *program, const struct vkd3d_shader_src_param *params, size_t count); + +static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, struct vsir_program *program) +{ + size_t i; + + for (i = 0; i < reg->idx_count; ++i) + { + if (!reg->idx[i].rel_addr) + continue; + + if (!(reg->idx[i].rel_addr = vsir_program_clone_src_params(program, reg->idx[i].rel_addr, 1))) + return false; + } + + return true; +} + +static struct vkd3d_shader_dst_param *vsir_program_clone_dst_params( + struct vsir_program 
*program, const struct vkd3d_shader_dst_param *params, size_t count) +{ + struct vkd3d_shader_dst_param *dst_params; + size_t i; + + if (!(dst_params = vsir_program_get_dst_params(program, count))) + return NULL; + + memcpy(dst_params, params, count * sizeof(*params)); + for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&dst_params[i].reg, program)) + return NULL; + } + + return dst_params; +} + +static struct vkd3d_shader_src_param *vsir_program_clone_src_params( + struct vsir_program *program, const struct vkd3d_shader_src_param *params, size_t count) +{ + struct vkd3d_shader_src_param *src_params; + size_t i; + + if (!(src_params = vsir_program_get_src_params(program, count))) + return NULL; + + memcpy(src_params, params, count * sizeof(*params)); + for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&src_params[i].reg, program)) + return NULL; + } + + return src_params; +} + +static void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *array) +{ + vkd3d_free(array->elements); +} + +static bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *array, size_t reserve) +{ + memset(array, 0, sizeof(*array)); + + return shader_instruction_array_reserve(array, reserve); +} + static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) { @@ -444,6 +664,11 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c program->shader_version = *version; program->cf_type = cf_type; program->normalisation_level = normalisation_level; + + /* Size the initial parameter allocations so that they are large enough for most shaders; the + * chained-allocation code path is still exercised by the few shaders that need more. */ + shader_param_allocator_init(&program->dst_params, reserve - reserve / 8u, sizeof(struct vkd3d_shader_dst_param)); + shader_param_allocator_init(&program->src_params, reserve * 2u, sizeof(struct vkd3d_shader_src_param)); if (!shader_instruction_array_init(&program->instructions, reserve)) { if (program->free_parameters) @@ -471,6 +696,13 @@ void vsir_program_cleanup(struct vsir_program *program) shader_signature_cleanup(&program->output_signature); shader_signature_cleanup(&program->patch_constant_signature); vkd3d_shader_free_scan_descriptor_info1(&program->descriptors); + shader_param_allocator_destroy(&program->src_params); + shader_param_allocator_destroy(&program->dst_params); + for (i = 0; i < program->icb_count; ++i) + { + vkd3d_free(program->icbs[i]); + } + vkd3d_free(program->icbs); }
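/* Editorial note: a minimal usage sketch for the parameter pool above; the
 * function and values are hypothetical, but the pattern matches the callers
 * later in this patch. Parameters obtained this way are never freed
 * individually; the pool is released as a whole in vsir_program_cleanup(). */
static enum vkd3d_result example_pass_alloc_params(struct vsir_program *program)
{
    struct vkd3d_shader_src_param *srcs;

    /* Request three contiguous source parameters. A new chained node is
     * allocated transparently when the current node cannot satisfy the
     * request, so pointers returned earlier stay valid across later requests. */
    if (!(srcs = vsir_program_get_src_params(program, 3)))
        return VKD3D_ERROR_OUT_OF_MEMORY;

    (void)srcs; /* ... fill in the parameters ... */
    return VKD3D_OK;
}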
const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( @@ -517,6 +749,38 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, return false; }
+struct vkd3d_shader_descriptor_info1 *vsir_program_add_descriptor(struct vsir_program *program, + enum vkd3d_shader_descriptor_type type, unsigned int register_id, + const struct vkd3d_shader_register_range *range, + enum vkd3d_shader_resource_type resource_type, enum vsir_data_type resource_data_type) +{ + struct vkd3d_shader_scan_descriptor_info1 *info = &program->descriptors; + struct vkd3d_shader_descriptor_info1 *d; + + if (!info) + return NULL; + + if (!vkd3d_array_reserve((void **)&info->descriptors, &program->descriptors_size, + info->descriptor_count + 1, sizeof(*info->descriptors))) + { + ERR("Failed to allocate descriptor info.\n"); + return NULL; + } + + d = &info->descriptors[info->descriptor_count]; + memset(d, 0, sizeof(*d)); + d->type = type; + d->register_id = register_id; + d->register_space = range->space; + d->register_index = range->first; + d->resource_type = resource_type; + d->resource_data_type = resource_data_type; + d->count = (range->last == ~0u) ? ~0u : range->last - range->first + 1; + ++info->descriptor_count; + + return d; +} + const char *debug_vsir_writemask(unsigned int writemask) { static const char components[] = {'x', 'y', 'z', 'w'}; @@ -796,16 +1060,15 @@ static void dst_param_init_output(struct vkd3d_shader_dst_param *dst, dst->write_mask = write_mask; }
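/* Editorial note: an illustrative call to the vsir_program_add_descriptor()
 * helper introduced above, with hypothetical values: one 2D-texture SRV at
 * t3 in register space 0. An unbounded range would instead pass last == ~0u,
 * which the helper records as count == ~0u. */
static enum vkd3d_result example_declare_srv(struct vsir_program *program)
{
    const struct vkd3d_shader_register_range range = {.space = 0, .first = 3, .last = 3};

    if (!vsir_program_add_descriptor(program, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV,
            3, &range, VKD3D_SHADER_RESOURCE_TEXTURE_2D, VSIR_DATA_F32))
        return VKD3D_ERROR_OUT_OF_MEMORY;

    return VKD3D_OK;
}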
-void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode opcode) +void vsir_instruction_init(struct vkd3d_shader_instruction *ins, + const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode) { - memset(ins, 0, sizeof(*ins)); - ins->location = *location; - ins->opcode = opcode; - ins->resource_data_type[0] = VSIR_DATA_F32; - ins->resource_data_type[1] = VSIR_DATA_F32; - ins->resource_data_type[2] = VSIR_DATA_F32; - ins->resource_data_type[3] = VSIR_DATA_F32; + *ins = (struct vkd3d_shader_instruction) + { + .location = *location, + .opcode = opcode, + .resource_data_type = {VSIR_DATA_F32, VSIR_DATA_F32, VSIR_DATA_F32, VSIR_DATA_F32}, + }; }
bool vsir_instruction_init_with_params(struct vsir_program *program, @@ -857,11 +1120,19 @@ static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instr || opcode == VSIR_OP_HS_DECLS; }
-static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +/* NOTE: Immediate constant buffers are not cloned, so the source must not be destroyed while the + * destination is in use. This seems like a reasonable requirement given how this is currently used. */ +static bool vsir_program_iterator_clone_instruction(struct vsir_program *program, + struct vsir_program_iterator *dst_it, const struct vkd3d_shader_instruction *src) { - struct vkd3d_shader_location location = ins->location; + struct vkd3d_shader_instruction *dst = vsir_program_iterator_current(dst_it); + + *dst = *src; + + if (dst->dst_count && !(dst->dst = vsir_program_clone_dst_params(program, dst->dst, dst->dst_count))) + return false;
- vsir_instruction_init(ins, &location, VSIR_OP_NOP); + return !dst->src_count || (dst->src = vsir_program_clone_src_params(program, dst->src, dst->src_count)); }
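/* Editorial note: a sketch of the intended use (iterator setup omitted,
 * names hypothetical). Because immediate constant buffers are shared rather
 * than cloned, the program owning the source instruction must outlive the
 * copy, as the comment above requires. */
static enum vkd3d_result example_duplicate_instruction(struct vsir_program *program,
        struct vsir_program_iterator *dst_it, struct vsir_program_iterator *src_it)
{
    const struct vkd3d_shader_instruction *src = vsir_program_iterator_current(src_it);

    if (!vsir_program_iterator_clone_instruction(program, dst_it, src))
        return VKD3D_ERROR_OUT_OF_MEMORY;

    return VKD3D_OK;
}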
static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, @@ -952,11 +1223,12 @@ static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *progra ins2->dst[0].reg.idx[0].offset = tmp_idx; ins2->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins2->dst[0].write_mask = ins->dst[0].write_mask; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
vsir_register_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_F32, 1); ins2->src[0].reg.idx[0].offset = tmp_idx; ins2->src[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins2->src[0].swizzle = vsir_swizzle_from_writemask(ins2->dst[0].write_mask); + ins2->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; }
for (k = 0; k < ins->src_count; ++k) @@ -1069,6 +1341,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program ins->src[1].reg.u.immconst_f32[1] = 0.0f; ins->src[1].reg.u.immconst_f32[2] = 0.0f; ins->src[1].reg.u.immconst_f32[3] = 0.0f; + ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
/* tmp.x = tmp.x || tmp.y */ /* tmp.x = tmp.x || tmp.z */ @@ -1120,7 +1393,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program * not fused for "precise" operations." * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, - struct vsir_program_iterator *it, unsigned int *tmp_idx) + struct vsir_program_iterator *it) { struct vkd3d_shader_instruction *mad, *mul_ins, *add_ins; struct vkd3d_shader_dst_param *mul_dst; @@ -1133,9 +1406,6 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro if (!vsir_program_iterator_insert_after(it, 1)) return VKD3D_ERROR_OUT_OF_MEMORY;
- if (*tmp_idx == ~0u) - *tmp_idx = program->temp_count++; - mul_ins = vsir_program_iterator_current(it); add_ins = vsir_program_iterator_next(it);
@@ -1150,14 +1420,9 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro mul_dst = mul_ins->dst; *add_ins->dst = *mul_dst;
- mul_dst->modifiers = 0; - vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1); - mul_dst->reg.dimension = add_ins->dst->reg.dimension; - mul_dst->reg.idx[0].offset = *tmp_idx; + dst_param_init_ssa(mul_dst, program->ssa_count, mul_ins->src[0].reg.data_type, VSIR_DIMENSION_VEC4); + src_param_init_ssa(&add_ins->src[0], program->ssa_count++, mul_ins->src[0].reg.data_type, VSIR_DIMENSION_VEC4);
- add_ins->src[0].reg = mul_dst->reg; - add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); - add_ins->src[0].modifiers = 0; add_ins->src[1] = mul_ins->src[2];
return VKD3D_OK; @@ -1184,8 +1449,8 @@ static enum vkd3d_result vsir_program_lower_imul(struct vsir_program *program, static enum vkd3d_result vsir_program_lower_udiv(struct vsir_program *program, struct vsir_program_iterator *it, struct vsir_transformation_context *ctx) { + unsigned int count = 3, src0_id, src1_id, divisor_id; struct vkd3d_shader_instruction *udiv, *ins, *mov; - unsigned int count = 2;
udiv = vsir_program_iterator_current(it);
@@ -1199,9 +1464,9 @@ static enum vkd3d_result vsir_program_lower_udiv(struct vsir_program *program, }
if (udiv->dst[0].reg.type != VKD3DSPR_NULL) - ++count; + count += 2; if (udiv->dst[1].reg.type != VKD3DSPR_NULL) - ++count; + count += 2;
if (!vsir_program_iterator_insert_after(it, count)) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -1213,14 +1478,33 @@ static enum vkd3d_result vsir_program_lower_udiv(struct vsir_program *program, return VKD3D_ERROR_OUT_OF_MEMORY;
mov->src[0] = udiv->src[0]; - dst_param_init_ssa(&mov->dst[0], program->ssa_count, udiv->src[0].reg.data_type, udiv->src[0].reg.dimension); + src0_id = program->ssa_count++; + dst_param_init_ssa(&mov->dst[0], src0_id, udiv->src[0].reg.data_type, udiv->src[0].reg.dimension);
mov = vsir_program_iterator_next(it); if (!(vsir_instruction_init_with_params(program, mov, &udiv->location, VSIR_OP_MOV, 1, 1))) return VKD3D_ERROR_OUT_OF_MEMORY;
mov->src[0] = udiv->src[1]; - dst_param_init_ssa(&mov->dst[0], program->ssa_count + 1, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + src1_id = program->ssa_count++; + dst_param_init_ssa(&mov->dst[0], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + + mov = vsir_program_iterator_next(it); + if (!(vsir_instruction_init_with_params(program, mov, &udiv->location, VSIR_OP_MOVC, 1, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + src_param_init_ssa(&mov->src[0], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + src_param_init_ssa(&mov->src[1], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + vsir_register_init(&mov->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); + mov->src[2].reg.dimension = udiv->src[1].reg.dimension; + mov->src[2].reg.u.immconst_u32[0] = 1; + mov->src[2].reg.u.immconst_u32[1] = 1; + mov->src[2].reg.u.immconst_u32[2] = 1; + mov->src[2].reg.u.immconst_u32[3] = 1; + if (mov->src[2].reg.dimension == VSIR_DIMENSION_VEC4) + mov->src[2].swizzle = VKD3D_SHADER_NO_SWIZZLE; + divisor_id = program->ssa_count++; + dst_param_init_ssa(&mov->dst[0], divisor_id, mov->src[1].reg.data_type, mov->src[1].reg.dimension);
if (udiv->dst[0].reg.type != VKD3DSPR_NULL) { @@ -1231,11 +1515,30 @@ static enum vkd3d_result vsir_program_lower_udiv(struct vsir_program *program,
ins->flags = udiv->flags;
- src_param_init_ssa(&ins->src[0], program->ssa_count, - udiv->src[0].reg.data_type, udiv->src[0].reg.dimension); - src_param_init_ssa(&ins->src[1], program->ssa_count + 1, - udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + src_param_init_ssa(&ins->src[0], src0_id, udiv->src[0].reg.data_type, udiv->src[0].reg.dimension); + src_param_init_ssa(&ins->src[1], divisor_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + dst_param_init_ssa(&ins->dst[0], program->ssa_count, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + + /* Like its TPF equivalent, division by zero is well-defined for + * VSIR_OP_UDIV, and returns UINT_MAX. Division by zero is undefined + * for VSIR_OP_UDIV_SIMPLE and VSIR_OP_UREM, so handle it here. */ + ins = vsir_program_iterator_next(it); + if (!(vsir_instruction_init_with_params(program, ins, &udiv->location, VSIR_OP_MOVC, 1, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + src_param_init_ssa(&ins->src[0], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + src_param_init_ssa(&ins->src[1], program->ssa_count, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + vsir_register_init(&ins->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); + ins->src[2].reg.dimension = udiv->src[1].reg.dimension; + ins->src[2].reg.u.immconst_u32[0] = UINT_MAX; + ins->src[2].reg.u.immconst_u32[1] = UINT_MAX; + ins->src[2].reg.u.immconst_u32[2] = UINT_MAX; + ins->src[2].reg.u.immconst_u32[3] = UINT_MAX; + if (ins->src[2].reg.dimension == VSIR_DIMENSION_VEC4) + ins->src[2].swizzle = VKD3D_SHADER_NO_SWIZZLE; ins->dst[0] = udiv->dst[0]; + + ++program->ssa_count; }
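/* Editorial note: in plain C, the MOVC/UDIV_SIMPLE/MOVC sequence above
 * computes, per component (a and b being the original udiv sources):
 *     divisor  = b ? b : 1;                  first MOVC, avoids the undefined case
 *     quotient = a / divisor;                VSIR_OP_UDIV_SIMPLE
 *     dst0     = b ? quotient : UINT_MAX;    second MOVC
 * The dst[1] block below repeats the same selection for the remainder. */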
if (udiv->dst[1].reg.type != VKD3DSPR_NULL) @@ -1247,15 +1550,30 @@ static enum vkd3d_result vsir_program_lower_udiv(struct vsir_program *program,
ins->flags = udiv->flags;
- src_param_init_ssa(&ins->src[0], program->ssa_count, - udiv->src[0].reg.data_type, udiv->src[0].reg.dimension); - src_param_init_ssa(&ins->src[1], program->ssa_count + 1, - udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + src_param_init_ssa(&ins->src[0], src0_id, udiv->src[0].reg.data_type, udiv->src[0].reg.dimension); + src_param_init_ssa(&ins->src[1], divisor_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + dst_param_init_ssa(&ins->dst[0], program->ssa_count, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + + ins = vsir_program_iterator_next(it); + if (!(vsir_instruction_init_with_params(program, ins, &udiv->location, VSIR_OP_MOVC, 1, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + src_param_init_ssa(&ins->src[0], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + src_param_init_ssa(&ins->src[1], program->ssa_count, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); + vsir_register_init(&ins->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); + ins->src[2].reg.dimension = udiv->src[1].reg.dimension; + ins->src[2].reg.u.immconst_u32[0] = UINT_MAX; + ins->src[2].reg.u.immconst_u32[1] = UINT_MAX; + ins->src[2].reg.u.immconst_u32[2] = UINT_MAX; + ins->src[2].reg.u.immconst_u32[3] = UINT_MAX; + if (ins->src[2].reg.dimension == VSIR_DIMENSION_VEC4) + ins->src[2].swizzle = VKD3D_SHADER_NO_SWIZZLE; ins->dst[0] = udiv->dst[1]; + + ++program->ssa_count; }
vkd3d_shader_instruction_make_nop(udiv); - program->ssa_count += 2;
return VKD3D_OK; } @@ -1410,7 +1728,8 @@ static enum vkd3d_result vsir_program_lower_texcrd(struct vsir_program *program, static enum vkd3d_result vsir_program_lower_texld_sm1(struct vsir_program *program, struct vkd3d_shader_instruction *ins, struct vkd3d_shader_message_context *message_context) { - unsigned int idx = ins->src[0].reg.idx[0].offset; + const struct vkd3d_shader_descriptor_info1 *sampler; + unsigned int idx = ins->dst[0].reg.idx[0].offset; struct vkd3d_shader_src_param *srcs;
/* texld DST, t# -> sample DST, t#, resource#, sampler# */ @@ -1422,7 +1741,7 @@ static enum vkd3d_result vsir_program_lower_texld_sm1(struct vsir_program *progr return VKD3D_ERROR_NOT_IMPLEMENTED; }
- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + if (!(srcs = vsir_program_get_src_params(program, 4))) return VKD3D_ERROR_OUT_OF_MEMORY;
/* Note we run before I/O normalization. */ @@ -1430,9 +1749,26 @@ static enum vkd3d_result vsir_program_lower_texld_sm1(struct vsir_program *progr vsir_src_param_init_resource(&srcs[1], idx, idx); vsir_src_param_init_sampler(&srcs[2], idx, idx);
- ins->opcode = VSIR_OP_SAMPLE; - ins->src = srcs; - ins->src_count = 3; + sampler = vkd3d_shader_find_descriptor(&program->descriptors, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, idx); + if (sampler->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE) + { + enum vkd3d_shader_swizzle_component ref = vsir_swizzle_get_component(srcs[0].swizzle, 2); + + ins->opcode = VSIR_OP_SAMPLE_C; + ins->src = srcs; + ins->src_count = 4; + + srcs[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + srcs[3] = srcs[0]; + srcs[3].swizzle = vkd3d_shader_create_swizzle(ref, ref, ref, ref); + } + else + { + ins->opcode = VSIR_OP_SAMPLE; + ins->src = srcs; + ins->src_count = 3; + }
return VKD3D_OK; } @@ -1492,20 +1828,35 @@ static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, static enum vkd3d_result vsir_program_lower_texld(struct vsir_program *program, struct vkd3d_shader_instruction *tex, struct vkd3d_shader_message_context *message_context) { + const struct vkd3d_shader_descriptor_info1 *sampler; unsigned int idx = tex->src[1].reg.idx[0].offset; struct vkd3d_shader_src_param *srcs;
VKD3D_ASSERT(tex->src[1].reg.idx_count == 1); VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr);
- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 4))) + if (!(srcs = vsir_program_get_src_params(program, 4))) return VKD3D_ERROR_OUT_OF_MEMORY;
srcs[0] = tex->src[0]; vsir_src_param_init_resource(&srcs[1], idx, idx); vsir_src_param_init_sampler(&srcs[2], idx, idx);
- if (!tex->flags) + sampler = vkd3d_shader_find_descriptor(&program->descriptors, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, idx); + if (sampler->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE) + { + enum vkd3d_shader_swizzle_component ref = vsir_swizzle_get_component(srcs[0].swizzle, 2); + + tex->opcode = VSIR_OP_SAMPLE_C; + tex->src = srcs; + tex->src_count = 4; + + srcs[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + srcs[3] = srcs[0]; + srcs[3].swizzle = vkd3d_shader_create_swizzle(ref, ref, ref, ref); + } + else if (!tex->flags) { tex->opcode = VSIR_OP_SAMPLE; tex->src = srcs; @@ -1541,7 +1892,7 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, VKD3D_ASSERT(texldd->src[1].reg.idx_count == 1); VKD3D_ASSERT(!texldd->src[1].reg.idx[0].rel_addr);
- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) + if (!(srcs = vsir_program_get_src_params(program, 5))) return VKD3D_ERROR_OUT_OF_MEMORY;
srcs[0] = texldd->src[0]; @@ -1567,7 +1918,7 @@ static enum vkd3d_result vsir_program_lower_texldl(struct vsir_program *program, VKD3D_ASSERT(texldl->src[1].reg.idx_count == 1); VKD3D_ASSERT(!texldl->src[1].reg.idx[0].rel_addr);
- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 4))) + if (!(srcs = vsir_program_get_src_params(program, 4))) return VKD3D_ERROR_OUT_OF_MEMORY;
srcs[0] = texldl->src[0]; @@ -1585,211 +1936,380 @@ static enum vkd3d_result vsir_program_lower_texldl(struct vsir_program *program, return VKD3D_OK; }
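/* Editorial note: a minimal model of the coordinate rewrite the SAMPLE_C
 * paths above perform for comparison (shadow) samplers; the helper is
 * hypothetical, not vkd3d API, and assumes an identity swizzle on the
 * incoming coordinate. One vec4 feeds both operands: .xy locates the texel,
 * and component 2 is broadcast as the depth reference (the zzzz-style
 * swizzle applied to srcs[3]). */
static void split_shadow_coord(const float coord[4], float location[2], float *ref)
{
    location[0] = coord[0];
    location[1] = coord[1];
    *ref = coord[2];
}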
-static enum vkd3d_result vsir_program_lower_dcl_input(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *ins) { - switch (ins->declaration.dst.reg.type) + const struct vkd3d_shader_descriptor_info1 *sampler; + unsigned int idx = ins->dst[0].reg.idx[0].offset; + struct vkd3d_shader_src_param *srcs; + + /* tex t# -> sample t#, t#, resource#, sampler# + * Note that the t# destination will subsequently be turned into a temp. */ + + /* We run before I/O normalization. */ + VKD3D_ASSERT(program->normalisation_level < VSIR_NORMALISED_SM6); + + if (!(srcs = vsir_program_get_src_params(program, 4))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_src_param_init(&srcs[0], VKD3DSPR_TEXTURE, VSIR_DATA_F32, 1); + srcs[0].reg.idx[0].offset = idx; + srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; + srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + + vsir_src_param_init_resource(&srcs[1], idx, idx); + vsir_src_param_init_sampler(&srcs[2], idx, idx); + + sampler = vkd3d_shader_find_descriptor(&program->descriptors, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, idx); + if (sampler->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE) { - case VKD3DSPR_INPUT: - case VKD3DSPR_OUTPUT: - case VKD3DSPR_PATCHCONST: - case VKD3DSPR_INCONTROLPOINT: - case VKD3DSPR_OUTCONTROLPOINT: - break; + ins->opcode = VSIR_OP_SAMPLE_C; + ins->src = srcs; + ins->src_count = 4;
- case VKD3DSPR_PRIMID: - case VKD3DSPR_FORKINSTID: - case VKD3DSPR_JOININSTID: - case VKD3DSPR_THREADID: - case VKD3DSPR_THREADGROUPID: - case VKD3DSPR_LOCALTHREADID: - case VKD3DSPR_LOCALTHREADINDEX: - case VKD3DSPR_COVERAGE: - case VKD3DSPR_TESSCOORD: - case VKD3DSPR_OUTPOINTID: - case VKD3DSPR_GSINSTID: - case VKD3DSPR_WAVELANECOUNT: - case VKD3DSPR_WAVELANEINDEX: - bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); - break; + srcs[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
- default: - vkd3d_shader_error(ctx->message_context, &ins->location, - VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Internal compiler error: invalid register type %#x for DCL_INPUT.", - ins->declaration.dst.reg.type); - return VKD3D_ERROR; + srcs[3] = srcs[0]; + srcs[3].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); + } + else + { + ins->opcode = VSIR_OP_SAMPLE; + ins->src = srcs; + ins->src_count = 3; }
return VKD3D_OK; }
-static enum vkd3d_result vsir_program_lower_dcl_output(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +static enum vkd3d_result vsir_program_lower_texcoord(struct vsir_program *program, + struct vkd3d_shader_instruction *ins) { - switch (ins->declaration.dst.reg.type) - { - case VKD3DSPR_INPUT: - case VKD3DSPR_OUTPUT: - case VKD3DSPR_PATCHCONST: - case VKD3DSPR_INCONTROLPOINT: - case VKD3DSPR_OUTCONTROLPOINT: - break; + unsigned int idx = ins->dst[0].reg.idx[0].offset; + struct vkd3d_shader_src_param *srcs;
- case VKD3DSPR_DEPTHOUT: - case VKD3DSPR_SAMPLEMASK: - case VKD3DSPR_DEPTHOUTGE: - case VKD3DSPR_DEPTHOUTLE: - case VKD3DSPR_OUTSTENCILREF: - bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); - break; + /* texcoord t# -> saturate t#, t# + * Note that the t# destination will subsequently be turned into a temp. */
- default: - vkd3d_shader_error(ctx->message_context, &ins->location, - VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Internal compiler error: invalid register type %#x for DCL_OUTPUT.", - ins->declaration.dst.reg.type); - return VKD3D_ERROR; - } + /* We run before I/O normalization. */ + VKD3D_ASSERT(program->normalisation_level < VSIR_NORMALISED_SM6); + + if (!(srcs = vsir_program_get_src_params(program, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_src_param_init(&srcs[0], VKD3DSPR_TEXTURE, VSIR_DATA_F32, 1); + srcs[0].reg.idx[0].offset = idx; + srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; + srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + + ins->opcode = VSIR_OP_SATURATE; + ins->src = srcs; + ins->src_count = 1;
return VKD3D_OK; }
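/* Editorial note: reference semantics for the VSIR_OP_SATURATE opcode the
 * lowering above emits, as plain per-component C (NaN behaviour is left to
 * the opcode's own definition and ignored here): */
static float example_saturate(float x)
{
    return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
}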
-static enum vkd3d_result vsir_program_lower_d3dbc_instructions(struct vsir_program *program, - struct vsir_transformation_context *ctx) +static struct vkd3d_shader_instruction *generate_bump_coords(struct vsir_program *program, + struct vsir_program_iterator *it, uint32_t idx, const struct vkd3d_shader_src_param *coords, + const struct vkd3d_shader_src_param *perturbation, const struct vkd3d_shader_location *loc) { - struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); - struct vkd3d_shader_message_context *message_context = ctx->message_context; struct vkd3d_shader_instruction *ins; - unsigned int tmp_idx = ~0u; + uint32_t ssa_temp, ssa_coords;
- for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + /* We generate the following code: + * + * mad srTMP.xy, PERTURBATION.xx, BUMP_MATRIX#.xy, COORDS.xy + * mad srCOORDS.xy, PERTURBATION.yy, BUMP_MATRIX#.zw, srTMP.xy + */ + + ssa_temp = program->ssa_count++; + ssa_coords = program->ssa_count++; + + ins = vsir_program_iterator_current(it); + if (!vsir_instruction_init_with_params(program, ins, loc, VSIR_OP_MAD, 1, 3)) + return false; + dst_param_init_ssa_float4(&ins->dst[0], ssa_temp); + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; + ins->src[0] = *perturbation; + ins->src[0].swizzle = vsir_combine_swizzles(perturbation->swizzle, VKD3D_SHADER_SWIZZLE(X, X, X, X)); + src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_0 + idx, VSIR_DATA_F32); + ins->src[2] = *coords; + + ins = vsir_program_iterator_next(it); + if (!vsir_instruction_init_with_params(program, ins, loc, VSIR_OP_MAD, 1, 3)) + return false; + dst_param_init_ssa_float4(&ins->dst[0], ssa_coords); + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; + ins->src[0] = *perturbation; + ins->src[0].swizzle = vsir_combine_swizzles(perturbation->swizzle, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); + src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_0 + idx, VSIR_DATA_F32); + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(Z, W, W, W); + src_param_init_ssa_float4(&ins->src[2], ssa_temp); + ins->src[2].swizzle = VKD3D_SHADER_SWIZZLE(X, Y, Y, Y); + + return ins; +} + +static enum vkd3d_result vsir_program_lower_bem(struct vsir_program *program, struct vsir_program_iterator *it) +{ + struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(it); + const struct vkd3d_shader_location location = ins->location; + const struct vkd3d_shader_src_param *src = ins->src; + const struct vkd3d_shader_dst_param *dst = ins->dst; + + /* bem DST.xy, SRC0, SRC1 + * -> + * mad srTMP.xy, SRC1.xx, BUMP_MATRIX#.xy, SRC0.xy + * mad DST.xy, SRC1.yy, BUMP_MATRIX#.zw, srTMP.xy */ + + if (!vsir_program_iterator_insert_after(it, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!(ins = generate_bump_coords(program, it, dst[0].reg.idx[0].offset, &src[0], &src[1], &location))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + ins->dst[0] = dst[0]; + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_texbem(struct vsir_program *program, + struct vsir_program_iterator *it, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(it); + const struct vkd3d_shader_location location = ins->location; + const struct vkd3d_shader_descriptor_info1 *descriptor; + const struct vkd3d_shader_src_param *src = ins->src; + bool is_texbeml = (ins->opcode == VSIR_OP_TEXBEML); + unsigned int idx = ins->dst[0].reg.idx[0].offset; + uint32_t ssa_coords, ssa_luminance, ssa_sample; + struct vkd3d_shader_src_param orig_coords; + + /* texbem t#, SRC + * -> + * bem srCOORDS.xy, t#, SRC + * texld t#, srCOORDS + * -> + * mad srTMP.xy, SRC.xx, BUMP_MATRIX#.xy, t#.xy + * mad srCOORDS.xy, SRC.yy, BUMP_MATRIX#.zw, srTMP.xy + * sample t#, srCOORDS, resource#, sampler# + * + * Luminance then adds: + * + * mad srLUM.x, SRC.z, BUMP_LUMINANCE_SCALE#, BUMP_LUMINANCE_OFFSET# + * mul t#, t#, srLUM.xxxx + * + * Note that the t# destination will subsequently be turned into a temp. 
*/ + + descriptor = vkd3d_shader_find_descriptor(&program->descriptors, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, idx); + if (descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE) { - enum vkd3d_result ret; + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unhandled TEXBEM(L) with a comparison sampler."); + return VKD3D_ERROR_NOT_IMPLEMENTED; + }
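/* Editorial note: expanded per component, the two MADs emitted by
 * generate_bump_coords() compute
 *     u' = du * M00 + dv * M10 + u;
 *     v' = du * M01 + dv * M11 + v;
 * where (du, dv) is the perturbation, (u, v) the incoming coordinate, and
 * the BUMP_MATRIX parameter is laid out as {M00, M01, M10, M11} (a layout
 * inferred from the .xy/.zw swizzles above). */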
- switch (ins->opcode) - { - case VSIR_OP_TEXCRD: - ret = vsir_program_lower_texcrd(program, ins, message_context); - break; + descriptor = vkd3d_shader_find_descriptor(&program->descriptors, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, idx); + if (descriptor->resource_type != VKD3D_SHADER_RESOURCE_TEXTURE_2D) + { + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unhandled TEXBEM(L) with resource dimension %#x.", descriptor->resource_type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + }
- case VSIR_OP_TEXLD: - if (program->shader_version.major == 1) - ret = vsir_program_lower_texld_sm1(program, ins, message_context); - else if (ins->flags == VKD3DSI_TEXLD_PROJECT) - ret = vsir_program_lower_texldp(program, &it, &tmp_idx); - else - ret = vsir_program_lower_texld(program, ins, message_context); - break; + if (!vsir_program_iterator_insert_after(it, is_texbeml ? 4 : 2)) + return VKD3D_ERROR_OUT_OF_MEMORY;
- default: - ret = VKD3D_OK; - break; - } + vsir_src_param_init(&orig_coords, VKD3DSPR_TEXTURE, VSIR_DATA_F32, 1); + orig_coords.reg.idx[0].offset = idx; + orig_coords.reg.dimension = VSIR_DIMENSION_VEC4; + orig_coords.swizzle = VKD3D_SHADER_NO_SWIZZLE;
- if (ret < 0) - return ret; + if (!(ins = generate_bump_coords(program, it, idx, &orig_coords, &src[0], &location))) + return VKD3D_ERROR_OUT_OF_MEMORY; + ssa_coords = ins->dst[0].reg.idx[0].offset; + + ins = vsir_program_iterator_next(it); + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_SAMPLE, 1, 3)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_TEXTURE, VSIR_DATA_F32, 1); + ins->dst[0].reg.idx[0].offset = idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + src_param_init_ssa_float4(&ins->src[0], ssa_coords); + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, Y, Y, Y); + vsir_src_param_init_resource(&ins->src[1], idx, idx); + vsir_src_param_init_sampler(&ins->src[2], idx, idx); + + if (is_texbeml) + { + enum vkd3d_shader_swizzle_component z = vsir_swizzle_get_component(src[0].swizzle, 2); + + ssa_sample = program->ssa_count++; + ssa_luminance = program->ssa_count++; + + /* Replace t# destination of the SAMPLE instruction with an SSA value. */ + dst_param_init_ssa_float4(&ins->dst[0], ssa_sample); + + ins = vsir_program_iterator_next(it); + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_MAD, 1, 3)) + return VKD3D_ERROR_OUT_OF_MEMORY; + dst_param_init_ssa_float4(&ins->dst[0], ssa_luminance); + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; + ins->src[0] = src[0]; + ins->src[0].swizzle = vkd3d_shader_create_swizzle(z, z, z, z); + src_param_init_parameter(&ins->src[1], + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_0 + idx, VSIR_DATA_F32); + src_param_init_parameter(&ins->src[2], + VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_0 + idx, VSIR_DATA_F32); + + ins = vsir_program_iterator_next(it); + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_MUL, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_TEXTURE, VSIR_DATA_F32, 1); + ins->dst[0].reg.idx[0].offset = idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + src_param_init_ssa_float4(&ins->src[0], ssa_sample); + src_param_init_ssa_float4(&ins->src[1], ssa_luminance); + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + } + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_dcl_input(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +{ + switch (ins->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + break; + + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); + break; + + default: + vkd3d_shader_error(ctx->message_context, &ins->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Internal compiler error: invalid register type %#x for DCL_INPUT.", + ins->declaration.dst.reg.type); + return VKD3D_ERROR; }
return VKD3D_OK; }
-static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, +static enum vkd3d_result vsir_program_lower_dcl_output(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +{ + switch (ins->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + break; + + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTSTENCILREF: + bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); + break; + + default: + vkd3d_shader_error(ctx->message_context, &ins->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Internal compiler error: invalid register type %#x for DCL_OUTPUT.", + ins->declaration.dst.reg.type); + return VKD3D_ERROR; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_d3dbc_instructions(struct vsir_program *program, struct vsir_transformation_context *ctx) { struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct vkd3d_shader_message_context *message_context = ctx->message_context; struct vkd3d_shader_instruction *ins; unsigned int tmp_idx = ~0u; - enum vkd3d_result ret;
for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { + enum vkd3d_result ret; + switch (ins->opcode) { - case VSIR_OP_IFC: - if ((ret = vsir_program_lower_ifc(program, &it, &tmp_idx, message_context)) < 0) - return ret; - break; - - case VSIR_OP_TEXKILL: - if ((ret = vsir_program_lower_texkill(program, &it, &tmp_idx)) < 0) - return ret; + case VSIR_OP_BEM: + ret = vsir_program_lower_bem(program, &it); break;
- case VSIR_OP_MAD: - if ((ret = vsir_program_lower_precise_mad(program, &it, &tmp_idx)) < 0) - return ret; + case VSIR_OP_IFC: + ret = vsir_program_lower_ifc(program, &it, &tmp_idx, message_context); break;
- case VSIR_OP_DCL: - case VSIR_OP_DCL_CONSTANT_BUFFER: - case VSIR_OP_DCL_GLOBAL_FLAGS: - case VSIR_OP_DCL_SAMPLER: - case VSIR_OP_DCL_TEMPS: - case VSIR_OP_DCL_TESSELLATOR_DOMAIN: - case VSIR_OP_DCL_THREAD_GROUP: - case VSIR_OP_DCL_UAV_TYPED: - vkd3d_shader_instruction_make_nop(ins); + case VSIR_OP_SINCOS: + ret = vsir_program_lower_sm1_sincos(program, &it); break;
- case VSIR_OP_DCL_INPUT: - vsir_program_lower_dcl_input(program, ins, ctx); - vkd3d_shader_instruction_make_nop(ins); + case VSIR_OP_TEXBEM: + case VSIR_OP_TEXBEML: + ret = vsir_program_lower_texbem(program, &it, message_context); break;
- case VSIR_OP_DCL_OUTPUT: - vsir_program_lower_dcl_output(program, ins, ctx); - vkd3d_shader_instruction_make_nop(ins); + case VSIR_OP_TEXCOORD: + ret = vsir_program_lower_texcoord(program, ins); break;
- case VSIR_OP_DCL_INPUT_SGV: - case VSIR_OP_DCL_INPUT_SIV: - case VSIR_OP_DCL_INPUT_PS: - case VSIR_OP_DCL_INPUT_PS_SGV: - case VSIR_OP_DCL_INPUT_PS_SIV: - case VSIR_OP_DCL_OUTPUT_SGV: - case VSIR_OP_DCL_OUTPUT_SIV: - vkd3d_shader_instruction_make_nop(ins); + case VSIR_OP_TEXCRD: + ret = vsir_program_lower_texcrd(program, ins, message_context); break;
- case VSIR_OP_IMUL: - case VSIR_OP_UMUL: - if ((ret = vsir_program_lower_imul(program, ins, ctx)) < 0) - return ret; + case VSIR_OP_TEXKILL: + ret = vsir_program_lower_texkill(program, &it, &tmp_idx); break;
- case VSIR_OP_UDIV: - if ((ret = vsir_program_lower_udiv(program, &it, ctx)) < 0) - return ret; + case VSIR_OP_TEX: + ret = vsir_program_lower_tex(program, ins); break;
- case VSIR_OP_SINCOS: - if (ins->dst_count == 1) - { - if ((ret = vsir_program_lower_sm1_sincos(program, &it)) < 0) - return ret; - } + case VSIR_OP_TEXLD: + if (program->shader_version.major == 1) + ret = vsir_program_lower_texld_sm1(program, ins, message_context); + else if (ins->flags == VKD3DSI_TEXLD_PROJECT) + ret = vsir_program_lower_texldp(program, &it, &tmp_idx); else - { - if ((ret = vsir_program_lower_sm4_sincos(program, &it, ctx)) < 0) - return ret; - } + ret = vsir_program_lower_texld(program, ins, message_context); break;
case VSIR_OP_TEXLDD: - if ((ret = vsir_program_lower_texldd(program, ins)) < 0) - return ret; + ret = vsir_program_lower_texldd(program, ins); break;
case VSIR_OP_TEXLDL: - if ((ret = vsir_program_lower_texldl(program, ins)) < 0) - return ret; + ret = vsir_program_lower_texldl(program, ins); break;
- case VSIR_OP_TEXBEM: - case VSIR_OP_TEXBEML: - case VSIR_OP_TEXCOORD: case VSIR_OP_TEXDEPTH: case VSIR_OP_TEXDP3: case VSIR_OP_TEXDP3TEX: @@ -1809,63 +2329,318 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return VKD3D_ERROR_NOT_IMPLEMENTED;
default: + ret = VKD3D_OK; break; } + + if (ret < 0) + return ret; }
return VKD3D_OK; }
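/* Editorial note: the vsir_program_lower_modifiers() pass below rewrites
 * register modifiers as explicit instructions. An illustrative rewrite in
 * VSIR pseudo-assembly (SSA names hypothetical):
 *     add r0, -|r1|, r2      ->  abs sr0, r1
 *                                neg sr1, sr0
 *                                add r0, sr1, r2
 *     mul_sat r0, r1, r2     ->  mul sr2, r1, r2
 *                                saturate r0, sr2
 */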
-/* Ensure that the program closes with a ret. sm1 programs do not, by default. - * Many of our IR passes rely on this in order to insert instructions at the - * end of execution. */ -static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, +static enum vkd3d_result vsir_program_lower_modifiers(struct vsir_program *program, struct vsir_transformation_context *ctx) { - struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); - static const struct vkd3d_shader_location no_loc; - struct vkd3d_shader_instruction *ins; + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions), new_it; + struct vkd3d_shader_instruction *ins, *new_ins; + enum vkd3d_result ret = VKD3D_OK; + unsigned int i, j;
- ins = vsir_program_iterator_tail(&it); - if (ins && ins->opcode == VSIR_OP_RET) - return VKD3D_OK; + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + for (i = 0; i < ins->src_count; ++i) + { + enum vkd3d_shader_opcode new_opcodes[2] = {VSIR_OP_NOP, VSIR_OP_NOP}; + struct vkd3d_shader_src_param *src = &ins->src[i];
- if (!(ins = vsir_program_append(program))) - return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_instruction_init(ins, &no_loc, VSIR_OP_RET); + switch (src->modifiers) + { + case VKD3DSPSM_NONE: + continue;
- return VKD3D_OK; -} + case VKD3DSPSM_ABS: + new_opcodes[0] = VSIR_OP_ABS; + break;
-/* ps_1_* outputs color in r0. Add an instruction to copy that to oC0. - * We don't need to modify the signature since it already contains COLOR. */ -static enum vkd3d_result vsir_program_normalise_ps1_output(struct vsir_program *program, - struct vsir_transformation_context *ctx) -{ - struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_location loc; + case VKD3DSPSM_NEG: + new_opcodes[0] = data_type_is_integer(src->reg.data_type) ? VSIR_OP_INEG : VSIR_OP_NEG; + break;
- if (!(ins = vsir_program_iterator_tail(&it))) - return VKD3D_OK; - loc = ins->location; + case VKD3DSPSM_ABSNEG: + new_opcodes[0] = VSIR_OP_ABS; + new_opcodes[1] = VSIR_OP_NEG; + break;
- if (!(ins = vsir_program_append(program))) - return VKD3D_ERROR_OUT_OF_MEMORY; - if (!vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MOV, 1, 1)) - { - vsir_instruction_init(ins, &loc, VSIR_OP_NOP); - return VKD3D_ERROR_OUT_OF_MEMORY; + default: + vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to unimplemented feature: Source modifier %#x.", src->modifiers); + ret = VKD3D_ERROR_NOT_IMPLEMENTED; + continue; + } + + for (j = 0; j < 2 && new_opcodes[j] != VSIR_OP_NOP; ++j) + { + if (!(new_ins = vsir_program_iterator_insert_before(&it, &new_it, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = vsir_program_iterator_current(&it); + + if (!vsir_instruction_init_with_params(program, new_ins, &ins->location, new_opcodes[j], 1, 1)) + { + vkd3d_shader_instruction_make_nop(new_ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + new_ins->src[0] = *src; + new_ins->src[0].modifiers = VKD3DSPSM_NONE; + + dst_param_init_ssa(&new_ins->dst[0], program->ssa_count, src->reg.data_type, src->reg.dimension); + src_param_init_ssa(src, program->ssa_count, src->reg.data_type, src->reg.dimension); + + if (data_type_is_64_bit(src->reg.data_type)) + { + new_ins->dst[0].write_mask = vsir_write_mask_64_from_32(new_ins->dst[0].write_mask); + src->swizzle = vsir_swizzle_64_from_32(src->swizzle); + } + + ++program->ssa_count; + } + } + + for (i = 0; i < ins->dst_count; ++i) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[i]; + + /* It is always legitimate to ignore _pp. */ + dst->modifiers &= ~VKD3DSPDM_PARTIALPRECISION; + + if (dst->modifiers & ~VKD3DSPDM_SATURATE) + { + vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to unimplemented feature: Destination modifier %#x.", dst->modifiers); + ret = VKD3D_ERROR_NOT_IMPLEMENTED; + continue; + } + + if (dst->modifiers & VKD3DSPDM_SATURATE) + { + if (!vsir_program_iterator_insert_after(&it, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + new_ins = vsir_program_iterator_next(&it); + ins = vsir_program_iterator_prev(&it); + + if (!vsir_instruction_init_with_params(program, new_ins, &ins->location, VSIR_OP_SATURATE, 1, 1)) + { + vkd3d_shader_instruction_make_nop(new_ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + new_ins->dst[0] = *dst; + new_ins->dst[0].modifiers &= ~VKD3DSPDM_SATURATE; + + dst_param_init_ssa(dst, program->ssa_count, dst->reg.data_type, dst->reg.dimension); + src_param_init_ssa(&new_ins->src[0], program->ssa_count, dst->reg.data_type, dst->reg.dimension); + + if (data_type_is_64_bit(dst->reg.data_type)) + { + dst->write_mask = vsir_write_mask_64_from_32(dst->write_mask); + new_ins->src[0].swizzle = vsir_swizzle_64_from_32(new_ins->src[0].swizzle); + } + + ++program->ssa_count; + } + } + } + + program->has_no_modifiers = true; + + return ret; +} + +static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + enum vkd3d_result ret; + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + switch (ins->opcode) + { + case VSIR_OP_MAD: + if ((ret = vsir_program_lower_precise_mad(program, &it)) < 0) + return ret; + break; + + case VSIR_OP_DCL: + case VSIR_OP_DCL_CONSTANT_BUFFER: + case VSIR_OP_DCL_GLOBAL_FLAGS: + case VSIR_OP_DCL_IMMEDIATE_CONSTANT_BUFFER: + case VSIR_OP_DCL_INPUT_PRIMITIVE: + 
case VSIR_OP_DCL_OUTPUT_TOPOLOGY: + case VSIR_OP_DCL_SAMPLER: + case VSIR_OP_DCL_TEMPS: + case VSIR_OP_DCL_TESSELLATOR_DOMAIN: + case VSIR_OP_DCL_THREAD_GROUP: + case VSIR_OP_DCL_UAV_TYPED: + vkd3d_shader_instruction_make_nop(ins); + break; + + case VSIR_OP_DCL_INPUT: + vsir_program_lower_dcl_input(program, ins, ctx); + vkd3d_shader_instruction_make_nop(ins); + break; + + case VSIR_OP_DCL_OUTPUT: + vsir_program_lower_dcl_output(program, ins, ctx); + vkd3d_shader_instruction_make_nop(ins); + break; + + case VSIR_OP_DCL_INPUT_SGV: + case VSIR_OP_DCL_INPUT_SIV: + case VSIR_OP_DCL_INPUT_PS: + case VSIR_OP_DCL_INPUT_PS_SGV: + case VSIR_OP_DCL_INPUT_PS_SIV: + case VSIR_OP_DCL_OUTPUT_SGV: + case VSIR_OP_DCL_OUTPUT_SIV: + vkd3d_shader_instruction_make_nop(ins); + break; + + case VSIR_OP_IMUL: + case VSIR_OP_UMUL: + if ((ret = vsir_program_lower_imul(program, ins, ctx)) < 0) + return ret; + break; + + case VSIR_OP_UDIV: + if ((ret = vsir_program_lower_udiv(program, &it, ctx)) < 0) + return ret; + break; + + case VSIR_OP_SINCOS: + if ((ret = vsir_program_lower_sm4_sincos(program, &it, ctx)) < 0) + return ret; + break; + + default: + break; + } + } + + return VKD3D_OK; +} + +/* 1.0-1.3 pixel shaders allow writing t# registers, at which point they + * effectively behave like normal r# temps. Convert them to r# registers. + * t# registers which are read before being written contain TEXCOORD varyings, + * just as in 1.4 and 2.x, and will later be lowered to v# registers. + * + * Registers which are partially written are rejected by the native validator, + * but with a "read of uninitialized component" message that suggests that once + * any component of a t# register is written, none of the components contain + * texcoord data. */ +static enum vkd3d_result vsir_program_lower_texture_writes(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + unsigned int texture_temp_idx = ~0u; + uint32_t texture_written_mask = 0; + + /* We run before I/O normalization. */ + VKD3D_ASSERT(program->normalisation_level < VSIR_NORMALISED_SM6); + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + for (unsigned int i = 0; i < ins->src_count; ++i) + { + struct vkd3d_shader_src_param *src = &ins->src[i]; + + if (src->reg.type == VKD3DSPR_TEXTURE && bitmap_is_set(&texture_written_mask, src->reg.idx[0].offset)) + { + src->reg.type = VKD3DSPR_TEMP; + src->reg.idx[0].offset += texture_temp_idx; + } + } + + for (unsigned int i = 0; i < ins->dst_count; ++i) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[i]; + + if (dst->reg.type == VKD3DSPR_TEXTURE) + { + bitmap_set(&texture_written_mask, dst->reg.idx[0].offset); + if (texture_temp_idx == ~0u) + { + texture_temp_idx = program->temp_count; + /* These versions have 4 texture registers. */ + program->temp_count += 4; + } + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset += texture_temp_idx; + } + } + } + + return VKD3D_OK; +} + +/* Ensure that the program closes with a ret. sm1 programs do not, by default. + * Many of our IR passes rely on this in order to insert instructions at the + * end of execution. 
*/ +static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_location loc; + + if (!(ins = vsir_program_iterator_tail(&it))) + loc = ctx->null_location; + else if (ins->opcode == VSIR_OP_RET) + return VKD3D_OK; + else + loc = ins->location; + + if (!(ins = vsir_program_append(program))) + return VKD3D_ERROR_OUT_OF_MEMORY; + vsir_instruction_init(ins, &loc, VSIR_OP_RET); + + return VKD3D_OK; +} + +/* ps_1_* outputs color in r0. Add an instruction to copy that to oC0. + * We don't need to modify the signature since it already contains COLOR. */ +static enum vkd3d_result vsir_program_normalise_ps1_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_location loc; + + /* Note we run before I/O normalization. */ + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM4); + + if (!(ins = vsir_program_iterator_tail(&it))) + return VKD3D_OK; + loc = ins->location; + + if (!(ins = vsir_program_append(program))) + return VKD3D_ERROR_OUT_OF_MEMORY; + if (!vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_SATURATE, 1, 1)) + { + vsir_instruction_init(ins, &loc, VSIR_OP_NOP); + return VKD3D_ERROR_OUT_OF_MEMORY; }
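/* Editorial note: this VSIR_OP_SATURATE copy is equivalent to the MOV with
 * a _sat destination modifier it replaces; presumably the dedicated opcode
 * is used so the pass does not reintroduce a destination modifier after
 * vsir_program_lower_modifiers() has stripped them. */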
src_param_init_temp_float4(&ins->src[0], 0); ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; - /* Note we run before I/O normalization. */ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_COLOROUT, VSIR_DATA_F32, 1); ins->dst[0].reg.idx[0].offset = 0; ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; - ins->dst[0].modifiers = VKD3DSPDM_SATURATE;
return VKD3D_OK; } @@ -1923,7 +2698,7 @@ static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *progra struct vsir_transformation_context *ctx) { struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); - static const struct vkd3d_shader_location no_loc; + struct vkd3d_shader_location loc = ctx->null_location; struct vkd3d_shader_instruction *ins; unsigned int i;
@@ -1937,21 +2712,23 @@ static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *progra for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { if (!vsir_instruction_is_dcl(ins) && ins->opcode != VSIR_OP_LABEL && ins->opcode != VSIR_OP_NOP) + { + loc = ins->location; break; + } }
- vsir_program_iterator_prev(&it); - if (!vsir_program_iterator_insert_after(&it, 1)) + if (!(ins = vsir_program_iterator_insert_before_and_move(&it, 1))) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = vsir_program_iterator_next(&it);
- vsir_instruction_init_with_params(program, ins, &no_loc, VSIR_OP_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MOV, 1, 1); vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VSIR_DATA_F32, 1); ins->dst[0].reg.idx[0].offset = 0; ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask; vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VSIR_DATA_F32, 0); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; for (i = 0; i < 4; ++i) ins->src[0].reg.u.immconst_f32[i] = 1.0f; return VKD3D_OK; @@ -2162,10 +2939,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program continue;
loc = ins->location; - vsir_program_iterator_prev(&it); - if (!vsir_program_iterator_insert_after(&it, uninit_varying_count)) + if (!(ins = vsir_program_iterator_insert_before_and_move(&it, uninit_varying_count))) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = vsir_program_iterator_next(&it);
for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) { @@ -2175,6 +2950,7 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program dst_param_init_output(&ins->dst[0], VSIR_DATA_F32, e->register_index, e->mask); vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VSIR_DATA_F32, 0); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ins = vsir_program_iterator_next(&it); } } @@ -2200,8 +2976,6 @@ struct hull_flattener { struct vsir_program *program;
- unsigned int instance_count; - unsigned int phase_body_idx; enum vkd3d_shader_opcode phase; struct vkd3d_shader_location last_ret_location; unsigned int *ssa_map; @@ -2213,68 +2987,6 @@ static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flat return flattener->phase == VSIR_OP_HS_FORK_PHASE || flattener->phase == VSIR_OP_HS_JOIN_PHASE; }
-struct shader_phase_location -{ - unsigned int index; - unsigned int instance_count; - unsigned int instruction_count; -}; - -struct shader_phase_location_array -{ - /* Unlikely worst case: one phase for each component of each output register. */ - struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE]; - unsigned int count; -}; - -static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser, unsigned int index, - struct vkd3d_shader_instruction *ins, struct shader_phase_location_array *locations) -{ - struct shader_phase_location *loc; - bool b; - - if (ins->opcode == VSIR_OP_HS_FORK_PHASE || ins->opcode == VSIR_OP_HS_JOIN_PHASE) - { - b = flattener_is_in_fork_or_join_phase(normaliser); - /* Reset the phase info. */ - normaliser->phase_body_idx = ~0u; - normaliser->phase = ins->opcode; - normaliser->instance_count = 1; - /* Leave the first occurrence and delete the rest. */ - if (b) - vkd3d_shader_instruction_make_nop(ins); - return; - } - else if (ins->opcode == VSIR_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT - || ins->opcode == VSIR_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) - { - normaliser->instance_count = ins->declaration.count + !ins->declaration.count; - vkd3d_shader_instruction_make_nop(ins); - return; - } - - if (normaliser->phase == VSIR_OP_INVALID || vsir_instruction_is_dcl(ins)) - return; - - if (normaliser->phase_body_idx == ~0u) - normaliser->phase_body_idx = index; - - if (ins->opcode == VSIR_OP_RET) - { - normaliser->last_ret_location = ins->location; - vkd3d_shader_instruction_make_nop(ins); - if (locations->count >= ARRAY_SIZE(locations->locations)) - { - FIXME("Insufficient space for phase location.\n"); - return; - } - loc = &locations->locations[locations->count++]; - loc->index = normaliser->phase_body_idx; - loc->instance_count = normaliser->instance_count; - loc->instruction_count = index - normaliser->phase_body_idx; - } -} - static void flattener_fixup_ssa_register(struct hull_flattener *normaliser, struct vkd3d_shader_register *reg, unsigned int instance_id) { @@ -2337,85 +3049,131 @@ static void flattener_fixup_registers(struct hull_flattener *normaliser, flattener_fixup_register_indices(normaliser, &ins->dst[i].reg, instance_id); }
-static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser, - struct shader_phase_location_array *locations) +static enum vkd3d_result flattener_replicate_location(struct hull_flattener *normaliser, + struct vsir_program_iterator *it, size_t instance_count, size_t instruction_count) { - struct vkd3d_shader_instruction_array *instructions = &normaliser->program->instructions; - struct shader_phase_location *loc; - unsigned int i, j, k, end, count; - - for (i = 0, count = 0; i < locations->count; ++i) - count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count; + struct vsir_program_iterator dst_it, src_it, first_it; + struct vkd3d_shader_instruction *ins, *src_ins; + unsigned int i, j; + size_t count;
- if (!shader_instruction_array_reserve(instructions, instructions->count + count)) + VKD3D_ASSERT(instance_count); + count = (instance_count - 1) * instruction_count; + if (!vsir_program_iterator_insert_before(it, &first_it, count)) return VKD3D_ERROR_OUT_OF_MEMORY; - end = instructions->count; - instructions->count += count;
- for (i = locations->count; i > 0; --i) + /* Make a copy of the non-dcl instructions for each instance. */ + dst_it = first_it; + for (i = 1; i < instance_count; ++i) { - loc = &locations->locations[i - 1]; - j = loc->index + loc->instruction_count; - memmove(&instructions->elements[j + count], &instructions->elements[j], - (end - j) * sizeof(*instructions->elements)); - end = j; - count -= (loc->instance_count - 1) * loc->instruction_count; - loc->index += count; - } - - for (i = 0, count = 0; i < locations->count; ++i) - { - loc = &locations->locations[i]; - /* Make a copy of the non-dcl instructions for each instance. */ - for (j = 1; j < loc->instance_count; ++j) + src_it = *it; + for (j = 0; j < instruction_count; ++j) { - for (k = 0; k < loc->instruction_count; ++k) - { - if (!shader_instruction_array_clone_instruction(instructions, - loc->index + loc->instruction_count * j + k, loc->index + k)) - return VKD3D_ERROR_OUT_OF_MEMORY; - } + src_ins = vsir_program_iterator_current(&src_it); + if (!vsir_program_iterator_clone_instruction(normaliser->program, &dst_it, src_ins)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_program_iterator_next(&dst_it); + vsir_program_iterator_next(&src_it); } - /* Replace each reference to the instance id with a constant instance id. */ - for (j = 0; j < loc->instance_count; ++j) - { - if (j != 0) - memset(normaliser->ssa_map, 0xff, normaliser->orig_ssa_count * sizeof(*normaliser->ssa_map)); + } + /* Replace each reference to the instance id with a constant instance id. */ + *it = first_it; + for (i = 0; i < instance_count; ++i) + { + if (i) + memset(normaliser->ssa_map, 0xff, normaliser->orig_ssa_count * sizeof(*normaliser->ssa_map));
- for (k = 0; k < loc->instruction_count; ++k) - flattener_fixup_registers(normaliser, - &instructions->elements[loc->index + loc->instruction_count * j + k], j); + for (j = 0; j < instruction_count; ++j) + { + ins = vsir_program_iterator_current(it); + flattener_fixup_registers(normaliser, ins, i); + vsir_program_iterator_next(it); } }
return VKD3D_OK; }
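For reference, the rewritten replication keeps the old layout: open a gap of (instance_count - 1) * instruction_count slots in front of the phase body, copy the body into the gap once per extra instance, then stamp every instance with a constant instance id. A standalone sketch of that layout on a plain array (toy types and a toy replicate() helper, not vkd3d's API):

#include <stdlib.h>
#include <string.h>

struct toy_ins
{
    int opcode;
    int instance_id;    /* stand-in for the constant instance id fixup */
};

/* Replicate the body at [body, body + body_len) so that it appears
 * `instances' times in a row, each copy stamped with its instance index. */
static int replicate(struct toy_ins **elements, size_t *count,
        size_t body, size_t body_len, unsigned int instances)
{
    size_t extra, i, j;
    struct toy_ins *p;

    if (!instances)
        return -1;
    extra = (instances - 1) * body_len;
    if (!(p = realloc(*elements, (*count + extra) * sizeof(*p))))
        return -1;
    /* Open a gap of `extra' slots in front of the original body. */
    memmove(&p[body + extra], &p[body], (*count - body) * sizeof(*p));
    /* Fill the gap with copies of the body, which now lives after it. */
    for (i = 0; i < instances - 1; ++i)
        memcpy(&p[body + i * body_len], &p[body + extra], body_len * sizeof(*p));
    /* Stamp every instance, the original included, with a constant id. */
    for (i = 0; i < instances; ++i)
        for (j = 0; j < body_len; ++j)
            p[body + i * body_len + j].instance_id = (int)i;
    *elements = p;
    *count += extra;
    return 0;
}

The iterator version above does the same thing through vsir_program_iterator_insert_before() and vsir_program_iterator_clone_instruction(), so no index arithmetic survives the conversion.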
-static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_program *program, - struct vsir_transformation_context *ctx) +static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser) { - struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); - struct shader_phase_location_array locations; - struct hull_flattener flattener = {program}; + struct vsir_program_iterator it = vsir_program_iterator(&normaliser->program->instructions); + struct vsir_program_iterator phase_body_it; struct vkd3d_shader_instruction *ins; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; + bool b, phase_body_it_valid = false; + unsigned int instruction_count = 0; + unsigned int instance_count = 0; + enum vkd3d_result res;
- flattener.phase = VSIR_OP_INVALID; - locations.count = 0; - for (ins = vsir_program_iterator_head(&it), i = 0; ins; ins = vsir_program_iterator_next(&it), ++i) + normaliser->phase = VSIR_OP_INVALID; + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { - flattener_eliminate_phase_related_dcls(&flattener, i, ins, &locations); - } - bitmap_clear(program->io_dcls, VKD3DSPR_FORKINSTID); - bitmap_clear(program->io_dcls, VKD3DSPR_JOININSTID); + if (ins->opcode == VSIR_OP_HS_FORK_PHASE || ins->opcode == VSIR_OP_HS_JOIN_PHASE) + { + b = flattener_is_in_fork_or_join_phase(normaliser); + /* Reset the phase info. */ + phase_body_it_valid = false; + normaliser->phase = ins->opcode; + instance_count = 1; + instruction_count = 0; + /* Leave the first occurrence and delete the rest. */ + if (b) + vkd3d_shader_instruction_make_nop(ins); + continue; + } + else if (ins->opcode == VSIR_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT + || ins->opcode == VSIR_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + { + instance_count = ins->declaration.count + !ins->declaration.count; + vkd3d_shader_instruction_make_nop(ins); + ++instruction_count; + continue; + } + + if (normaliser->phase == VSIR_OP_INVALID) + continue; + + if (!phase_body_it_valid && !vsir_instruction_is_dcl(ins)) + { + phase_body_it_valid = true; + phase_body_it = it; + instruction_count = 0; + } + + if (ins->opcode == VSIR_OP_RET) + { + normaliser->last_ret_location = ins->location; + vkd3d_shader_instruction_make_nop(ins); + it = phase_body_it; + if ((res = flattener_replicate_location(normaliser, &it, + instance_count, instruction_count)) < 0) + return res; + phase_body_it_valid = false; + } + else + { + ++instruction_count; + } + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct hull_flattener flattener = {program}; + struct vkd3d_shader_instruction *ins; + enum vkd3d_result result = VKD3D_OK; + + bitmap_clear(program->io_dcls, VKD3DSPR_FORKINSTID); + bitmap_clear(program->io_dcls, VKD3DSPR_JOININSTID);
flattener.orig_ssa_count = program->ssa_count; if (!(flattener.ssa_map = vkd3d_calloc(flattener.orig_ssa_count, sizeof(*flattener.ssa_map)))) return VKD3D_ERROR_OUT_OF_MEMORY;
- result = flattener_flatten_phases(&flattener, &locations); - + result = flattener_flatten_phases(&flattener); vkd3d_free(flattener.ssa_map); flattener.ssa_map = NULL;
@@ -2434,7 +3192,7 @@ static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_pro
struct control_point_normaliser { - struct vkd3d_shader_instruction_array instructions; + struct vsir_program *program; enum vkd3d_shader_opcode phase; struct vkd3d_shader_src_param *outpointid_param; }; @@ -2446,20 +3204,15 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont
struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(struct vsir_program *program) { - struct vkd3d_shader_instruction_array *instructions = &program->instructions; struct vkd3d_shader_src_param *rel_addr;
- if (instructions->outpointid_param) - return instructions->outpointid_param; - - if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) + if (!(rel_addr = vsir_program_get_src_params(program, 1))) return NULL;
vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VSIR_DATA_U32, 0); rel_addr->swizzle = 0; rel_addr->modifiers = 0;
- instructions->outpointid_param = rel_addr; return rel_addr; }
@@ -2481,8 +3234,8 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param }
static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, - const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst, - const struct vkd3d_shader_location *location) + const struct shader_signature *s, unsigned int input_control_point_count, + struct vsir_program_iterator *dst_it, const struct vkd3d_shader_location *location) { struct vkd3d_shader_instruction *ins; const struct signature_element *e; @@ -2491,17 +3244,11 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p for (i = 0; i < s->element_count; ++i) count += !!s->elements[i].used_mask;
- if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) + if (!(ins = vsir_program_iterator_insert_before_and_move(dst_it, count))) return VKD3D_ERROR_OUT_OF_MEMORY; - - memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst], - (normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements)); - normaliser->instructions.count += count; - - ins = &normaliser->instructions.elements[dst]; vsir_instruction_init(ins, location, VSIR_OP_HS_CONTROL_POINT_PHASE);
- ++ins; + ins = vsir_program_iterator_next(dst_it);
for (i = 0; i < s->element_count; ++i) { @@ -2510,9 +3257,9 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p continue;
vsir_instruction_init(ins, location, VSIR_OP_MOV); - ins->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); + ins->dst = vsir_program_get_dst_params(normaliser->program, 1); ins->dst_count = 1; - ins->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); + ins->src = vsir_program_get_src_params(normaliser->program, 1); ins->src_count = 1;
if (!ins->dst || ! ins->src) @@ -2521,6 +3268,8 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p return VKD3D_ERROR_OUT_OF_MEMORY; }
+ VKD3D_ASSERT(normaliser->outpointid_param); + vsir_dst_param_init_io(&ins->dst[0], VKD3DSPR_OUTPUT, e, 2); ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->dst[0].reg.idx[0].offset = 0; @@ -2533,7 +3282,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p ins->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; ins->src[0].reg.idx[1].offset = e->register_index;
- ++ins; + ins = vsir_program_iterator_next(dst_it); }
vsir_instruction_init(ins, location, VSIR_OP_RET); @@ -2565,8 +3314,8 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i ERR("Failed to allocate src param.\n"); return VKD3D_ERROR_OUT_OF_MEMORY; } - normaliser.instructions = program->instructions; - it = vsir_program_iterator(&normaliser.instructions); + normaliser.program = program; + it = vsir_program_iterator(&normaliser.program->instructions); normaliser.phase = VSIR_OP_INVALID;
for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) @@ -2598,16 +3347,14 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i input_control_point_count = ins->declaration.count; break; case VSIR_OP_HS_CONTROL_POINT_PHASE: - program->instructions = normaliser.instructions; program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; return VKD3D_OK; case VSIR_OP_HS_FORK_PHASE: case VSIR_OP_HS_JOIN_PHASE: /* ins may be relocated if the instruction array expands. */ location = ins->location; - ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, - input_control_point_count, i, &location); - program->instructions = normaliser.instructions; + ret = control_point_normaliser_emit_hs_input(&normaliser, + &program->input_signature, input_control_point_count, &it, &location); program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; return ret; default: @@ -2615,7 +3362,6 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i } }
- program->instructions = normaliser.instructions; program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; return VKD3D_OK; } @@ -2641,12 +3387,7 @@ struct io_normaliser struct shader_signature *output_signature; struct shader_signature *patch_constant_signature;
- unsigned int instance_count; - unsigned int phase_body_idx; enum vkd3d_shader_opcode phase; - unsigned int output_control_point_count; - - struct vkd3d_shader_src_param *outpointid_param;
struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; @@ -3105,16 +3846,19 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par break;
case VKD3DSPR_RASTOUT: + /* Fog and point size are scalar, but fxc/d3dcompiler emits a full + * write mask when writing to them. */ + if (reg->idx[0].offset > 0) + { + write_mask = VKD3DSP_WRITEMASK_0; + dst_param->write_mask = write_mask; + } /* Leave point size as a system value for the backends to consume. */ if (reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) return true; reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; - /* Fog and point size are scalar, but fxc/d3dcompiler emits a full - * write mask when writing to them. */ - if (reg->idx[0].offset > 0) - write_mask = VKD3DSP_WRITEMASK_0; break;
default: @@ -3268,9 +4012,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program { switch (ins->opcode) { - case VSIR_OP_DCL_OUTPUT_CONTROL_POINT_COUNT: - normaliser.output_control_point_count = ins->declaration.count; - break; case VSIR_OP_DCL_INDEX_RANGE: if ((ret = io_normaliser_add_index_range(&normaliser, ins)) < 0) return ret; @@ -3355,6 +4096,7 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par { enum vkd3d_shader_d3dbc_constant_register set; struct vkd3d_shader_src_param *rel_addr; + unsigned int c; uint32_t index; size_t i, j;
@@ -3369,7 +4111,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par param->reg.idx_count = 0; param->reg.dimension = VSIR_DIMENSION_VEC4; for (j = 0; j < 4; ++j) - param->reg.u.immconst_u32[j] = normaliser->defs[i].value[j]; + { + c = vsir_swizzle_get_component(param->swizzle, j); + param->reg.u.immconst_u32[j] = normaliser->defs[i].value[c]; + } + param->swizzle = VKD3D_SHADER_NO_SWIZZLE; return; } } @@ -3521,6 +4267,17 @@ static enum vkd3d_result vsir_program_remove_dead_code(struct vsir_program *prog return VKD3D_OK; }
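The new loop folds the source swizzle into the inlined constant instead of copying components verbatim, which is why the swizzle can then be reset to VKD3D_SHADER_NO_SWIZZLE. A minimal sketch of the folding, using a toy 2-bit-per-component swizzle encoding rather than vkd3d's real layout and macros:

#include <stdio.h>

/* Toy encoding: component j of the swizzle occupies bits [2j, 2j + 1]. */
static unsigned int toy_swizzle_get_component(unsigned int swizzle, unsigned int j)
{
    return (swizzle >> (2u * j)) & 0x3u;
}

int main(void)
{
    static const unsigned int def[4] = {10, 20, 30, 40}; /* the constant def */
    const unsigned int swizzle = 1u | (2u << 2) | (3u << 4) | (0u << 6); /* .yzwx */
    unsigned int immconst[4], j, c;

    /* As in the hunk above: component j of the inlined immediate takes
     * component swizzle[j] of the definition, after which the parameter
     * reverts to the identity swizzle. */
    for (j = 0; j < 4; ++j)
    {
        c = toy_swizzle_get_component(swizzle, j);
        immconst[j] = def[c];
    }
    printf("%u %u %u %u\n", immconst[0], immconst[1], immconst[2], immconst[3]);
    return 0;
}

This prints "20 30 40 10": the .yzwx reordering is baked into the immediate, so dropping the swizzle afterwards loses no information.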
+/* NOTE: This invalidates all iterators, since the position of the + * vkd3d_shader_instruction_array itself changes. */ +static void vsir_program_replace_instructions(struct vsir_program *program, + struct vkd3d_shader_instruction_array *array) +{ + shader_instruction_array_destroy(&program->instructions); + + program->instructions = *array; + memset(array, 0, sizeof(*array)); +} + struct cf_flattener_if_info { struct vkd3d_shader_src_param *false_param; @@ -3544,7 +4301,7 @@ struct cf_flattener_switch_case
struct cf_flattener_switch_info { - size_t ins_location; + struct vsir_program_iterator ins_it; const struct vkd3d_shader_src_param *condition; unsigned int id; unsigned int merge_block_id; @@ -3579,9 +4336,7 @@ struct cf_flattener struct vkd3d_shader_location location; enum vkd3d_result status;
- struct vkd3d_shader_instruction *instructions; - size_t instruction_capacity; - size_t instruction_count; + struct vkd3d_shader_instruction_array instructions;
unsigned int block_id; const char **block_names; @@ -3604,16 +4359,17 @@ static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_re flattener->status = error; }
-static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count) +static struct vkd3d_shader_instruction *cf_flattener_instruction_append(struct cf_flattener *flattener) { - if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity, - flattener->instruction_count + count, sizeof(*flattener->instructions))) + struct vkd3d_shader_instruction *ins; + + if (!(ins = shader_instruction_array_append(&flattener->instructions))) { ERR("Failed to allocate instructions.\n"); cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); return NULL; } - return &flattener->instructions[flattener->instruction_count]; + return ins; }
static bool cf_flattener_copy_instruction(struct cf_flattener *flattener, @@ -3624,11 +4380,10 @@ static bool cf_flattener_copy_instruction(struct cf_flattener *flattener, if (instruction->opcode == VSIR_OP_NOP) return true;
- if (!(dst_ins = cf_flattener_require_space(flattener, 1))) + if (!(dst_ins = cf_flattener_instruction_append(flattener))) return false;
*dst_ins = *instruction; - ++flattener->instruction_count; return true; }
@@ -3656,12 +4411,13 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int { struct vkd3d_shader_instruction *ins;
- if (!(ins = cf_flattener_require_space(flattener, 1))) + if (!(ins = cf_flattener_instruction_append(flattener))) return; - if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program)) - ++flattener->instruction_count; - else + if (!vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program)) + { + vkd3d_shader_instruction_make_nop(ins); cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + } }
/* For conditional branches, this returns the false target branch parameter. */ @@ -3673,14 +4429,17 @@ static struct vkd3d_shader_src_param *cf_flattener_emit_branch(struct cf_flatten struct vkd3d_shader_src_param *src_params, *false_branch_param; struct vkd3d_shader_instruction *ins;
- if (!(ins = cf_flattener_require_space(flattener, 1))) + if (!(ins = cf_flattener_instruction_append(flattener))) return NULL; vsir_instruction_init(ins, &flattener->location, VSIR_OP_BRANCH);
if (condition) { if (!(src_params = instruction_src_params_alloc(ins, 4 + !!continue_block_id, flattener))) + { + vkd3d_shader_instruction_make_nop(ins); return NULL; + } src_params[0] = *condition; if (flags == VKD3D_SHADER_CONDITIONAL_OP_Z) { @@ -3701,7 +4460,10 @@ static struct vkd3d_shader_src_param *cf_flattener_emit_branch(struct cf_flatten else { if (!(src_params = instruction_src_params_alloc(ins, merge_block_id ? 3 : 1, flattener))) + { + vkd3d_shader_instruction_make_nop(ins); return NULL; + } vsir_src_param_init_label(&src_params[0], true_id); if (merge_block_id) { @@ -3713,8 +4475,6 @@ static struct vkd3d_shader_src_param *cf_flattener_emit_branch(struct cf_flatten false_branch_param = NULL; }
- ++flattener->instruction_count; - return false_branch_param; }
@@ -3820,9 +4580,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL; after_declarations_section = is_hull_shader;
- if (!cf_flattener_require_space(flattener, instructions->count + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; - it = vsir_program_iterator(instructions); for (instruction = vsir_program_iterator_head(&it); instruction; instruction = vsir_program_iterator_next(&it)) { @@ -3961,13 +4718,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
merge_block_id = cf_flattener_alloc_block_id(flattener);
- cf_info->u.switch_.ins_location = flattener->instruction_count; cf_info->u.switch_.condition = src;
- if (!(dst_ins = cf_flattener_require_space(flattener, 1))) + if (!(dst_ins = shader_instruction_array_append(&flattener->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; vsir_instruction_init(dst_ins, &instruction->location, VSIR_OP_SWITCH_MONOLITHIC); - ++flattener->instruction_count; + + cf_info->u.switch_.ins_it = vsir_program_iterator(&flattener->instructions); + vsir_program_iterator_tail(&cf_info->u.switch_.ins_it);
cf_info->u.switch_.id = flattener->switch_id; cf_info->u.switch_.merge_block_id = merge_block_id; @@ -4000,8 +4758,12 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte /* The SWITCH instruction is completed when the endswitch * instruction is processed because we do not know the number * of case statements or the default block id in advance.*/ - dst_ins = &flattener->instructions[cf_info->u.switch_.ins_location]; - if (!(src_params = instruction_src_params_alloc(dst_ins, cf_info->u.switch_.cases_count * 2 + 3, flattener))) + /* NOTE: This relies on iterators not being invalidated + * when new instructions are appended to the + * vkd3d_shader_instruction_array. */ + dst_ins = vsir_program_iterator_current(&cf_info->u.switch_.ins_it); + if (!(src_params = instruction_src_params_alloc(dst_ins, + cf_info->u.switch_.cases_count * 2 + 3, flattener))) { vkd3d_free(cf_info->u.switch_.cases); return VKD3D_ERROR_OUT_OF_MEMORY; @@ -4162,18 +4924,17 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi
VKD3D_ASSERT(program->cf_type == VSIR_CF_STRUCTURED);
+ shader_instruction_array_init(&flattener.instructions, 0); + if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) { - vkd3d_free(program->instructions.elements); - program->instructions.elements = flattener.instructions; - program->instructions.capacity = flattener.instruction_capacity; - program->instructions.count = flattener.instruction_count; + vsir_program_replace_instructions(program, &flattener.instructions); program->block_count = flattener.block_id; program->cf_type = VSIR_CF_BLOCKS; } else { - vkd3d_free(flattener.instructions); + shader_instruction_array_destroy(&flattener.instructions); }
vkd3d_free(flattener.control_flow_info); @@ -4190,17 +4951,6 @@ static unsigned int label_from_src_param(const struct vkd3d_shader_src_param *pa return param->reg.idx[0].offset; }
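The two NOTEs above hinge on the same property: vsir_program_replace_instructions() invalidates every iterator because the array's contents are swapped out wholesale, while plain appends do not, which is what the deferred SWITCH_MONOLITHIC fixup relies on. That property holds if an iterator stores the array plus an index and re-derives the element pointer on each access; a toy model of that distinction (toy types, not vkd3d's):

#include <stdlib.h>

struct toy_array
{
    int *elements;
    size_t count, capacity;
};

struct toy_iterator
{
    struct toy_array *array;
    size_t index;
};

static int *toy_array_append(struct toy_array *a)
{
    if (a->count == a->capacity)
    {
        size_t capacity = a->capacity ? a->capacity * 2 : 4;
        int *elements = realloc(a->elements, capacity * sizeof(*elements));

        if (!elements)
            return NULL;
        /* Raw pointers into the old allocation dangle here, but iterators
         * survive: they only hold the array and an index. */
        a->elements = elements;
        a->capacity = capacity;
    }
    return &a->elements[a->count++];
}

static int *toy_iterator_current(const struct toy_iterator *it)
{
    if (it->index >= it->array->count)
        return NULL;
    return &it->array->elements[it->index];
}

Replacing the array's contents, on the other hand, leaves every outstanding index pointing at unrelated (or freed) instructions, hence the warning on vsir_program_replace_instructions().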
-static bool reserve_instructions(struct vkd3d_shader_instruction **instructions, size_t *capacity, size_t count) -{ - if (!vkd3d_array_reserve((void **)instructions, capacity, count, sizeof(**instructions))) - { - ERR("Failed to allocate instructions.\n"); - return false; - } - - return true; -} - /* A record represents replacing a jump from block `switch_label' to * block `target_label' with a jump from block `if_label' to block * `target_label'. */ @@ -4233,39 +4983,40 @@ static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vs struct vsir_transformation_context *ctx) { unsigned int block_count = program->block_count, ssa_count = program->ssa_count, current_label = 0, if_label; - size_t ins_capacity = 0, ins_count = 0, i, map_capacity = 0, map_count = 0; - struct vkd3d_shader_instruction *instructions = NULL; + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct lower_switch_to_if_ladder_block_mapping *block_map = NULL; + struct vkd3d_shader_instruction_array instructions; + struct vkd3d_shader_instruction *ins, *dst_ins; + size_t map_capacity = 0, map_count = 0;
VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
- if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) - goto fail; + if (!shader_instruction_array_init(&instructions, program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY;
/* First subpass: convert SWITCH_MONOLITHIC instructions to * selection ladders, keeping a map between blocks before and * after the subpass. */ - for (i = 0; i < program->instructions.count; ++i) + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; unsigned int case_count, j, default_label;
switch (ins->opcode) { case VSIR_OP_LABEL: current_label = label_from_src_param(&ins->src[0]); - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) + if (!(dst_ins = shader_instruction_array_append(&instructions))) goto fail; - instructions[ins_count++] = *ins; + *dst_ins = *ins; continue;
case VSIR_OP_SWITCH_MONOLITHIC: break;
default: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) + if (!(dst_ins = shader_instruction_array_append(&instructions))) goto fail; - instructions[ins_count++] = *ins; + *dst_ins = *ins; continue; }
@@ -4276,32 +5027,33 @@ static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vs * just have to jump to the default label. */ if (case_count == 0) { - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) + if (!(dst_ins = shader_instruction_array_append(&instructions))) goto fail;
- if (!vsir_instruction_init_with_params(program, &instructions[ins_count], - &ins->location, VSIR_OP_BRANCH, 0, 1)) + if (!vsir_instruction_init_with_params(program, dst_ins, &ins->location, VSIR_OP_BRANCH, 0, 1)) + { + vkd3d_shader_instruction_make_nop(dst_ins); goto fail; - vsir_src_param_init_label(&instructions[ins_count].src[0], default_label); - ++ins_count; + } + vsir_src_param_init_label(&dst_ins->src[0], default_label); }
- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3 * case_count - 1)) - goto fail; - if_label = current_label;
for (j = 0; j < case_count; ++j) { unsigned int fallthrough_label, case_label = label_from_src_param(&ins->src[3 + 2 * j + 1]);
- if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &ins->location, VSIR_OP_IEQ, 1, 2)) + if (!(dst_ins = shader_instruction_array_append(&instructions))) + goto fail; + if (!vsir_instruction_init_with_params(program, dst_ins, &ins->location, VSIR_OP_IEQ, 1, 2)) + { + vkd3d_shader_instruction_make_nop(dst_ins); goto fail; - dst_param_init_ssa_bool(&instructions[ins_count].dst[0], ssa_count); - instructions[ins_count].src[0] = ins->src[0]; - instructions[ins_count].src[1] = ins->src[3 + 2 * j]; - ++ins_count; + } + dst_param_init_ssa_bool(&dst_ins->dst[0], ssa_count); + dst_ins->src[0] = ins->src[0]; + dst_ins->src[1] = ins->src[3 + 2 * j];
/* For all cases except the last one we fall through to * the following case; the last one has to jump to the @@ -4311,13 +5063,16 @@ static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vs else fallthrough_label = block_count + 1;
- if (!vsir_instruction_init_with_params(program, &instructions[ins_count], - &ins->location, VSIR_OP_BRANCH, 0, 3)) + if (!(dst_ins = shader_instruction_array_append(&instructions))) + goto fail; + if (!vsir_instruction_init_with_params(program, dst_ins, &ins->location, VSIR_OP_BRANCH, 0, 3)) + { + vkd3d_shader_instruction_make_nop(dst_ins); goto fail; - src_param_init_ssa_bool(&instructions[ins_count].src[0], ssa_count); - vsir_src_param_init_label(&instructions[ins_count].src[1], case_label); - vsir_src_param_init_label(&instructions[ins_count].src[2], fallthrough_label); - ++ins_count; + } + src_param_init_ssa_bool(&dst_ins->src[0], ssa_count); + vsir_src_param_init_label(&dst_ins->src[1], case_label); + vsir_src_param_init_label(&dst_ins->src[2], fallthrough_label);
++ssa_count;
@@ -4333,29 +5088,29 @@ static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vs } else { - if (!vsir_instruction_init_with_params(program, - &instructions[ins_count], &ins->location, VSIR_OP_LABEL, 0, 1)) + if (!(dst_ins = shader_instruction_array_append(&instructions))) goto fail; - vsir_src_param_init_label(&instructions[ins_count].src[0], ++block_count); - ++ins_count; + if (!vsir_instruction_init_with_params(program, dst_ins, &ins->location, VSIR_OP_LABEL, 0, 1)) + { + vkd3d_shader_instruction_make_nop(dst_ins); + goto fail; + } + vsir_src_param_init_label(&dst_ins->src[0], ++block_count);
if_label = block_count; } } }
- vkd3d_free(program->instructions.elements); + vsir_program_replace_instructions(program, &instructions); vkd3d_free(block_map); - program->instructions.elements = instructions; - program->instructions.capacity = ins_capacity; - program->instructions.count = ins_count; program->block_count = block_count; program->ssa_count = ssa_count;
return VKD3D_OK;
fail: - vkd3d_free(instructions); + shader_instruction_array_destroy(&instructions); vkd3d_free(block_map);
return VKD3D_ERROR_OUT_OF_MEMORY; @@ -4422,16 +5177,15 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl vkd3d_free(block_info); }
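The ladder built by vsir_program_lower_switch_to_selection_ladder() above replaces each SWITCH_MONOLITHIC with one IEQ/BRANCH pair per case, falling through freshly allocated blocks and ending at the default label (a case-less switch degenerates to a plain branch to the default). A toy printer showing the emitted shape; the ids are made up, whereas the real pass allocates them from block_count and ssa_count:

#include <stdio.h>

int main(void)
{
    static const unsigned int values[] = {0, 7};
    static const unsigned int labels[] = {3, 4};
    const unsigned int case_count = 2, default_label = 5;
    unsigned int j, ssa = 20, block = 10;

    for (j = 0; j < case_count; ++j)
    {
        /* All cases but the last fall through to a fresh block; the last
         * one falls through to the default label. */
        unsigned int fallthrough = (j == case_count - 1) ? default_label : block + 1;

        printf("sr%u = ieq sel, %u\n", ssa, values[j]);
        printf("branch sr%u, label %u, label %u\n", ssa, labels[j], fallthrough);
        if (j != case_count - 1)
            printf("label %u\n", ++block);
        ++ssa;
    }
    return 0;
}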
-static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program, - struct vsir_transformation_context *ctx) +static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps_in_function( + struct vsir_program *program, struct vsir_program_iterator *it) { - struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); - size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count; struct ssas_to_temps_block_info *info, *block_info = NULL; - struct vkd3d_shader_instruction *instructions = NULL; + struct vsir_program_iterator it_begin = *it; struct ssas_to_temps_alloc alloc = {0}; struct vkd3d_shader_instruction *ins; unsigned int current_label = 0; + size_t phi_count;
VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
@@ -4445,15 +5199,31 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ goto fail;
phi_count = 0; - incoming_count = 0; - for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + for (ins = vsir_program_iterator_current(it); ins; ins = vsir_program_iterator_next(it)) { unsigned int j, temp_idx; + bool finish = false;
/* Only phi src/dst SSA values need be converted here. Structurisation may * introduce new cases of undominated SSA use, which will be handled later. */ - if (ins->opcode != VSIR_OP_PHI) - continue; + switch (ins->opcode) + { + case VSIR_OP_PHI: + break; + + case VSIR_OP_HS_CONTROL_POINT_PHASE: + case VSIR_OP_HS_FORK_PHASE: + case VSIR_OP_HS_JOIN_PHASE: + finish = true; + break; + + default: + continue; + } + + if (finish) + break; + ++phi_count;
temp_idx = alloc.next_temp_idx++; @@ -4477,8 +5247,6 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ incoming->dst = ins->dst;
alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; - - ++incoming_count; }
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); @@ -4487,12 +5255,13 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ if (!phi_count) goto done;
- if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) - goto fail; + *it = it_begin;
- for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + for (ins = vsir_program_iterator_current(it); ins; ins = vsir_program_iterator_next(it)) { struct vkd3d_shader_instruction *mov_ins; + struct vkd3d_shader_location loc; + bool finish = false; size_t j;
for (j = 0; j < ins->dst_count; ++j) @@ -4511,33 +5280,46 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ case VSIR_OP_SWITCH_MONOLITHIC: info = &block_info[current_label - 1];
+ loc = ins->location; + if (!(mov_ins = vsir_program_iterator_insert_before_and_move(it, info->incoming_count))) + goto fail; + VKD3D_ASSERT(mov_ins); + for (j = 0; j < info->incoming_count; ++j) { struct phi_incoming_to_temp *incoming = &info->incomings[j];
- mov_ins = &instructions[ins_count++]; - if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VSIR_OP_MOV, 1, 0)) + if (!vsir_instruction_init_with_params(program, mov_ins, &loc, VSIR_OP_MOV, 1, 0)) + { + vkd3d_shader_instruction_make_nop(mov_ins); goto fail; + } *mov_ins->dst = *incoming->dst; mov_ins->src = incoming->src; mov_ins->src_count = 1; + + mov_ins = vsir_program_iterator_next(it); } break;
case VSIR_OP_PHI: - continue; + vkd3d_shader_instruction_make_nop(ins); + break; + + case VSIR_OP_HS_CONTROL_POINT_PHASE: + case VSIR_OP_HS_FORK_PHASE: + case VSIR_OP_HS_JOIN_PHASE: + finish = true; + break;
default: break; }
- instructions[ins_count++] = *ins; + if (finish) + break; }
- vkd3d_free(program->instructions.elements); - program->instructions.elements = instructions; - program->instructions.capacity = ins_capacity; - program->instructions.count = ins_count; program->temp_count = alloc.next_temp_idx; done: ssas_to_temps_block_info_cleanup(block_info, program->block_count); @@ -4546,13 +5328,51 @@ done: return VKD3D_OK;
fail: - vkd3d_free(instructions); ssas_to_temps_block_info_cleanup(block_info, program->block_count); vkd3d_free(alloc.table);
return VKD3D_ERROR_OUT_OF_MEMORY; }
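What the pass above does to each function, in miniature: every PHI becomes a shared temporary, written by a MOV inserted in front of each predecessor's terminator, and the PHI itself is turned into a NOP while its uses read the temp. A toy rendering with strings in place of vkd3d instructions:

#include <stdio.h>

#define MAX_INS 8

struct toy_block
{
    const char *ins[MAX_INS];
    unsigned int count;
};

/* The terminator (branch/ret) is the last instruction of a block; the MOVs
 * materialising a phi incoming go right in front of it. */
static void insert_before_terminator(struct toy_block *b, const char *mov)
{
    b->ins[b->count] = b->ins[b->count - 1];
    b->ins[b->count - 1] = mov;
    ++b->count;
}

int main(void)
{
    struct toy_block b0 = {{"sr0 = ...", "branch 2"}, 2};
    struct toy_block b1 = {{"sr1 = ...", "branch 2"}, 2};
    struct toy_block b2 = {{"sr2 = phi(sr0 from 0, sr1 from 1)", "use sr2", "ret"}, 3};
    unsigned int i;

    insert_before_terminator(&b0, "mov r0, sr0");
    insert_before_terminator(&b1, "mov r0, sr1");
    b2.ins[0] = "nop";      /* the PHI is nop'd... */
    b2.ins[1] = "use r0";   /* ...and its result now reads the temp */

    for (i = 0; i < b0.count; ++i)
        printf("0: %s\n", b0.ins[i]);
    for (i = 0; i < b1.count; ++i)
        printf("1: %s\n", b1.ins[i]);
    for (i = 0; i < b2.count; ++i)
        printf("2: %s\n", b2.ins[i]);
    return 0;
}

In the real pass the MOVs are placed with vsir_program_iterator_insert_before_and_move() at each predecessor's BRANCH or SWITCH_MONOLITHIC, and the temp index comes from alloc.next_temp_idx.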
+static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + enum vkd3d_result ret; + + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_current(&it)) + { + switch (ins->opcode) + { + case VSIR_OP_LABEL: + VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); + TRACE("Materializing phi SSAs in a non-hull shader.\n"); + if ((ret = vsir_program_materialise_phi_ssas_to_temps_in_function(program, &it)) < 0) + return ret; + break; + + case VSIR_OP_HS_CONTROL_POINT_PHASE: + case VSIR_OP_HS_FORK_PHASE: + case VSIR_OP_HS_JOIN_PHASE: + VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); + TRACE("Materializing phi SSAs in phase %u of a hull shader.\n", ins->opcode); + vsir_program_iterator_next(&it); + if ((ret = vsir_program_materialise_phi_ssas_to_temps_in_function(program, &it)) < 0) + return ret; + break; + + default: + vsir_program_iterator_next(&it); + break; + } + } + + return VKD3D_OK; +} + struct vsir_block_list { struct vsir_block **blocks; @@ -4607,7 +5427,7 @@ struct vsir_block * LABEL that introduces the block. `end' points to the terminator * instruction (either BRANCH or RET). They can coincide, meaning * that the block is empty. */ - struct vkd3d_shader_instruction *begin, *end; + struct vsir_program_iterator begin, end; struct vsir_block_list predecessors, successors; uint32_t *dominates; };
struct vsir_cfg_emit_target { - struct vkd3d_shader_instruction *instructions; - size_t ins_capacity, ins_count; + struct vkd3d_shader_instruction_array instructions; + unsigned int jump_target_temp_idx; unsigned int temp_count; }; @@ -4800,8 +5620,8 @@ struct vsir_cfg { struct vkd3d_shader_message_context *message_context; struct vsir_program *program; - size_t function_begin; - size_t function_end; + struct vsir_program_iterator function_begin; + struct vsir_program_iterator function_end; struct vsir_block *blocks; struct vsir_block *entry; size_t block_count; @@ -4926,19 +5746,21 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *end; const char *shape;
if (block->label == 0) continue;
- switch (block->end->opcode) + end = vsir_program_iterator_current(&block->end); + switch (end->opcode) { case VSIR_OP_RET: shape = "trapezium"; break;
case VSIR_OP_BRANCH: - shape = vsir_register_is_label(&block->end->src[0].reg) ? "ellipse" : "box"; + shape = vsir_register_is_label(&end->src[0].reg) ? "ellipse" : "box"; break;
default: @@ -5046,19 +5868,20 @@ static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) }
static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, - struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, - size_t *pos) + struct vkd3d_shader_message_context *message_context, + struct vsir_cfg_emit_target *target, struct vsir_program_iterator *it) { struct vsir_block *current_block = NULL; + struct vkd3d_shader_instruction *ins; + size_t i, defined_block_count = 0; enum vkd3d_result ret; - size_t i;
memset(cfg, 0, sizeof(*cfg)); cfg->message_context = message_context; cfg->program = program; cfg->block_count = program->block_count; cfg->target = target; - cfg->function_begin = *pos; + cfg->function_begin = *it;
vsir_block_list_init(&cfg->order);
@@ -5068,12 +5891,11 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program if (TRACE_ON()) vkd3d_string_buffer_init(&cfg->debug_buffer);
- for (i = *pos; i < program->instructions.count; ++i) + for (ins = vsir_program_iterator_current(it); ins; ins = vsir_program_iterator_next(it)) { - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; bool finish = false;
- switch (instruction->opcode) + switch (ins->opcode) { case VSIR_OP_PHI: case VSIR_OP_SWITCH_MONOLITHIC: @@ -5081,7 +5903,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program
case VSIR_OP_LABEL: { - unsigned int label = label_from_src_param(&instruction->src[0]); + unsigned int label = label_from_src_param(&ins->src[0]);
VKD3D_ASSERT(!current_block); VKD3D_ASSERT(label > 0); @@ -5090,16 +5912,18 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program VKD3D_ASSERT(current_block->label == 0); if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) goto fail; - current_block->begin = &program->instructions.elements[i + 1]; + current_block->begin = *it; + vsir_program_iterator_next(¤t_block->begin); if (!cfg->entry) cfg->entry = current_block; + ++defined_block_count; break; }
case VSIR_OP_BRANCH: case VSIR_OP_RET: VKD3D_ASSERT(current_block); - current_block->end = instruction; + current_block->end = *it; current_block = NULL; break;
@@ -5118,33 +5942,36 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program break; }
- *pos = i; - cfg->function_end = *pos; + cfg->function_end = *it; + /* program->block_count is the max in any function. Set the count for this function. */ + cfg->block_count = defined_block_count;
for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *end;
if (block->label == 0) continue;
- switch (block->end->opcode) + end = vsir_program_iterator_current(&block->end); + switch (end->opcode) { case VSIR_OP_RET: break;
case VSIR_OP_BRANCH: - if (vsir_register_is_label(&block->end->src[0].reg)) + if (vsir_register_is_label(&end->src[0].reg)) { - if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[0])) < 0) + if ((ret = vsir_cfg_add_edge(cfg, block, &end->src[0])) < 0) goto fail; } else { - if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[1])) < 0) + if ((ret = vsir_cfg_add_edge(cfg, block, &end->src[1])) < 0) goto fail;
- if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[2])) < 0) + if ((ret = vsir_cfg_add_edge(cfg, block, &end->src[2])) < 0) goto fail; } break; @@ -5328,8 +6155,10 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg)
if (cfg->loops_by_header[header->label - 1] != SIZE_MAX) { + struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&header->begin); + FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); - vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Block %u is header to more than one loop, this is not implemented.", header->label); return VKD3D_ERROR_NOT_IMPLEMENTED; } @@ -5411,8 +6240,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
if (in_degrees[i] == 0 && block != cfg->entry) { + struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&block->begin); + WARN("Unexpected entry point %u.\n", block->label); - vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Block %u is unreachable from the entry point.", block->label); ret = VKD3D_ERROR_INVALID_SHADER; goto fail; @@ -5421,8 +6252,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
if (in_degrees[cfg->entry->label - 1] != 0) { + struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&cfg->entry->begin); + WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); - vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); ret = VKD3D_ERROR_INVALID_SHADER; goto fail; @@ -5524,9 +6357,11 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
if (cfg->order.count != cfg->block_count) { + struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&cfg->entry->begin); + /* There is a cycle of forward edges. */ WARN("The control flow graph is not reducible.\n"); - vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "The control flow graph is not reducible."); ret = VKD3D_ERROR_INVALID_SHADER; goto fail; @@ -5785,6 +6620,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) { struct vsir_block *block = cfg->order.blocks[i]; struct vsir_cfg_structure *structure; + struct vkd3d_shader_instruction *end;
VKD3D_ASSERT(stack_depth > 0);
@@ -5810,16 +6646,17 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) structure->u.block = block;
/* Generate between zero and two jump instructions. */ - switch (block->end->opcode) + end = vsir_program_iterator_current(&block->end); + switch (end->opcode) { case VSIR_OP_BRANCH: { struct vsir_cfg_edge_action action_true, action_false; bool invert_condition = false;
- if (vsir_register_is_label(&block->end->src[0].reg)) + if (vsir_register_is_label(&end->src[0].reg)) { - unsigned int target = label_from_src_param(&block->end->src[0]); + unsigned int target = label_from_src_param(&end->src[0]); struct vsir_block *successor = &cfg->blocks[target - 1];
vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); @@ -5827,12 +6664,12 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) } else { - unsigned int target = label_from_src_param(&block->end->src[1]); + unsigned int target = label_from_src_param(&end->src[1]); struct vsir_block *successor = &cfg->blocks[target - 1];
vsir_cfg_compute_edge_action(cfg, block, successor, &action_true);
- target = label_from_src_param(&block->end->src[2]); + target = label_from_src_param(&end->src[2]); successor = &cfg->blocks[target - 1];
vsir_cfg_compute_edge_action(cfg, block, successor, &action_false); @@ -5884,7 +6721,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) goto fail; structure->u.jump.type = action_true.jump_type; structure->u.jump.target = action_true.target; - structure->u.jump.condition = &block->end->src[0]; + structure->u.jump.condition = &vsir_program_iterator_current(&block->end)->src[0]; structure->u.jump.invert_condition = invert_condition; }
@@ -6369,15 +7206,19 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg struct vsir_block *block) { struct vsir_cfg_emit_target *target = cfg->target; + struct vkd3d_shader_instruction *ins, *end, *dst; + struct vsir_program_iterator it;
- if (!reserve_instructions(&target->instructions, &target->ins_capacity, - target->ins_count + (block->end - block->begin))) - return VKD3D_ERROR_OUT_OF_MEMORY; - - memcpy(&target->instructions[target->ins_count], block->begin, - (char *)block->end - (char *)block->begin); + it = block->begin; + end = vsir_program_iterator_current(&block->end);
- target->ins_count += block->end - block->begin; + for (ins = vsir_program_iterator_current(&it); ins != end; ins = vsir_program_iterator_next(&it)) + { + if (!(dst = shader_instruction_array_append(&target->instructions))) + return VKD3D_ERROR_OUT_OF_MEMORY; + VKD3D_ASSERT(dst); + *dst = *ins; + }
return VKD3D_OK; } @@ -6387,20 +7228,20 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, { struct vsir_cfg_emit_target *target = cfg->target; const struct vkd3d_shader_location no_loc = {0}; + struct vkd3d_shader_instruction *ins; enum vkd3d_result ret;
- if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY;
- vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VSIR_OP_LOOP); + vsir_instruction_init(ins, &no_loc, VSIR_OP_LOOP);
if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0) return ret;
- if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; - - vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VSIR_OP_ENDLOOP); + vsir_instruction_init(ins, &no_loc, VSIR_OP_ENDLOOP);
/* Add a trampoline to implement multilevel jumping depending on the stored * jump_target value. */ @@ -6414,44 +7255,53 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, * we have to reach an outer loop, so we keep breaking. */ const unsigned int inner_break_target = loop->idx << 1;
- if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, VSIR_OP_IEQ, 1, 2)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; + if (!vsir_instruction_init_with_params(cfg->program, ins, &no_loc, VSIR_OP_IEQ, 1, 2)) + { + vkd3d_shader_instruction_make_nop(ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + }
- dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); - src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); - src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target); + ++target->temp_count;
- ++target->ins_count; + dst_param_init_temp_bool(&ins->dst[0], target->temp_count - 1); + src_param_init_temp_uint(&ins->src[0], target->jump_target_temp_idx); + src_param_init_const_uint(&ins->src[1], outer_continue_target);
- if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, VSIR_OP_CONTINUEP, 0, 1)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; + if (!vsir_instruction_init_with_params(cfg->program, ins, &no_loc, VSIR_OP_CONTINUEP, 0, 1)) + { + vkd3d_shader_instruction_make_nop(ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + }
- src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); - - ++target->ins_count; - ++target->temp_count; + src_param_init_temp_bool(&ins->src[0], target->temp_count - 1);
- if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, VSIR_OP_IEQ, 1, 2)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; + if (!vsir_instruction_init_with_params(cfg->program, ins, &no_loc, VSIR_OP_IEQ, 1, 2)) + { + vkd3d_shader_instruction_make_nop(ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + }
- dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); - src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); - src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target); + ++target->temp_count;
- ++target->ins_count; + dst_param_init_temp_bool(&ins->dst[0], target->temp_count - 1); + src_param_init_temp_uint(&ins->src[0], target->jump_target_temp_idx); + src_param_init_const_uint(&ins->src[1], inner_break_target);
- if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, VSIR_OP_BREAKP, 0, 1)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; - target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - - src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); + if (!vsir_instruction_init_with_params(cfg->program, ins, &no_loc, VSIR_OP_BREAKP, 0, 1)) + { + vkd3d_shader_instruction_make_nop(ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ins->flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
- ++target->ins_count; - ++target->temp_count; + src_param_init_temp_bool(&ins->src[0], target->temp_count - 1); }
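Spelled out in plain C, the trampoline emitted above behaves as follows. The break encoding (loop index << 1) is visible in this hunk as inner_break_target; the continue encoding with the low bit set is inferred from the "plus a bit" comment further down, since its defining line is outside this diff:

#include <stdio.h>

int main(void)
{
    unsigned int jump_target = 0;       /* the jump_target_temp_idx temp */
    unsigned int i, continues = 0;

    for (i = 0; i < 3; ++i)             /* outer loop, idx 0 */
    {
        for (;;)                        /* inner loop, idx 1 */
        {
            /* A multilevel `continue' aimed at the outer loop: store the
             * encoded target, then leave the inner loop. */
            jump_target = (0u << 1) | 1u;
            break;
        }
        /* Trampoline after the inner ENDLOOP: */
        if (jump_target == ((0u << 1) | 1u))    /* IEQ + CONTINUEP */
        {
            ++continues;
            continue;
        }
        if (jump_target != (1u << 1))           /* IEQ + BREAKP (CONDITIONAL_OP_Z) */
            break;
    }
    printf("%u continues\n", continues);
    return 0;
}

The BREAKP with VKD3D_SHADER_CONDITIONAL_OP_Z implements the "keep breaking" case: if the stored target is not this loop's break, the break propagates one level further out.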
return VKD3D_OK; @@ -6462,40 +7312,40 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg { struct vsir_cfg_emit_target *target = cfg->target; const struct vkd3d_shader_location no_loc = {0}; + struct vkd3d_shader_instruction *ins; enum vkd3d_result ret;
- if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY;
- if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, VSIR_OP_IF, 0, 1)) + if (!vsir_instruction_init_with_params(cfg->program, ins, &no_loc, VSIR_OP_IF, 0, 1)) + { + vkd3d_shader_instruction_make_nop(ins); return VKD3D_ERROR_OUT_OF_MEMORY; + }
- target->instructions[target->ins_count].src[0] = *selection->condition; + ins->src[0] = *selection->condition;
if (selection->invert_condition) - target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - - ++target->ins_count; + ins->flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0) return ret;
if (selection->else_body.count != 0) { - if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY;
- vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VSIR_OP_ELSE); + vsir_instruction_init(ins, &no_loc, VSIR_OP_ELSE);
if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0) return ret; }
- if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; - - vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VSIR_OP_ENDIF); + vsir_instruction_init(ins, &no_loc, VSIR_OP_ENDIF);
return VKD3D_OK; } @@ -6508,6 +7358,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, /* Encode the jump target as the loop index plus a bit to remember whether * we're breaking or continue-ing. */ unsigned int jump_target = jump->target << 1; + struct vkd3d_shader_instruction *ins; enum vkd3d_shader_opcode opcode;
switch (jump->type) @@ -6538,32 +7389,33 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, vkd3d_unreachable(); }
- if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; - if (jump->needs_launcher) { - if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, VSIR_OP_MOV, 1, 1)) + if (!(ins = shader_instruction_array_append(&target->instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; + if (!vsir_instruction_init_with_params(cfg->program, ins, &no_loc, VSIR_OP_MOV, 1, 1)) + { + vkd3d_shader_instruction_make_nop(ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + }
- dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx); - src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target); - - ++target->ins_count; + dst_param_init_temp_uint(&ins->dst[0], target->jump_target_temp_idx); + src_param_init_const_uint(&ins->src[0], jump_target); }
- if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, opcode, 0, !!jump->condition)) + if (!(ins = shader_instruction_array_append(&target->instructions))) + return VKD3D_ERROR_OUT_OF_MEMORY; + if (!vsir_instruction_init_with_params(cfg->program, ins, &no_loc, opcode, 0, !!jump->condition)) + { + vkd3d_shader_instruction_make_nop(ins); return VKD3D_ERROR_OUT_OF_MEMORY; + }
if (jump->invert_condition) - target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + ins->flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
if (jump->condition) - target->instructions[target->ins_count].src[0] = *jump->condition; - - ++target->ins_count; + ins->src[0] = *jump->condition;
return VKD3D_OK; } @@ -6616,13 +7468,13 @@ static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) }
static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, - size_t *pos) + struct vkd3d_shader_message_context *message_context, + struct vsir_cfg_emit_target *target, struct vsir_program_iterator *it) { enum vkd3d_result ret; struct vsir_cfg cfg;
- if ((ret = vsir_cfg_init(&cfg, program, message_context, target, pos)) < 0) + if ((ret = vsir_cfg_init(&cfg, program, message_context, target, it)) < 0) return ret;
vsir_cfg_compute_dominators(&cfg); @@ -6653,32 +7505,31 @@ out: static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, struct vsir_transformation_context *ctx) { + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct vkd3d_shader_message_context *message_context = ctx->message_context; + struct vkd3d_shader_instruction *ins, *dst; struct vsir_cfg_emit_target target = {0}; enum vkd3d_result ret; - size_t i;
VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
target.jump_target_temp_idx = program->temp_count; target.temp_count = program->temp_count + 1;
- if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) + if (!shader_instruction_array_init(&target.instructions, 0)) return VKD3D_ERROR_OUT_OF_MEMORY;
- for (i = 0; i < program->instructions.count;) + for (ins = vsir_program_iterator_head(&it); ins;) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->opcode) { case VSIR_OP_LABEL: VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); TRACE("Structurizing a non-hull shader.\n"); - if ((ret = vsir_program_structurize_function(program, message_context, - &target, &i)) < 0) + if ((ret = vsir_program_structurize_function(program, message_context, &target, &it)) < 0) goto fail; - VKD3D_ASSERT(i == program->instructions.count); + ins = vsir_program_iterator_current(&it); + VKD3D_ASSERT(!ins); break;
case VSIR_OP_HS_CONTROL_POINT_PHASE: @@ -6686,33 +7537,33 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, case VSIR_OP_HS_JOIN_PHASE: VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); - target.instructions[target.ins_count++] = *ins; - ++i; - if ((ret = vsir_program_structurize_function(program, message_context, - &target, &i)) < 0) + + if (!(dst = shader_instruction_array_append(&target.instructions))) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + *dst = *ins; + + vsir_program_iterator_next(&it); + if ((ret = vsir_program_structurize_function(program, message_context, &target, &it)) < 0) goto fail; + ins = vsir_program_iterator_current(&it); break;
default: - if (!reserve_instructions(&target.instructions, &target.ins_capacity, target.ins_count + 1)) + if (!(dst = shader_instruction_array_append(&target.instructions))) return VKD3D_ERROR_OUT_OF_MEMORY; - target.instructions[target.ins_count++] = *ins; - ++i; + *dst = *ins; + ins = vsir_program_iterator_next(&it); break; } }
- vkd3d_free(program->instructions.elements); - program->instructions.elements = target.instructions; - program->instructions.capacity = target.ins_capacity; - program->instructions.count = target.ins_count; + vsir_program_replace_instructions(program, &target.instructions); program->temp_count = target.temp_count; program->cf_type = VSIR_CF_STRUCTURED;
return VKD3D_OK;
fail: - vkd3d_free(target.instructions); + shader_instruction_array_destroy(&target.instructions);
return ret; } @@ -6742,8 +7593,10 @@ static void register_map_undominated_use(struct vkd3d_shader_register *reg, stru static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) { struct vsir_program *program = cfg->program; + struct vkd3d_shader_instruction *ins, *end; struct ssas_to_temps_alloc alloc = {0}; struct vsir_block **origin_blocks; + struct vsir_program_iterator it; unsigned int j; size_t i;
@@ -6761,12 +7614,13 @@ static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct v for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; - struct vkd3d_shader_instruction *ins;
if (block->label == 0) continue;
- for (ins = block->begin; ins <= block->end; ++ins) + it = block->begin; + end = vsir_program_iterator_current(&block->end); + for (ins = vsir_program_iterator_current(&it); ins != end; ins = vsir_program_iterator_next(&it)) { for (j = 0; j < ins->dst_count; ++j) { @@ -6779,12 +7633,13 @@ static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct v for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; - struct vkd3d_shader_instruction *ins;
if (block->label == 0) continue;
- for (ins = block->begin; ins <= block->end; ++ins) + it = block->begin; + end = vsir_program_iterator_current(&block->end); + for (ins = vsir_program_iterator_current(&it); ins != end; ins = vsir_program_iterator_next(&it)) { for (j = 0; j < ins->src_count; ++j) register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); @@ -6796,10 +7651,10 @@ static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct v
TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count);
- for (i = cfg->function_begin; i < cfg->function_end; ++i) + it = cfg->function_begin; + end = vsir_program_iterator_current(&cfg->function_end); + for (ins = vsir_program_iterator_current(&it); ins != end; ins = vsir_program_iterator_next(&it)) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - for (j = 0; j < ins->dst_count; ++j) materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
@@ -6815,14 +7670,13 @@ done: return VKD3D_OK; }
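Note on the idiom above: the CFG passes now walk a half-open instruction range through the opaque iterator API instead of raw element pointers. A minimal sketch of that walk, assuming (as the hunks above suggest) that the iterator yields NULL once it runs off the program and that block->end denotes one past the block's last instruction; the helper name is illustrative:

/* Sketch: visit each instruction in [block->begin, block->end). */
static void visit_block_instructions(struct vsir_block *block)
{
    struct vsir_program_iterator it = block->begin;
    const struct vkd3d_shader_instruction *end = vsir_program_iterator_current(&block->end);
    struct vkd3d_shader_instruction *ins;

    for (ins = vsir_program_iterator_current(&it); ins != end; ins = vsir_program_iterator_next(&it))
    {
        /* Process "ins" here. */
    }
}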
-static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_function( - struct vsir_program *program, struct vkd3d_shader_message_context *message_context, - size_t *pos) +static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_function(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context, struct vsir_program_iterator *it) { enum vkd3d_result ret; struct vsir_cfg cfg;
- if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL, pos)) < 0) + if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL, it)) < 0) return ret;
vsir_cfg_compute_dominators(&cfg); @@ -6837,25 +7691,23 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, struct vsir_transformation_context *ctx) { + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct vkd3d_shader_message_context *message_context = ctx->message_context; + struct vkd3d_shader_instruction *ins; enum vkd3d_result ret; - size_t i;
VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
- for (i = 0; i < program->instructions.count;) + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_current(&it)) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->opcode) { case VSIR_OP_LABEL: VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); TRACE("Materializing undominated SSAs in a non-hull shader.\n"); if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( - program, message_context, &i)) < 0) + program, message_context, &it)) < 0) return ret; - VKD3D_ASSERT(i == program->instructions.count); break;
case VSIR_OP_HS_CONTROL_POINT_PHASE: @@ -6863,14 +7715,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru case VSIR_OP_HS_JOIN_PHASE: VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); - ++i; + vsir_program_iterator_next(&it); if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( - program, message_context, &i)) < 0) + program, message_context, &it)) < 0) return ret; break;
default: - ++i; + vsir_program_iterator_next(&it); break; } } @@ -6881,7 +7733,6 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru static bool use_flat_interpolation(const struct vsir_program *program, struct vkd3d_shader_message_context *message_context, bool *flat) { - static const struct vkd3d_shader_location no_loc; const struct vkd3d_shader_parameter1 *parameter;
*flat = false; @@ -6891,13 +7742,13 @@ static bool use_flat_interpolation(const struct vsir_program *program,
if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unsupported flat interpolation parameter type %#x.", parameter->type); return false; } if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid flat interpolation parameter data type %#x.", parameter->data_type); return false; } @@ -6938,7 +7789,6 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr uint32_t colour_temp, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_location loc = vsir_program_iterator_current(it)->location; - static const struct vkd3d_shader_location no_loc; struct vkd3d_shader_instruction *ins;
static const struct @@ -6959,10 +7809,8 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) { - vsir_program_iterator_prev(it); - if (!vsir_program_iterator_insert_after(it, 1)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 1))) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = vsir_program_iterator_next(it);
vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_DISCARD, 0, 1); ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; @@ -6972,10 +7820,8 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr return VKD3D_OK; }
- vsir_program_iterator_prev(it); - if (!vsir_program_iterator_insert_after(it, 3)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 3))) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = vsir_program_iterator_next(it);
switch (ref->data_type) { @@ -6994,7 +7840,7 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr break;
case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4: - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, "Alpha test reference data type must be a single component."); return VKD3D_ERROR_INVALID_ARGUMENT;
@@ -7036,7 +7882,6 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro struct vkd3d_shader_message_context *message_context = ctx->message_context; const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; uint32_t colour_signature_idx, colour_temp = ~0u; - static const struct vkd3d_shader_location no_loc; enum vkd3d_shader_comparison_func compare_func; struct vkd3d_shader_instruction *ins; int ret; @@ -7054,13 +7899,13 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro
if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unsupported alpha test function parameter type %#x.", func->type); return VKD3D_ERROR_NOT_IMPLEMENTED; } if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid alpha test function parameter data type %#x.", func->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -7117,10 +7962,8 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog struct vkd3d_shader_instruction *ins; unsigned int output_idx = 0;
- vsir_program_iterator_prev(it); - if (!vsir_program_iterator_insert_after(it, vkd3d_popcount(mask) + 1)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, vkd3d_popcount(mask) + 1))) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = vsir_program_iterator_next(it);
for (unsigned int i = 0; i < 8; ++i) { @@ -7166,7 +8009,6 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr unsigned int low_signature_idx = ~0u, high_signature_idx = ~0u; const struct vkd3d_shader_parameter1 *mask_parameter = NULL; uint32_t position_signature_idx, position_temp, mask; - static const struct vkd3d_shader_location no_loc; struct signature_element *clip_element; struct vkd3d_shader_instruction *ins; unsigned int plane_count; @@ -7188,13 +8030,13 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr
if (mask_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(ctx->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unsupported clip plane mask parameter type %#x.", mask_parameter->type); return VKD3D_ERROR_NOT_IMPLEMENTED; } if (mask_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(ctx->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid clip plane mask parameter data type %#x.", mask_parameter->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -7207,7 +8049,7 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr { if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_CLIP_DISTANCE) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, + vkd3d_shader_error(ctx->message_context, &ctx->null_location, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, "Clip planes cannot be used if the shader writes clip distance."); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -7215,7 +8057,7 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr
if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx)) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, + vkd3d_shader_error(ctx->message_context, &ctx->null_location, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); return VKD3D_ERROR_INVALID_SHADER; } @@ -7288,10 +8130,8 @@ static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *progr const struct vkd3d_shader_location loc = vsir_program_iterator_current(it)->location; struct vkd3d_shader_instruction *ins;
- vsir_program_iterator_prev(it); - if (!vsir_program_iterator_insert_after(it, 1)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 1))) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = vsir_program_iterator_next(it);
vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MOV, 1, 1); vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VSIR_DATA_F32, 1); @@ -7307,7 +8147,6 @@ static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *pro { struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); const struct vkd3d_shader_parameter1 *size_parameter = NULL; - static const struct vkd3d_shader_location no_loc; struct vkd3d_shader_instruction *ins;
if (program->has_point_size) @@ -7329,7 +8168,7 @@ static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *pro
if (size_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(ctx->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid point size parameter data type %#x.", size_parameter->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -7356,7 +8195,6 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra { const struct vkd3d_shader_parameter1 *min_parameter = NULL, *max_parameter = NULL; struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); - static const struct vkd3d_shader_location no_loc; struct vkd3d_shader_instruction *ins;
if (!program->has_point_size) @@ -7380,14 +8218,14 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra
if (min_parameter && min_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(ctx->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid minimum point size parameter data type %#x.", min_parameter->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; }
if (max_parameter && max_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(ctx->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid maximum point size parameter data type %#x.", max_parameter->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -7530,7 +8368,7 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr { struct vsir_program_iterator it = vsir_program_iterator(&program->instructions), it2; const struct vkd3d_shader_parameter1 *sprite_parameter = NULL; - static const struct vkd3d_shader_location no_loc; + struct vkd3d_shader_location loc = ctx->null_location; struct vkd3d_shader_instruction *ins; bool used_texcoord = false; unsigned int coord_temp; @@ -7552,13 +8390,13 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr
if (sprite_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(ctx->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unsupported point sprite parameter type %#x.", sprite_parameter->type); return VKD3D_ERROR_NOT_IMPLEMENTED; } if (sprite_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(ctx->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid point sprite parameter data type %#x.", sprite_parameter->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -7578,7 +8416,10 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { if (!vsir_instruction_is_dcl(ins) && ins->opcode != VSIR_OP_LABEL && ins->opcode != VSIR_OP_NOP) + { + loc = ins->location; break; + } }
it2 = it; @@ -7613,12 +8454,10 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr
if (used_texcoord) { - vsir_program_iterator_prev(&it); - if (!vsir_program_iterator_insert_after(&it, 2)) + if (!(ins = vsir_program_iterator_insert_before_and_move(&it, 2))) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = vsir_program_iterator_next(&it);
- vsir_instruction_init_with_params(program, ins, &no_loc, VSIR_OP_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MOV, 1, 1); dst_param_init_temp_float4(&ins->dst[0], coord_temp); ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; vsir_src_param_init(&ins->src[0], VKD3DSPR_POINT_COORD, VSIR_DATA_F32, 0); @@ -7626,11 +8465,12 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ins = vsir_program_iterator_next(&it);
- vsir_instruction_init_with_params(program, ins, &no_loc, VSIR_OP_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MOV, 1, 1); dst_param_init_temp_float4(&ins->dst[0], coord_temp); ins->dst[0].write_mask = VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3; vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VSIR_DATA_F32, 0); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; vsir_program_iterator_next(&it);
program->has_point_coord = true; @@ -7668,66 +8508,63 @@ static enum vkd3d_result vsir_program_add_fog_input(struct vsir_program *program }
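The repeated prev()/insert_after()/next() dance that the old code needed around each insertion is now folded into vsir_program_iterator_insert_before_and_move(). Its definition is outside these hunks; a hypothetical equivalent in terms of the older primitives, shown only to document the intended semantics:

/* Hypothetical sketch, mirroring the three-call pattern replaced above:
 * insert "count" new instructions immediately before the current position
 * and leave the iterator on the first of them, returning that instruction,
 * or NULL on allocation failure. */
static struct vkd3d_shader_instruction *insert_before_and_move_sketch(
        struct vsir_program_iterator *it, unsigned int count)
{
    vsir_program_iterator_prev(it);
    if (!vsir_program_iterator_insert_after(it, count))
        return NULL;
    return vsir_program_iterator_next(it);
}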
static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program, - const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode, - uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp, - size_t *ret_pos, struct vkd3d_shader_message_context *message_context) + struct vsir_program_iterator *it, enum vkd3d_shader_fog_fragment_mode mode, uint32_t fog_signature_idx, + uint32_t colour_signature_idx, uint32_t colour_temp, struct vkd3d_shader_message_context *message_context) { - struct vkd3d_shader_instruction_array *instructions = &program->instructions; - struct vkd3d_shader_location loc = ret->location; + struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(it); + struct vkd3d_shader_location loc = ins->location; uint32_t ssa_factor = program->ssa_count++; - size_t pos = ret - instructions->elements; - struct vkd3d_shader_instruction *ins; - uint32_t ssa_temp, ssa_temp2; + uint32_t ssa_temp, ssa_temp2, ssa_temp3;
switch (mode) { case VKD3D_SHADER_FOG_FRAGMENT_LINEAR: /* We generate the following code: * - * add sr0, FOG_END, -vFOG.x - * mul_sat srFACTOR, sr0, FOG_SCALE + * neg sr0, vFOG.x + * add sr1, FOG_END, sr0 + * mul srFACTOR, sr1, FOG_SCALE */ - if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 7))) return VKD3D_ERROR_OUT_OF_MEMORY; - ret = NULL; - - *ret_pos = pos + 4;
ssa_temp = program->ssa_count++; + ssa_temp2 = program->ssa_count++;
- ins = &program->instructions.elements[pos]; + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_NEG, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + vsir_src_param_init(&ins->src[0], VKD3DSPR_INPUT, VSIR_DATA_F32, 1); + ins->src[0].reg.idx[0].offset = fog_signature_idx; + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + ins = vsir_program_iterator_next(it);
vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_ADD, 1, 2); - dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_END, VSIR_DATA_F32); - vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VSIR_DATA_F32, 1); - ins->src[1].reg.idx[0].offset = fog_signature_idx; - ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - ins->src[1].modifiers = VKD3DSPSM_NEG; + src_param_init_ssa_float(&ins->src[1], ssa_temp); + ins = vsir_program_iterator_next(it);
- vsir_instruction_init_with_params(program, ++ins, &loc, VSIR_OP_MUL, 1, 2); + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MUL, 1, 2); dst_param_init_ssa_float(&ins->dst[0], ssa_factor); - ins->dst[0].modifiers = VKD3DSPDM_SATURATE; - src_param_init_ssa_float(&ins->src[0], ssa_temp); + src_param_init_ssa_float(&ins->src[0], ssa_temp2); src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VSIR_DATA_F32); + ins = vsir_program_iterator_next(it); + break;
case VKD3D_SHADER_FOG_FRAGMENT_EXP: /* We generate the following code: * * mul sr0, FOG_SCALE, vFOG.x - * exp_sat srFACTOR, -sr0 + * neg sr1, sr0 + * exp srFACTOR, sr1 */ - if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 7))) return VKD3D_ERROR_OUT_OF_MEMORY; - ret = NULL; - - *ret_pos = pos + 4;
ssa_temp = program->ssa_count++; - - ins = &program->instructions.elements[pos]; + ssa_temp2 = program->ssa_count++;
vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MUL, 1, 2); dst_param_init_ssa_float(&ins->dst[0], ssa_temp); @@ -7736,12 +8573,17 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro ins->src[1].reg.idx[0].offset = fog_signature_idx; ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + ins = vsir_program_iterator_next(it);
- vsir_instruction_init_with_params(program, ++ins, &loc, VSIR_OP_EXP, 1, 1); - dst_param_init_ssa_float(&ins->dst[0], ssa_factor); - ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_NEG, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); src_param_init_ssa_float(&ins->src[0], ssa_temp); - ins->src[0].modifiers = VKD3DSPSM_NEG; + ins = vsir_program_iterator_next(it); + + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_EXP, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + src_param_init_ssa_float(&ins->src[0], ssa_temp2); + ins = vsir_program_iterator_next(it); break;
case VKD3D_SHADER_FOG_FRAGMENT_EXP2: @@ -7749,18 +8591,15 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro * * mul sr0, FOG_SCALE, vFOG.x * mul sr1, sr0, sr0 - * exp_sat srFACTOR, -sr1 + * neg sr2, sr1 + * exp srFACTOR, sr2 */ - if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 8))) return VKD3D_ERROR_OUT_OF_MEMORY; - ret = NULL; - - *ret_pos = pos + 5;
ssa_temp = program->ssa_count++; ssa_temp2 = program->ssa_count++; - - ins = &program->instructions.elements[pos]; + ssa_temp3 = program->ssa_count++;
vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MUL, 1, 2); dst_param_init_ssa_float(&ins->dst[0], ssa_temp); @@ -7769,17 +8608,23 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro ins->src[1].reg.idx[0].offset = fog_signature_idx; ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + ins = vsir_program_iterator_next(it);
- vsir_instruction_init_with_params(program, ++ins, &loc, VSIR_OP_MUL, 1, 2); + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MUL, 1, 2); dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); src_param_init_ssa_float(&ins->src[0], ssa_temp); src_param_init_ssa_float(&ins->src[1], ssa_temp); + ins = vsir_program_iterator_next(it);
- vsir_instruction_init_with_params(program, ++ins, &loc, VSIR_OP_EXP, 1, 1); - dst_param_init_ssa_float(&ins->dst[0], ssa_factor); - ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_NEG, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp3); src_param_init_ssa_float(&ins->src[0], ssa_temp2); - ins->src[0].modifiers = VKD3DSPSM_NEG; + ins = vsir_program_iterator_next(it); + + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_EXP, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + src_param_init_ssa_float(&ins->src[0], ssa_temp3); + ins = vsir_program_iterator_next(it); break;
default: @@ -7788,22 +8633,38 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro
/* We generate the following code: * - * add sr0, FRAG_COLOUR, -FOG_COLOUR - * mad oC0, sr0, srFACTOR, FOG_COLOUR + * neg sr0, FOG_COLOUR + * add sr1, FRAG_COLOUR, sr0 + * saturate sr2, srFACTOR + * mad oC0, sr1, sr2, FOG_COLOUR */ + ssa_temp = program->ssa_count++; + ssa_temp2 = program->ssa_count++; + ssa_temp3 = program->ssa_count++; + + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_NEG, 1, 1); + dst_param_init_ssa_float4(&ins->dst[0], ssa_temp); + src_param_init_parameter_vec4(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VSIR_DATA_F32); + ins = vsir_program_iterator_next(it);
- vsir_instruction_init_with_params(program, ++ins, &loc, VSIR_OP_ADD, 1, 2); - dst_param_init_ssa_float4(&ins->dst[0], program->ssa_count++); + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_ADD, 1, 2); + dst_param_init_ssa_float4(&ins->dst[0], ssa_temp2); src_param_init_temp_float4(&ins->src[0], colour_temp); - src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VSIR_DATA_F32); - ins->src[1].modifiers = VKD3DSPSM_NEG; + src_param_init_ssa_float4(&ins->src[1], ssa_temp); + ins = vsir_program_iterator_next(it); + + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_SATURATE, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp3); + src_param_init_ssa_float(&ins->src[0], ssa_factor); + ins = vsir_program_iterator_next(it);
- vsir_instruction_init_with_params(program, ++ins, &loc, VSIR_OP_MAD, 1, 3); + vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MAD, 1, 3); dst_param_init_output(&ins->dst[0], VSIR_DATA_F32, colour_signature_idx, program->output_signature.elements[colour_signature_idx].mask); - src_param_init_ssa_float4(&ins->src[0], program->ssa_count - 1); - src_param_init_ssa_float(&ins->src[1], ssa_factor); + src_param_init_ssa_float4(&ins->src[0], ssa_temp2); + src_param_init_ssa_float(&ins->src[1], ssa_temp3); src_param_init_parameter_vec4(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VSIR_DATA_F32); + ins = vsir_program_iterator_next(it);
return VKD3D_OK; } @@ -7811,14 +8672,13 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro static enum vkd3d_result vsir_program_insert_fragment_fog(struct vsir_program *program, struct vsir_transformation_context *ctx) { + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct vkd3d_shader_message_context *message_context = ctx->message_context; uint32_t colour_signature_idx, fog_signature_idx, colour_temp; const struct vkd3d_shader_parameter1 *mode_parameter = NULL; - static const struct vkd3d_shader_location no_loc; const struct signature_element *fog_element; enum vkd3d_shader_fog_fragment_mode mode; struct vkd3d_shader_instruction *ins; - size_t new_pos; int ret;
if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) @@ -7832,13 +8692,13 @@ static enum vkd3d_result vsir_program_insert_fragment_fog(struct vsir_program *p
if (mode_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unsupported fog fragment mode parameter type %#x.", mode_parameter->type); return VKD3D_ERROR_NOT_IMPLEMENTED; } if (mode_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid fog fragment mode parameter data type %#x.", mode_parameter->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -7859,19 +8719,16 @@ static enum vkd3d_result vsir_program_insert_fragment_fog(struct vsir_program *p * through the whole shader and convert it to a temp. */ colour_temp = program->temp_count++;
- for (size_t i = 0; i < program->instructions.count; ++i) + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { - ins = &program->instructions.elements[i]; - if (vsir_instruction_is_dcl(ins)) continue;
if (ins->opcode == VSIR_OP_RET) { - if ((ret = insert_fragment_fog_before_ret(program, ins, mode, fog_signature_idx, - colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) + if ((ret = insert_fragment_fog_before_ret(program, &it, mode, fog_signature_idx, + colour_signature_idx, colour_temp, message_context)) < 0) return ret; - i = new_pos; continue; }
@@ -7927,21 +8784,16 @@ static enum vkd3d_result vsir_program_add_fog_output(struct vsir_program *progra return VKD3D_OK; }
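For reference, the sequences emitted by insert_fragment_fog_before_ret() implement the usual Direct3D fog factor formulas, with saturation deferred to the final blend. A CPU-side sketch of the same arithmetic (function names are illustrative; VSIR_OP_EXP is a base-2 exponential, so any conversion from an e-based fog density is assumed to be folded into the FOG_SCALE parameter):

#include <math.h>

/* Linear fog: the neg/add/mul sequence computes (end - d) * scale. */
static float fog_factor_linear(float d, float end, float scale)
{
    return (end - d) * scale;
}

/* Exponential fog: mul/neg/exp computes 2^-(d * scale). */
static float fog_factor_exp(float d, float scale)
{
    return exp2f(-(d * scale));
}

/* Squared exponential fog: mul/mul/neg/exp computes 2^-((d * scale)^2). */
static float fog_factor_exp2(float d, float scale)
{
    float t = d * scale;

    return exp2f(-(t * t));
}

static float saturate(float x)
{
    return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
}

/* Final blend: neg/add/saturate/mad computes, per component,
 * (frag - fog_colour) * sat(factor) + fog_colour,
 * i.e. lerp(fog_colour, frag, sat(factor)). */
static float fog_blend(float frag, float fog_colour, float factor)
{
    return (frag - fog_colour) * saturate(factor) + fog_colour;
}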
-static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program, - const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_source source, uint32_t temp, - uint32_t fog_signature_idx, uint32_t source_signature_idx, size_t *ret_pos) +static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program, struct vsir_program_iterator *it, + enum vkd3d_shader_fog_source source, uint32_t temp, uint32_t fog_signature_idx, uint32_t source_signature_idx) { const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; - struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *ret = vsir_program_iterator_current(it); const struct vkd3d_shader_location loc = ret->location; - size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins;
- if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 2))) return VKD3D_ERROR_OUT_OF_MEMORY; - ret = NULL; - - ins = &program->instructions.elements[pos];
/* Write the fog output. */ vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MOV, 1, 1); @@ -7951,26 +8803,26 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); else /* Position or specular W. */ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); - ++ins; + ins = vsir_program_iterator_next(it);
/* Write the position or specular output. */ vsir_instruction_init_with_params(program, ins, &loc, VSIR_OP_MOV, 1, 1); dst_param_init_output(&ins->dst[0], vsir_data_type_from_component_type(e->component_type), source_signature_idx, e->mask); src_param_init_temp_float4(&ins->src[0], temp); - ++ins; + ins = vsir_program_iterator_next(it);
- *ret_pos = pos + 2; return VKD3D_OK; }
static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *program, struct vsir_transformation_context *ctx) { + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct vkd3d_shader_message_context *message_context = ctx->message_context; const struct vkd3d_shader_parameter1 *source_parameter = NULL; uint32_t fog_signature_idx, source_signature_idx, temp; - static const struct vkd3d_shader_location no_loc; + struct vkd3d_shader_instruction *ins; enum vkd3d_shader_fog_source source; const struct signature_element *e;
@@ -7982,13 +8834,13 @@ static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *pro
if (source_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unsupported fog source parameter type %#x.", source_parameter->type); return VKD3D_ERROR_NOT_IMPLEMENTED; } if (source_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid fog source parameter data type %#x.", source_parameter->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -8010,7 +8862,7 @@ static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *pro if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_POSITION, 0, &source_signature_idx)) { - vkd3d_shader_error(ctx->message_context, &no_loc, + vkd3d_shader_error(ctx->message_context, &ctx->null_location, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); return VKD3D_ERROR_INVALID_SHADER; } @@ -8027,22 +8879,18 @@ static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *pro
/* Insert a fog write before each ret, and convert either specular or * position output to a temp. */ - for (size_t i = 0; i < program->instructions.count; ++i) + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - if (vsir_instruction_is_dcl(ins)) continue;
if (ins->opcode == VSIR_OP_RET) { - size_t new_pos; int ret;
- if ((ret = insert_vertex_fog_before_ret(program, ins, source, temp, - fog_signature_idx, source_signature_idx, &new_pos)) < 0) + if ((ret = insert_vertex_fog_before_ret(program, &it, source, temp, + fog_signature_idx, source_signature_idx)) < 0) return ret; - i = new_pos; continue; }
@@ -8158,6 +9006,7 @@ static bool vsir_src_is_masked(enum vkd3d_shader_opcode opcode, unsigned int src case VSIR_OP_MOVC: case VSIR_OP_MSAD: /* FIXME: Is this correct? */ case VSIR_OP_MUL: + case VSIR_OP_NEG: case VSIR_OP_NEO: case VSIR_OP_NEU: case VSIR_OP_NOT: @@ -8174,6 +9023,7 @@ static bool vsir_src_is_masked(enum vkd3d_shader_opcode opcode, unsigned int src case VSIR_OP_ROUND_PI: case VSIR_OP_ROUND_Z: case VSIR_OP_RSQ: + case VSIR_OP_SATURATE: case VSIR_OP_SETP: case VSIR_OP_SGE: case VSIR_OP_SGN: @@ -8449,8 +9299,8 @@ struct liveness_tracker bool written; bool fixed_mask; uint8_t mask; - unsigned int first_write, last_access; - } *ssa_regs; + unsigned int first_write, last_access, last_read; + } *ssa_regs, *temp_regs; };
static void liveness_track_src(struct liveness_tracker *tracker, @@ -8463,7 +9313,15 @@ static void liveness_track_src(struct liveness_tracker *tracker, }
if (src->reg.type == VKD3DSPR_SSA) + { + tracker->ssa_regs[src->reg.idx[0].offset].last_read = index; tracker->ssa_regs[src->reg.idx[0].offset].last_access = index; + } + else if (src->reg.type == VKD3DSPR_TEMP) + { + tracker->temp_regs[src->reg.idx[0].offset].last_read = index; + tracker->temp_regs[src->reg.idx[0].offset].last_access = index; + } }
static void liveness_track_dst(struct liveness_tracker *tracker, struct vkd3d_shader_dst_param *dst, @@ -8479,6 +9337,8 @@ static void liveness_track_dst(struct liveness_tracker *tracker, struct vkd3d_sh
if (dst->reg.type == VKD3DSPR_SSA) reg = &tracker->ssa_regs[dst->reg.idx[0].offset]; + else if (dst->reg.type == VKD3DSPR_TEMP) + reg = &tracker->temp_regs[dst->reg.idx[0].offset]; else return;
@@ -8565,20 +9425,22 @@ static void liveness_tracker_cleanup(struct liveness_tracker *tracker)
static enum vkd3d_result track_liveness(struct vsir_program *program, struct liveness_tracker *tracker) { + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; struct liveness_tracker_reg *regs; unsigned int loop_depth = 0; unsigned int loop_start = 0; + unsigned int i;
memset(tracker, 0, sizeof(*tracker));
- if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs)))) + if (!(regs = vkd3d_calloc(program->ssa_count + program->temp_count, sizeof(*regs)))) return VKD3D_ERROR_OUT_OF_MEMORY; tracker->ssa_regs = regs; + tracker->temp_regs = ®s[program->ssa_count];
- for (unsigned int i = 0; i < program->instructions.count; ++i) + for (ins = vsir_program_iterator_head(&it), i = 0; ins; ins = vsir_program_iterator_next(&it), ++i) { - const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - if (ins->opcode == VSIR_OP_LOOP || ins->opcode == VSIR_OP_REP) { if (!loop_depth++) @@ -8604,8 +9466,7 @@ static enum vkd3d_result track_liveness(struct vsir_program *program, struct liv * should be illegal for an SSA value to be read in a block * containing L.) * We don't try to perform this optimization yet, in the name of - * maximal simplicity, and also because this code is intended to - * be extended to non-SSA values. */ + * maximal simplicity. */ for (unsigned int j = 0; j < program->ssa_count; ++j) { struct liveness_tracker_reg *reg = &tracker->ssa_regs[j]; @@ -8614,6 +9475,20 @@ static enum vkd3d_result track_liveness(struct vsir_program *program, struct liv reg->first_write = loop_start; if (reg->last_access < i) reg->last_access = i; + if (reg->last_read < i) + reg->last_read = i; + } + + for (unsigned int j = 0; j < program->temp_count; ++j) + { + struct liveness_tracker_reg *reg = &tracker->temp_regs[j]; + + if (reg->first_write > loop_start) + reg->first_write = loop_start; + if (reg->last_access < i) + reg->last_access = i; + if (reg->last_read < i) + reg->last_read = i; } } } @@ -8632,75 +9507,21 @@ struct temp_allocator struct vkd3d_shader_message_context *message_context; struct temp_allocator_reg { + struct liveness_tracker_reg *liveness_reg; uint8_t allocated_mask; uint32_t temp_id; - } *ssa_regs; - size_t allocated_ssa_count; + enum vkd3d_shader_register_type type; + unsigned int idx; + bool force_first; + bool fixed_mask; + } *ssa_regs, *temp_regs; + size_t ssa_count, temp_count; + unsigned int new_temp_count; enum vkd3d_result result; + uint8_t *current_allocation; + bool ps_1_x; };
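Note that both register classes share one backing array: temp_regs is an alias for the tail of ssa_regs, with the temp entries stored after the ssa_count SSA entries. A small sketch of the resulting lookup, mirroring the branches in temp_allocator_set_src() and temp_allocator_set_dst() below (the helper itself is hypothetical):

/* Sketch: flat index i < ssa_count is sr<i>; i >= ssa_count is r<i - ssa_count>. */
static struct temp_allocator_reg *temp_allocator_get_reg_sketch(struct temp_allocator *allocator,
        enum vkd3d_shader_register_type type, unsigned int idx)
{
    if (type == VKD3DSPR_SSA)
        return &allocator->ssa_regs[idx];
    if (type == VKD3DSPR_TEMP)
        return &allocator->temp_regs[idx]; /* == &allocator->ssa_regs[allocator->ssa_count + idx] */
    return NULL;
}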
-static uint8_t get_available_writemask(const struct temp_allocator *allocator, - struct liveness_tracker *tracker, unsigned int first_write, unsigned int last_access, uint32_t temp_id) -{ - uint8_t writemask = VKD3DSP_WRITEMASK_ALL; - - for (size_t i = 0; i < allocator->allocated_ssa_count; ++i) - { - const struct temp_allocator_reg *reg = &allocator->ssa_regs[i]; - const struct liveness_tracker_reg *liveness_reg = &tracker->ssa_regs[i]; - - /* We do not overlap if first write == last read: - * this is the case where we are allocating the result of that - * expression, e.g. "add r0, r0, r1". */ - - if (reg->temp_id == temp_id - && first_write < liveness_reg->last_access - && last_access > liveness_reg->first_write) - writemask &= ~reg->allocated_mask; - - if (!writemask) - return writemask; - } - - return writemask; -} - -static bool temp_allocator_allocate(struct temp_allocator *allocator, struct liveness_tracker *tracker, - struct temp_allocator_reg *reg, const struct liveness_tracker_reg *liveness_reg, uint32_t base_id) -{ - if (!liveness_reg->written) - return false; - - for (uint32_t id = base_id;; ++id) - { - uint8_t available_mask = get_available_writemask(allocator, tracker, - liveness_reg->first_write, liveness_reg->last_access, id); - - if (liveness_reg->fixed_mask) - { - if ((available_mask & liveness_reg->mask) == liveness_reg->mask) - { - reg->temp_id = id; - reg->allocated_mask = liveness_reg->mask; - return true; - } - } - else - { - /* For SSA values the mask is always zero-based and contiguous. - * We don't correctly handle cases where it's not, currently. */ - VKD3D_ASSERT((liveness_reg->mask | (liveness_reg->mask - 1)) == liveness_reg->mask); - - if (vkd3d_popcount(available_mask) >= vkd3d_popcount(liveness_reg->mask)) - { - reg->temp_id = id; - reg->allocated_mask = vsir_combine_write_masks(available_mask, liveness_reg->mask); - return true; - } - } - } -} - static void temp_allocator_set_src(struct temp_allocator *allocator, struct vkd3d_shader_src_param *src) { struct temp_allocator_reg *reg; @@ -8713,12 +9534,17 @@ static void temp_allocator_set_src(struct temp_allocator *allocator, struct vkd3
if (src->reg.type == VKD3DSPR_SSA) reg = &allocator->ssa_regs[src->reg.idx[0].offset]; + else if (src->reg.type == VKD3DSPR_TEMP) + reg = &allocator->temp_regs[src->reg.idx[0].offset]; else return;
src->reg.type = VKD3DSPR_TEMP; src->reg.dimension = VSIR_DIMENSION_VEC4; src->reg.idx[0].offset = reg->temp_id; + + if (reg->fixed_mask) + return; src->swizzle = vsir_combine_swizzles(vsir_swizzle_from_writemask(reg->allocated_mask), src->swizzle); }
@@ -8792,6 +9618,7 @@ static void temp_allocator_set_dst(struct temp_allocator *allocator, struct vkd3d_shader_dst_param *dst, const struct vkd3d_shader_instruction *ins) { struct temp_allocator_reg *reg; + uint32_t remapped_mask;
for (unsigned int k = 0; k < dst->reg.idx_count; ++k) { @@ -8801,15 +9628,27 @@ static void temp_allocator_set_dst(struct temp_allocator *allocator,
if (dst->reg.type == VKD3DSPR_SSA) reg = &allocator->ssa_regs[dst->reg.idx[0].offset]; + else if (dst->reg.type == VKD3DSPR_TEMP) + reg = &allocator->temp_regs[dst->reg.idx[0].offset]; else return;
dst->reg.type = VKD3DSPR_TEMP; dst->reg.dimension = VSIR_DIMENSION_VEC4; dst->reg.idx[0].offset = reg->temp_id; - if (reg->allocated_mask != dst->write_mask) + + if (reg->fixed_mask) + { + VKD3D_ASSERT((reg->allocated_mask & dst->write_mask) == dst->write_mask); + return; + } + + remapped_mask = vsir_combine_write_masks(reg->allocated_mask, dst->write_mask); + VKD3D_ASSERT(vkd3d_popcount(remapped_mask) == vkd3d_popcount(dst->write_mask)); + + if (dst->write_mask != remapped_mask) { - dst->write_mask = reg->allocated_mask; + dst->write_mask = remapped_mask;
        if (vsir_opcode_is_double(ins->opcode))
        {
@@ -8825,58 +9664,326 @@ static void temp_allocator_set_dst(struct temp_allocator *allocator,
            if (vsir_src_is_masked(ins->opcode, i))
            {
                if (src->reg.type == VKD3DSPR_IMMCONST)
-                    vsir_remap_immconst(src, dst->write_mask);
+                    vsir_remap_immconst(src, reg->allocated_mask);
                else if (src->reg.type == VKD3DSPR_IMMCONST64)
-                    vsir_remap_immconst64(src, dst->write_mask);
+                    vsir_remap_immconst64(src, reg->allocated_mask);
                else
-                    src->swizzle = vsir_map_swizzle(src->swizzle, dst->write_mask);
+                    src->swizzle = vsir_map_swizzle(src->swizzle, reg->allocated_mask);
+            }
+        }
+    }
+}
+
+static int temp_allocate_compare_open(const void *ptr1, const void *ptr2)
+{
+    const struct temp_allocator_reg * const *reg1 = ptr1, * const *reg2 = ptr2;
+    int ret;
+
+    if ((ret = vkd3d_u32_compare((*reg1)->force_first, (*reg2)->force_first)))
+        return -ret;
+    if ((ret = vkd3d_u32_compare((*reg1)->liveness_reg->first_write, (*reg2)->liveness_reg->first_write)))
+        return ret;
+    if ((ret = vkd3d_u32_compare((*reg1)->liveness_reg->last_access, (*reg2)->liveness_reg->last_access)))
+        return ret;
+    return 0;
+}
+
+static int temp_allocate_compare_close(const void *ptr1, const void *ptr2)
+{
+    const struct temp_allocator_reg * const *reg1 = ptr1, * const *reg2 = ptr2;
+    int ret;
+
+    if ((ret = vkd3d_u32_compare((*reg1)->liveness_reg->last_access, (*reg2)->liveness_reg->last_access)))
+        return ret;
+    return vkd3d_u32_compare((*reg1)->liveness_reg->first_write, (*reg2)->liveness_reg->first_write);
+}
+
+static const char *debug_temp_allocator_reg(const struct temp_allocator_reg *reg)
+{
+    return vkd3d_dbg_sprintf("%s%u", reg->type == VKD3DSPR_SSA ? "sr" : "r", reg->idx);
+}
+
+static void temp_allocator_open_register(struct temp_allocator *allocator, struct temp_allocator_reg *reg)
+{
+    const size_t reg_count = allocator->ssa_count + allocator->temp_count;
+    const struct liveness_tracker_reg *liveness_reg = reg->liveness_reg;
+    uint8_t *current_allocation = allocator->current_allocation;
+    size_t i;
+
+    if (!liveness_reg->written)
+        return;
+
+    for (i = 0; i < reg_count; ++i)
+    {
+        const uint8_t available_mask = ~current_allocation[i] & 0xf;
+
+        if (liveness_reg->fixed_mask)
+        {
+            if ((available_mask & liveness_reg->mask) == liveness_reg->mask)
+            {
+                reg->temp_id = i;
+                reg->allocated_mask = liveness_reg->mask;
+                reg->fixed_mask = true;
+                current_allocation[i] |= reg->allocated_mask;
+                allocator->new_temp_count = max(allocator->new_temp_count, i + 1);
+                TRACE("Allocated r%u%s for %s (liveness %u-%u).\n",
+                        reg->temp_id, debug_vsir_writemask(reg->allocated_mask),
+                        debug_temp_allocator_reg(reg), liveness_reg->first_write, liveness_reg->last_access);
+                break;
+            }
+        }
+        else
+        {
+            /* For SSA values the mask is always zero-based and contiguous.
+             * For TEMP values we assume the register was allocated that way,
+             * but it may only be partially used.
+             * We currently only handle cases where the mask is zero-based and
+             * contiguous, so we need to fill in the missing components to
+             * ensure this.
+             */
+            uint8_t mask = (1u << (vkd3d_log2i(liveness_reg->mask) + 1)) - 1;
+
+            if (vkd3d_popcount(available_mask) >= vkd3d_popcount(mask))
+            {
+                if (mask != liveness_reg->mask)
+                    WARN("Allocating a mask %#x with used components %#x; this is not optimized.\n",
+                            mask, liveness_reg->mask);
+
+                reg->temp_id = i;
+                reg->allocated_mask = vsir_combine_write_masks(available_mask, mask);
+                reg->fixed_mask = false;
+                current_allocation[i] |= reg->allocated_mask;
+                allocator->new_temp_count = max(allocator->new_temp_count, i + 1);
+                TRACE("Allocated r%u%s for %s (liveness %u-%u).\n",
+                        reg->temp_id, debug_vsir_writemask(reg->allocated_mask),
+                        debug_temp_allocator_reg(reg), liveness_reg->first_write, liveness_reg->last_access);
+                break;
+            }
+        }
+    }
+
+    VKD3D_ASSERT(i < reg_count);
+}
+
+static void temp_allocator_close_register(struct temp_allocator *allocator, struct temp_allocator_reg *reg)
+{
+    const struct liveness_tracker_reg *liveness_reg = reg->liveness_reg;
+
+    if (!liveness_reg->written)
+        return;
+
+    TRACE("Register %s (liveness %u-%u) reaches end of life.\n",
+            debug_temp_allocator_reg(reg), liveness_reg->first_write, liveness_reg->last_access);
+
+    allocator->current_allocation[reg->temp_id] &= ~reg->allocated_mask;
+}
+
+/* Compute the allocation map. Each register is modeled as a time interval
+ * spanning from `first_write' to `last_access'. We simulate scanning through
+ * all the intervals in time order, keeping the set of currently allocated
+ * registers as a bit map: each time we open an interval (i.e., hit its
+ * `first_write' time) we allocate it to the first available register scanning
+ * the current state; each time we close an interval (i.e., hit its
+ * `last_access' time) we unset the corresponding bits.
+ *
+ * In general at any given time we first process all intervals to be closed and
+ * then all intervals to be opened at that time. This models the fact that an
+ * instruction can write to a register which it also reads from, and the write
+ * won't interfere with the read. In other words, first all reads are
+ * performed, then the instruction is executed, then the writes are performed.
+ *
+ * There is a corner case exception, though: the case of degenerate intervals
+ * that are opened and closed at the same time. This corresponds to registers
+ * that are written and then never read, which in principle shouldn't exist
+ * because they make no sense. However, it's better to be robust, and we
+ * support them anyway.
+ *
+ * So that's what we do:
+ * - First all non-degenerate closes are processed.
+ * - Then all degenerate opens are processed, because we cannot close them
+ *   before having opened them.
+ * - Then all non-degenerate opens are processed: this has to happen before
+ *   the degenerate intervals are closed, because they need to be allocated to
+ *   different registers.
+ * - Then all degenerate closes are processed.
+ *
+ * This is effected with a few different strategies:
+ * - In the open order, registers are primarily sorted by `first_write' and
+ *   secondarily by `last_access'. This way degenerate registers are always
+ *   opened before non-degenerate ones with the same `first_write' time.
+ * - In the close order, registers are primarily sorted by `last_access' and
+ *   secondarily by `first_write'. This way non-degenerate registers are
+ *   always closed before degenerate ones with the same `last_access' time.
+ * - There is a scheduling algorithm that decides at each iteration whether to
+ *   open or close a register. See details below.
+ *
+ * TODO: the algorithm could be further optimized by keeping a few pointers to
+ * the first position in `current_allocation' that has at least one (or two,
+ * three and four) available components, so we don't always have to scan from
+ * the beginning.
+ */
+static enum vkd3d_result temp_allocator_compute_allocation_map(struct temp_allocator *allocator,
+        const struct liveness_tracker *liveness)
+{
+    const size_t reg_count = allocator->ssa_count + allocator->temp_count;
+    struct temp_allocator_reg **open_order = NULL, **close_order = NULL;
+    size_t i, pos_open = 0, pos_close = 0;
+
+    /* In the worst-case scenario each of the `reg_count' registers to be
+     * processed requires its own allocation. We should never exceed that
+     * amount. */
+    if (!(allocator->current_allocation = vkd3d_calloc(reg_count, sizeof(*allocator->current_allocation)))
+            || !(open_order = vkd3d_calloc(reg_count, sizeof(*open_order)))
+            || !(close_order = vkd3d_calloc(reg_count, sizeof(*close_order))))
+    {
+        vkd3d_free(close_order);
+        vkd3d_free(open_order);
+        vkd3d_free(allocator->current_allocation);
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+
+    for (i = 0; i < reg_count; ++i)
+    {
+        struct temp_allocator_reg *reg = &allocator->ssa_regs[i];
+
+        if (i < allocator->ssa_count)
+        {
+            reg->type = VKD3DSPR_SSA;
+            reg->idx = i;
+        }
+        else
+        {
+            reg->type = VKD3DSPR_TEMP;
+            reg->idx = i - allocator->ssa_count;
+
+            /* For SM 1.x ps we need to ensure that r0 is reallocated to itself,
+             * because it doubles as the output register. To do so we
+             * artificially make it alive for the whole program and make it
+             * sort before anything else. */
+            if (reg->idx == 0 && allocator->ps_1_x)
+            {
+                reg->force_first = true;
+                liveness->ssa_regs[i].first_write = 0;
+                liveness->ssa_regs[i].last_access = UINT_MAX;
+            }
+        }
+
+        reg->liveness_reg = &liveness->ssa_regs[i];
+        open_order[i] = reg;
+        close_order[i] = reg;
+    }
+
+    qsort(open_order, reg_count, sizeof(*open_order), temp_allocate_compare_open);
+    qsort(close_order, reg_count, sizeof(*close_order), temp_allocate_compare_close);
+
+    for (;;)
+    {
+        struct temp_allocator_reg *reg_open = NULL, *reg_close = NULL;
+        bool do_open;
+
+        if (pos_open < reg_count)
+            reg_open = open_order[pos_open];
+        if (pos_close < reg_count)
+            reg_close = close_order[pos_close];
+
+        /* We cannot close all the registers before we finish opening them. */
+        VKD3D_ASSERT(!(reg_open && !reg_close));
+
+        /* We finished closing registers, nothing to do any more. */
+        if (!reg_close)
+            break;
+        /* There is nothing to open, so we just close. */
+        else if (!reg_open)
+            do_open = false;
+        /* The next open event happens before the next close event, so we open. */
+        else if (reg_open->liveness_reg->first_write < reg_close->liveness_reg->last_access)
+            do_open = true;
+        /* The other way around, we close. */
+        else if (reg_close->liveness_reg->last_access < reg_open->liveness_reg->first_write)
+            do_open = false;
+        /* Ok, now we have both an open and a close happening at the same time.
+         * According to the strategy above, if the interval to close is
+         * non-degenerate, then we process it. */
+        else if (reg_close->liveness_reg->first_write < reg_close->liveness_reg->last_access)
+            do_open = false;
+        /* Otherwise the interval to close is degenerate, and therefore we first
+         * open whatever needs to be opened.
*/ + else + do_open = true; + + if (do_open) + { + temp_allocator_open_register(allocator, reg_open); + ++pos_open; + } + else + { + temp_allocator_close_register(allocator, reg_close); + ++pos_close; + } } + + vkd3d_free(close_order); + vkd3d_free(open_order); + vkd3d_free(allocator->current_allocation); + return VKD3D_OK; }
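A compact worked example of the ordering rules implemented above, using three hypothetical intervals:

/* Consider three registers:
 *
 *   a: first_write 1, last_access 3   (normal)
 *   b: first_write 3, last_access 5   (opens where a closes)
 *   c: first_write 3, last_access 3   (degenerate: written, never read)
 *
 * Open order (first_write, then last_access):  a, c, b.
 * Close order (last_access, then first_write): a, c, b.
 *
 * At time 3 the scheduler first closes the non-degenerate a, then opens
 * the degenerate c (which may reuse a's freed slot), then opens b (which
 * must not share with the still-open c), and only then closes c. With an
 * empty initial register file this yields a -> r0, c -> r0, b -> r1. */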
+/* This pass does two things:
+ *
+ * - converts SSA registers (sr#) into temp registers (r#);
+ *
+ * - contracts temp registers with non-overlapping ranges by reallocating them
+ *   into the same register.
+ *
+ * These are done at the same time so that SSA and temp registers with
+ * non-overlapping liveness can share the same register.
+ *
+ * The temp contraction is not particularly sophisticated. In particular, it
+ * does not detect cases where a single temp register has multiple disjoint
+ * ranges of liveness, and it also assumes that the components used by a
+ * single register are zero-based and contiguous.
+ * The intent for temp contraction is that HLSL will output each distinct
+ * variable to a unique temp ID. */
 enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program,
         struct vkd3d_shader_message_context *message_context)
 {
-    const unsigned int prev_temp_count = program->temp_count;
+    struct vsir_program_iterator it = vsir_program_iterator(&program->instructions);
     struct temp_allocator allocator = {0};
+    struct vkd3d_shader_instruction *ins;
     struct temp_allocator_reg *regs;
     struct liveness_tracker tracker;
     enum vkd3d_result ret;
- if (!program->ssa_count) + if (!program->ssa_count && !program->temp_count) return VKD3D_OK;
if ((ret = track_liveness(program, &tracker))) return ret;
- if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs)))) + if (!(regs = vkd3d_calloc(program->ssa_count + program->temp_count, sizeof(*regs)))) { liveness_tracker_cleanup(&tracker); return VKD3D_ERROR_OUT_OF_MEMORY; } allocator.message_context = message_context; + allocator.ssa_count = program->ssa_count; + allocator.temp_count = program->temp_count; allocator.ssa_regs = regs; + allocator.temp_regs = regs + program->ssa_count; + allocator.new_temp_count = 0;
- for (unsigned int i = 0; i < program->ssa_count; ++i) - { - const struct liveness_tracker_reg *liveness_reg = &tracker.ssa_regs[i]; - struct temp_allocator_reg *reg = &allocator.ssa_regs[i]; + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && program->shader_version.major < 2) + allocator.ps_1_x = true;
- if (temp_allocator_allocate(&allocator, &tracker, reg, liveness_reg, prev_temp_count)) - { - TRACE("Allocated r%u%s to sr%u (liveness %u-%u).\n", - reg->temp_id, debug_vsir_writemask(reg->allocated_mask), i, - liveness_reg->first_write, liveness_reg->last_access); - program->temp_count = max(program->temp_count, reg->temp_id + 1); - } - ++allocator.allocated_ssa_count; + if ((ret = temp_allocator_compute_allocation_map(&allocator, &tracker)) < 0) + { + liveness_tracker_cleanup(&tracker); + vkd3d_free(regs); + return ret; }
- for (unsigned int i = 0; i < program->instructions.count; ++i) + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { - const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - /* Make sure we do the srcs first; setting the dst writemask may need * to remap their swizzles. */ for (unsigned int j = 0; j < ins->src_count; ++j) @@ -8886,9 +9993,11 @@ enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, }
program->ssa_count = 0; + program->temp_count = allocator.new_temp_count;
vkd3d_free(regs); liveness_tracker_cleanup(&tracker); + return allocator.result; }
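To illustrate the combined effect of the pass, a hypothetical before/after fragment in the pseudo-assembly style used by comments in this file (register numbers and masks illustrative):

/* Before allocation:          After allocation:
 *
 *   mov r0, v0                  mov r0, v0
 *   add sr0, r0, r0             add r1.x, r0, r0
 *   mul sr1, sr0, r0            mul r1.x, r1.x, r0
 *   mov o0, sr1                 mov o0, r1.x
 *
 * sr0 dies in the same instruction that writes sr1, so both can share
 * r1; r0 stays live across the whole range and keeps a register of its
 * own. The resulting temp_count is 2. */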
@@ -8902,11 +10011,17 @@ enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, enum vkd3d_result vsir_update_dcl_temps(struct vsir_program *program, struct vkd3d_shader_message_context *message_context) { + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_location location; + struct vkd3d_shader_instruction *ins; unsigned int temp_count = 0;
- for (int i = program->instructions.count - 1; i >= 0; --i) + if (program->shader_version.major < 4) + return VKD3D_OK; + + for (ins = vsir_program_iterator_tail(&it); ins; ins = vsir_program_iterator_prev(&it)) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + location = ins->location;
if (ins->opcode == VSIR_OP_DCL_TEMPS) { @@ -8915,18 +10030,17 @@ enum vkd3d_result vsir_update_dcl_temps(struct vsir_program *program, continue; }
- if (temp_count && program->shader_version.major >= 4 - && (ins->opcode == VSIR_OP_HS_CONTROL_POINT_PHASE - || ins->opcode == VSIR_OP_HS_FORK_PHASE - || ins->opcode == VSIR_OP_HS_JOIN_PHASE)) + if (temp_count && (ins->opcode == VSIR_OP_HS_CONTROL_POINT_PHASE + || ins->opcode == VSIR_OP_HS_FORK_PHASE + || ins->opcode == VSIR_OP_HS_JOIN_PHASE)) { /* The phase didn't have a dcl_temps instruction, but we added * temps here, so we need to insert one. */ - if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1)) + if (!vsir_program_iterator_insert_after(&it, 1)) return VKD3D_ERROR_OUT_OF_MEMORY;
- ins = &program->instructions.elements[i + 1]; - vsir_instruction_init(ins, &program->instructions.elements[i].location, VSIR_OP_DCL_TEMPS); + ins = vsir_program_iterator_next(&it); + vsir_instruction_init(ins, &location, VSIR_OP_DCL_TEMPS); ins->declaration.count = temp_count; temp_count = 0; continue; @@ -8945,15 +10059,15 @@ enum vkd3d_result vsir_update_dcl_temps(struct vsir_program *program, } }
- if (temp_count && program->shader_version.major >= 4) + if (temp_count) { - struct vkd3d_shader_instruction *ins; + ins = vsir_program_iterator_head(&it); + location = ins->location;
- if (!shader_instruction_array_insert_at(&program->instructions, 0, 1)) + if (!(ins = vsir_program_iterator_insert_before_and_move(&it, 1))) return VKD3D_ERROR_OUT_OF_MEMORY;
- ins = &program->instructions.elements[0]; - vsir_instruction_init(ins, &program->instructions.elements[1].location, VSIR_OP_DCL_TEMPS); + vsir_instruction_init(ins, &location, VSIR_OP_DCL_TEMPS); ins->declaration.count = temp_count; }
@@ -8965,7 +10079,7 @@ struct validation_context struct vkd3d_shader_message_context *message_context; const struct vsir_program *program; size_t instruction_idx; - struct vkd3d_shader_location null_location; + struct vkd3d_shader_location location; bool invalid_instruction_idx; enum vkd3d_result status; bool dcl_temps_found; @@ -9024,13 +10138,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c
if (ctx->invalid_instruction_idx) { - vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); + vkd3d_shader_error(ctx->message_context, &ctx->location, error, "%s", buf.buffer); WARN("VSIR validation error: %s\n", buf.buffer); } else { - const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; - vkd3d_shader_error(ctx->message_context, &ins->location, error, + vkd3d_shader_error(ctx->message_context, &ctx->location, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); } @@ -9359,6 +10472,37 @@ static void vsir_validate_io_register(struct validation_context *ctx, const stru reg->idx[control_point_index].offset, control_point_count, reg->type); }
+static void vsir_validate_texture_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) +{ + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + uint32_t idx; + + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "TEXTURE registers cannot be used in shader type %#x.", version->type); + + if (reg->idx_count != 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a TEXTURE register.", reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for a TEXTURE register."); + + if (version->major >= 3) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "TEXTURE registers cannot be used in version %u.%u.", version->major, version->minor); + + idx = reg->idx[0].offset; + if (idx >= 8 || (vkd3d_shader_ver_le(version, 1, 4) && idx >= 6) + || (vkd3d_shader_ver_le(version, 1, 3) && idx >= 4)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Register t%u exceeds limits for version %u.%u.", idx, version->major, version->minor); +} + static void vsir_validate_temp_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) { @@ -9459,7 +10603,8 @@ static void vsir_validate_label_register(struct validation_context *ctx,
if (reg->data_type != VSIR_DATA_UNUSED) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for a LABEL register.", reg->data_type); + "Invalid data type \"%s\" (%#x) for a LABEL register.", + vsir_data_type_get_name(reg->data_type, "<unknown>"), reg->data_type);
if (reg->dimension != VSIR_DIMENSION_NONE) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, @@ -9538,7 +10683,8 @@ static void vsir_validate_sampler_register(struct validation_context *ctx,
if (reg->data_type != VSIR_DATA_UNUSED) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for a SAMPLER register.", reg->data_type); + "Invalid data type \"%s\" (%#x) for a SAMPLER register.", + vsir_data_type_get_name(reg->data_type, "<unknown>"), reg->data_type);
/* VEC4 is allowed in gather operations. */ if (reg->dimension == VSIR_DIMENSION_SCALAR) @@ -9564,7 +10710,8 @@ static void vsir_validate_resource_register(struct validation_context *ctx,
if (reg->data_type != VSIR_DATA_UNUSED) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for a RESOURCE register.", reg->data_type); + "Invalid data type \"%s\" (%#x) for a RESOURCE register.", + vsir_data_type_get_name(reg->data_type, "<unknown>"), reg->data_type);
if (reg->dimension != VSIR_DIMENSION_VEC4) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, @@ -9590,8 +10737,8 @@ static void vsir_validate_uav_register(struct validation_context *ctx,
if (reg->data_type != VSIR_DATA_UNUSED) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for a UAV register.", - reg->data_type); + "Invalid data type \"%s\" (%#x) for a UAV register.", + vsir_data_type_get_name(reg->data_type, "<unknown>"), reg->data_type);
/* NONE is allowed in counter operations. */ if (reg->dimension == VSIR_DIMENSION_SCALAR) @@ -9663,9 +10810,10 @@ static void vsir_validate_ssa_register(struct validation_context *ctx,
if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for a SSA register: " - "it has already been seen with data type %#x at instruction %zu.", - reg->data_type, data->data_type, data->first_seen); + "Invalid data type \"%s\" (%#x) for SSA register %u: " + "it has already been seen with data type \"%s\" (%#x) at instruction %zu.", + vsir_data_type_get_name(reg->data_type, "<unknown>"), reg->data_type, reg->idx[0].offset, + vsir_data_type_get_name(data->data_type, "<unknown>"), data->data_type, data->first_seen); } }
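Note that the check above compares only the 64-bit-ness of the two sightings, not the exact types; as a concrete illustration (hypothetical values, not from the patch):

    /* sr1 first seen as VSIR_DATA_F32, later as VSIR_DATA_U32: accepted,
     * both are 32-bit types.
     * sr1 first seen as VSIR_DATA_F32, later as VSIR_DATA_F64: rejected,
     * and the reworked message prints both type names rather than two
     * bare enum values. */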
@@ -9824,6 +10972,10 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break;
+ case VKD3DSPR_TEXTURE: + vsir_validate_texture_register(ctx, reg); + break; + case VKD3DSPR_SSA: vsir_validate_ssa_register(ctx, reg); break; @@ -9863,16 +11015,42 @@ static void vsir_validate_io_dst_param(struct validation_context *ctx, const struct vkd3d_shader_dst_param *dst) { struct vsir_io_register_data io_reg_data; + const struct signature_element *e; + unsigned int idx;
if (!vsir_get_io_register_data(ctx, dst->reg.type, &io_reg_data) || !(io_reg_data.flags & OUTPUT_BIT)) + { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x used as destination parameter.", dst->reg.type); -} - -static void vsir_validate_dst_param(struct validation_context *ctx, - const struct vkd3d_shader_dst_param *dst) -{ - vsir_validate_register(ctx, &dst->reg); + return; + } + + if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) + { + if (!dst->reg.idx_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid index count %u for a register of type %#x.", + dst->reg.idx_count, dst->reg.type); + return; + } + + idx = dst->reg.idx[dst->reg.idx_count - 1].offset; + e = &io_reg_data.signature->elements[idx]; + + if (dst->write_mask & ~e->mask) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid destination write mask %#x for signature element with mask %#x.", + dst->write_mask, e->mask); + } +} + +static void vsir_validate_dst_param(struct validation_context *ctx, + const struct vkd3d_shader_dst_param *dst) +{ + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + + vsir_validate_register(ctx, &dst->reg);
if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.", @@ -9898,7 +11076,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, break; }
- if (dst->modifiers & ~VKD3DSPDM_MASK) + if (dst->modifiers & ~VKD3DSPDM_MASK || (ctx->program->has_no_modifiers && dst->modifiers)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", dst->modifiers);
@@ -9913,7 +11091,8 @@ static void vsir_validate_dst_param(struct validation_context *ctx,
default: validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for destination with saturate modifier.", dst->reg.data_type); + "Invalid data type \"%s\" (%#x) for destination with saturate modifier.", + vsir_data_type_get_name(dst->reg.data_type, "<unknown>"), dst->reg.data_type); break;
} @@ -9932,7 +11111,8 @@ static void vsir_validate_dst_param(struct validation_context *ctx, case 15: if (dst->reg.data_type != VSIR_DATA_F32) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for destination with shift.", dst->reg.data_type); + "Invalid data type \"%s\" (%#x) for destination with shift.", + vsir_data_type_get_name(dst->reg.data_type, "<unknown>"), dst->reg.data_type); break;
default: @@ -9999,6 +11179,11 @@ static void vsir_validate_dst_param(struct validation_context *ctx, vsir_validate_io_dst_param(ctx, dst); break;
+ case VKD3DSPR_TEXTURE: + if (vkd3d_shader_ver_ge(version, 1, 4)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Texture registers cannot be written in version %u.%u.", version->major, version->minor); + default: break; } @@ -10062,7 +11247,17 @@ static void vsir_validate_src_param(struct validation_context *ctx, validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", src->reg.dimension, src->swizzle);
- if (src->modifiers >= VKD3DSPSM_COUNT) + if (src->reg.dimension == VSIR_DIMENSION_VEC4 && src->reg.type == VKD3DSPR_IMMCONST + && src->swizzle != VKD3D_SHADER_NO_SWIZZLE) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, + "Immediate constant source has invalid swizzle %#x.", src->swizzle); + + if (src->reg.dimension == VSIR_DIMENSION_VEC4 && src->reg.type == VKD3DSPR_IMMCONST64 + && src->swizzle != VKD3D_SHADER_SWIZZLE(X, Y, X, X)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, + "Immediate constant source has invalid swizzle %#x.", src->swizzle); + + if (src->modifiers >= VKD3DSPSM_COUNT || (ctx->program->has_no_modifiers && src->modifiers)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", src->modifiers);
@@ -10070,7 +11265,8 @@ static void vsir_validate_src_param(struct validation_context *ctx, { if (!(src_modifier_data[src->modifiers].data_type_mask & (1u << src->reg.data_type))) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, - "Source has invalid modifier %#x for data type %u.", src->modifiers, src->reg.data_type); + "Source has invalid modifier %#x for data type \"%s\" (%#x).", + src->modifiers, vsir_data_type_get_name(src->reg.data_type, "<unknown>"), src->reg.data_type); }
switch (src->reg.type) @@ -10722,7 +11918,7 @@ static void vsir_validate_hull_shader_phase(struct validation_context *ctx, static void vsir_validate_elementwise_operation(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction, const bool types[VSIR_DATA_TYPE_COUNT]) { - enum vsir_data_type dst_data_type; + enum vsir_data_type dst_data_type, src_data_type; unsigned int i;
if (instruction->dst_count < 1) @@ -10735,16 +11931,18 @@ static void vsir_validate_elementwise_operation(struct validation_context *ctx,
if (!types[dst_data_type]) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for elementwise operation \"%s\" (%#x).", - dst_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); + "Invalid data type \"%s\" (%#x) for elementwise operation \"%s\" (%#x).", + vsir_data_type_get_name(dst_data_type, "<unknown>"), dst_data_type, + vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
for (i = 0; i < instruction->src_count; ++i) { - if (instruction->src[i].reg.data_type != dst_data_type) + if ((src_data_type = instruction->src[i].reg.data_type) != dst_data_type) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Data type %#x for operand %u doesn't match the destination data type %#x " + "Data type \"%s\" (%#x) for operand %u doesn't match the destination data type \"%s\" (%#x) " "for elementwise operation \"%s\" (%#x).", - instruction->src[i].reg.data_type, i, dst_data_type, + vsir_data_type_get_name(src_data_type, "<unknown>"), src_data_type, i, + vsir_data_type_get_name(dst_data_type, "<unknown>"), dst_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); } } @@ -10771,12 +11969,25 @@ static void vsir_validate_float_elementwise_operation(struct validation_context vsir_validate_elementwise_operation(ctx, instruction, types); }
+static void vsir_validate_float_or_double_elementwise_operation(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VSIR_DATA_TYPE_COUNT] = + { + [VSIR_DATA_F32] = true, + [VSIR_DATA_F64] = true, + }; + + vsir_validate_elementwise_operation(ctx, instruction, types); +} + static void vsir_validate_integer_elementwise_operation(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { static const bool types[VSIR_DATA_TYPE_COUNT] = { [VSIR_DATA_I32] = true, + [VSIR_DATA_I64] = true, [VSIR_DATA_U32] = true, [VSIR_DATA_U64] = true, }; @@ -10784,6 +11995,18 @@ static void vsir_validate_integer_elementwise_operation(struct validation_contex vsir_validate_elementwise_operation(ctx, instruction, types); }
+static void vsir_validate_signed_integer_elementwise_operation(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VSIR_DATA_TYPE_COUNT] = + { + [VSIR_DATA_I32] = true, + [VSIR_DATA_I64] = true, + }; + + vsir_validate_elementwise_operation(ctx, instruction, types); +} + static void vsir_validate_logic_elementwise_operation(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -10801,7 +12024,7 @@ static void vsir_validate_logic_elementwise_operation(struct validation_context static void vsir_validate_comparison_operation(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction, const bool types[VSIR_DATA_TYPE_COUNT]) { - enum vsir_data_type dst_data_type, src_data_type; + enum vsir_data_type dst_data_type, src_data_type, data_type; unsigned int i;
if (instruction->dst_count < 1) @@ -10811,8 +12034,9 @@ static void vsir_validate_comparison_operation(struct validation_context *ctx,
if (dst_data_type != VSIR_DATA_U32 && dst_data_type != VSIR_DATA_BOOL) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for result of comparison operation \"%s\" (%#x).", - dst_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); + "Invalid data type \"%s\" (%#x) for result of comparison operation \"%s\" (%#x).", + vsir_data_type_get_name(dst_data_type, "<unknown>"), dst_data_type, + vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
if (instruction->src_count == 0) return; @@ -10824,16 +12048,18 @@ static void vsir_validate_comparison_operation(struct validation_context *ctx,
if (!types[src_data_type]) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid data type %#x for comparison operation \"%s\" (%#x).", - src_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); + "Invalid data type \"%s\" (%#x) for comparison operation \"%s\" (%#x).", + vsir_data_type_get_name(src_data_type, "<unknown>"), src_data_type, + vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
for (i = 1; i < instruction->src_count; ++i) { - if (instruction->src[i].reg.data_type != src_data_type) + if ((data_type = instruction->src[i].reg.data_type) != src_data_type) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Data type %#x for operand %u doesn't match the first operands data type %#x " - "for comparison operation \"%s\" (%#x).", - instruction->src[i].reg.data_type, i, src_data_type, + "Data type \"%s\" (%#x) for operand %u doesn't match the first " + "operand's data type \"%s\" (%#x) for comparison operation \"%s\" (%#x).", + vsir_data_type_get_name(data_type, "<unknown>"), data_type, i, + vsir_data_type_get_name(src_data_type, "<unknown>"), src_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); } } @@ -10874,6 +12100,18 @@ static void vsir_validate_integer_comparison_operation(struct validation_context vsir_validate_comparison_operation(ctx, instruction, types); }
+static void vsir_validate_signed_integer_comparison_operation(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VSIR_DATA_TYPE_COUNT] = + { + [VSIR_DATA_I32] = true, + [VSIR_DATA_I64] = true, + }; + + vsir_validate_comparison_operation(ctx, instruction, types); +} + static void vsir_validate_cast_operation(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction, const bool src_types[VSIR_DATA_TYPE_COUNT], const bool dst_types[VSIR_DATA_TYPE_COUNT]) @@ -10891,45 +12129,72 @@ static void vsir_validate_cast_operation(struct validation_context *ctx,
if (!src_types[src_data_type]) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid source data type %#x for cast operation \"%s\" (%#x).", - src_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); + "Invalid source data type \"%s\" (%#x) for cast operation \"%s\" (%#x).", + vsir_data_type_get_name(src_data_type, "<unknown>"), src_data_type, + vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
if (!dst_types[dst_data_type]) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid destination data type %#x for cast operation \"%s\" (%#x).", - dst_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); + "Invalid destination data type \"%s\" (%#x) for cast operation \"%s\" (%#x).", + vsir_data_type_get_name(dst_data_type, "<unknown>"), dst_data_type, + vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); }
static void vsir_validate_shift_operation(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction) + const struct vkd3d_shader_instruction *instruction, const bool types[VSIR_DATA_TYPE_COUNT]) { - enum vsir_data_type data_type; + enum vsir_data_type dst_data_type, src_data_type;
- static const bool types[] = + static const bool shift_types[] = { [VSIR_DATA_I32] = true, + [VSIR_DATA_I64] = true, [VSIR_DATA_U32] = true, [VSIR_DATA_U64] = true, };
- data_type = instruction->dst[0].reg.data_type; - if ((size_t)data_type >= ARRAY_SIZE(types) || !types[data_type]) + dst_data_type = instruction->dst[0].reg.data_type; + if ((size_t)dst_data_type >= VSIR_DATA_TYPE_COUNT || !types[dst_data_type]) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid destination data type %#x for shift operation \"%s\" (%#x).", - data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); + "Invalid destination data type \"%s\" (%#x) for shift operation \"%s\" (%#x).", + vsir_data_type_get_name(dst_data_type, "<unknown>"), dst_data_type, + vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
- if (instruction->src[0].reg.data_type != data_type) + if ((src_data_type = instruction->src[0].reg.data_type) != dst_data_type) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Data type %#x for source operand 0 doesn't match destination data type %#x " + "Data type \"%s\" (%#x) for source operand 0 doesn't match destination data type \"%s\" (%#x) " "for shift operation \"%s\" (%#x).", - instruction->src[0].reg.data_type, data_type, + vsir_data_type_get_name(src_data_type, "<unknown>"), src_data_type, + vsir_data_type_get_name(dst_data_type, "<unknown>"), dst_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
- data_type = instruction->src[1].reg.data_type; - if ((size_t)data_type >= ARRAY_SIZE(types) || !types[data_type]) + src_data_type = instruction->src[1].reg.data_type; + if ((size_t)src_data_type >= ARRAY_SIZE(shift_types) || !shift_types[src_data_type]) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid source operand 1 data type %#x for shift operation \"%s\" (%#x).", - data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); + "Invalid source operand 1 data type \"%s\" (%#x) for shift operation \"%s\" (%#x).", + vsir_data_type_get_name(src_data_type, "<unknown>"), src_data_type, + vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); +} + +static void vsir_validate_bem(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "BEM cannot be used in shader type %#x.", version->type); + + if (version->major != 1 || version->minor != 4) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_OPCODE, + "BEM cannot be used in version %u.%u.", version->major, version->minor); + + if (instruction->dst[0].write_mask != 0x3) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid BEM write mask %#x.", instruction->dst[0].write_mask); + + /* Strictly not an elementwise operation, but we expect all the arguments + * to be float. */ + vsir_validate_float_elementwise_operation(ctx, instruction); }
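Taken together with the per-opcode wrappers added further below (vsir_validate_ishl(), vsir_validate_ishr() and vsir_validate_ushr()), the type masks passed to vsir_validate_shift_operation() work out as follows (summary for illustration, not part of the patch):

    /* ISHL: dst/src0 in { i32, i64, u32, u64 }
     * ISHR: dst/src0 in { i32, i64 }
     * USHR: dst/src0 in { u32, u64 }
     * src1 (the shift amount) always accepts any of the four integer
     * types, via the fixed shift_types table above. */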
static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) @@ -11490,14 +12755,39 @@ static void vsir_validate_ifc(struct validation_context *ctx, const struct vkd3d vsir_validator_push_block(ctx, VSIR_OP_IF); }
+static void vsir_validate_ishl(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VSIR_DATA_TYPE_COUNT] = + { + [VSIR_DATA_I32] = true, + [VSIR_DATA_I64] = true, + [VSIR_DATA_U32] = true, + [VSIR_DATA_U64] = true, + }; + + vsir_validate_shift_operation(ctx, instruction, types); +} + +static void vsir_validate_ishr(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VSIR_DATA_TYPE_COUNT] = + { + [VSIR_DATA_I32] = true, + [VSIR_DATA_I64] = true, + }; + + vsir_validate_shift_operation(ctx, instruction, types); +} + static void vsir_validate_itof(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { static const bool src_types[VSIR_DATA_TYPE_COUNT] = { [VSIR_DATA_BOOL] = true, [VSIR_DATA_I32] = true, - [VSIR_DATA_U32] = true, - [VSIR_DATA_U64] = true, + [VSIR_DATA_I64] = true, }; static const bool dst_types[VSIR_DATA_TYPE_COUNT] = { @@ -11624,7 +12914,8 @@ static void vsir_validate_throw_invalid_dst_type_error_with_flags(struct validat enum vsir_data_type dst_data_type = instruction->dst[0].reg.data_type;
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid destination data type %#x for operation \"%s\" (%#x) with flags %#x.", dst_data_type, + "Invalid destination data type \"%s\" (%#x) for operation \"%s\" (%#x) with flags %#x.", + vsir_data_type_get_name(dst_data_type, "<unknown>"), dst_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode, instruction->flags); }
@@ -11721,6 +13012,18 @@ static void vsir_validate_switch_monolithic(struct validation_context *ctx, ctx->inside_block = false; }
+static void vsir_validate_ushr(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VSIR_DATA_TYPE_COUNT] = + { + [VSIR_DATA_U32] = true, + [VSIR_DATA_U64] = true, + }; + + vsir_validate_shift_operation(ctx, instruction, types); +} + struct vsir_validator_instruction_desc { unsigned int dst_param_count; @@ -11730,12 +13033,13 @@ struct vsir_validator_instruction_desc
static const struct vsir_validator_instruction_desc vsir_validator_instructions[] = { - [VSIR_OP_ABS] = {1, 1, vsir_validate_float_elementwise_operation}, + [VSIR_OP_ABS] = {1, 1, vsir_validate_float_or_double_elementwise_operation}, [VSIR_OP_ACOS] = {1, 1, vsir_validate_float_elementwise_operation}, [VSIR_OP_ADD] = {1, 2, vsir_validate_float_elementwise_operation}, [VSIR_OP_AND] = {1, 2, vsir_validate_logic_elementwise_operation}, [VSIR_OP_ASIN] = {1, 1, vsir_validate_float_elementwise_operation}, [VSIR_OP_ATAN] = {1, 1, vsir_validate_float_elementwise_operation}, + [VSIR_OP_BEM] = {1, 2, vsir_validate_bem}, [VSIR_OP_BRANCH] = {0, ~0u, vsir_validate_branch}, [VSIR_OP_DADD] = {1, 2, vsir_validate_double_elementwise_operation}, [VSIR_OP_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, @@ -11797,20 +13101,21 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VSIR_OP_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, [VSIR_OP_HTAN] = {1, 1, vsir_validate_float_elementwise_operation}, [VSIR_OP_IADD] = {1, 2, vsir_validate_integer_elementwise_operation}, + [VSIR_OP_IDIV] = {1, 2, vsir_validate_signed_integer_elementwise_operation}, [VSIR_OP_IEQ] = {1, 2, vsir_validate_integer_comparison_operation}, [VSIR_OP_IF] = {0, 1, vsir_validate_if}, [VSIR_OP_IFC] = {0, 2, vsir_validate_ifc}, - [VSIR_OP_IGE] = {1, 2, vsir_validate_integer_comparison_operation}, - [VSIR_OP_ILT] = {1, 2, vsir_validate_integer_comparison_operation}, + [VSIR_OP_IGE] = {1, 2, vsir_validate_signed_integer_comparison_operation}, + [VSIR_OP_ILT] = {1, 2, vsir_validate_signed_integer_comparison_operation}, [VSIR_OP_IMAD] = {1, 3, vsir_validate_integer_elementwise_operation}, - [VSIR_OP_IMAX] = {1, 2, vsir_validate_integer_elementwise_operation}, - [VSIR_OP_IMIN] = {1, 2, vsir_validate_integer_elementwise_operation}, + [VSIR_OP_IMAX] = {1, 2, vsir_validate_signed_integer_elementwise_operation}, + [VSIR_OP_IMIN] = {1, 2, vsir_validate_signed_integer_elementwise_operation}, [VSIR_OP_INE] = {1, 2, vsir_validate_integer_comparison_operation}, [VSIR_OP_INEG] = {1, 1, vsir_validate_integer_elementwise_operation}, - [VSIR_OP_IREM] = {1, 2, vsir_validate_integer_elementwise_operation}, + [VSIR_OP_IREM] = {1, 2, vsir_validate_signed_integer_elementwise_operation}, [VSIR_OP_ISFINITE] = {1, 1, vsir_validate_float_comparison_operation}, - [VSIR_OP_ISHL] = {1, 2, vsir_validate_shift_operation}, - [VSIR_OP_ISHR] = {1, 2, vsir_validate_shift_operation}, + [VSIR_OP_ISHL] = {1, 2, vsir_validate_ishl}, + [VSIR_OP_ISHR] = {1, 2, vsir_validate_ishr}, [VSIR_OP_ISINF] = {1, 1, vsir_validate_float_comparison_operation}, [VSIR_OP_ISNAN] = {1, 1, vsir_validate_float_comparison_operation}, [VSIR_OP_ITOF] = {1, 1, vsir_validate_itof}, @@ -11824,6 +13129,7 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VSIR_OP_MAX] = {1, 2, vsir_validate_float_elementwise_operation}, [VSIR_OP_MIN] = {1, 2, vsir_validate_float_elementwise_operation}, [VSIR_OP_MUL] = {1, 2, vsir_validate_float_elementwise_operation}, + [VSIR_OP_NEG] = {1, 1, vsir_validate_float_or_double_elementwise_operation}, [VSIR_OP_NEO] = {1, 2, vsir_validate_float_comparison_operation}, [VSIR_OP_NEU] = {1, 2, vsir_validate_float_comparison_operation}, [VSIR_OP_NOP] = {0, 0, vsir_validate_nop}, @@ -11838,18 +13144,18 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VSIR_OP_ROUND_NE] = {1, 1, vsir_validate_float_elementwise_operation}, [VSIR_OP_ROUND_NI] = {1, 1, 
vsir_validate_float_elementwise_operation}, [VSIR_OP_SAMPLE_INFO] = {1, 1, vsir_validate_sample_info}, + [VSIR_OP_SATURATE] = {1, 1, vsir_validate_float_or_double_elementwise_operation}, [VSIR_OP_SWITCH] = {0, 1, vsir_validate_switch}, [VSIR_OP_SWITCH_MONOLITHIC] = {0, ~0u, vsir_validate_switch_monolithic}, + [VSIR_OP_USHR] = {1, 2, vsir_validate_ushr}, };
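Each entry in this table pins an opcode's destination and source parameter counts plus an optional per-opcode callback, with ~0u apparently standing for "any number of sources" (it is used for BRANCH and SWITCH_MONOLITHIC). A new opcode would be wired up with an entry of the same shape; VSIR_OP_XYZ and vsir_validate_xyz() here are hypothetical:

    /* One destination, two sources, checked by a dedicated callback. */
    [VSIR_OP_XYZ] = {1, 2, vsir_validate_xyz},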
-static void vsir_validate_instruction(struct validation_context *ctx) +static void vsir_validate_instruction(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_version *version = &ctx->program->shader_version; - const struct vkd3d_shader_instruction *instruction; size_t i;
- instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; - for (i = 0; i < instruction->dst_count; ++i) vsir_validate_dst_param(ctx, &instruction->dst[i]);
@@ -11927,7 +13233,7 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c { .message_context = message_context, .program = program, - .null_location = {.source_name = source_name}, + .location = {.source_name = source_name}, .status = VKD3D_OK, .phase = VSIR_OP_INVALID, .invalid_instruction_idx = true, @@ -11938,6 +13244,8 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c .inner_tess_idxs[0] = ~0u, .inner_tess_idxs[1] = ~0u, }; + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; unsigned int i;
if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) @@ -12046,11 +13354,17 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c
ctx.invalid_instruction_idx = false;
- for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count - && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx) - vsir_validate_instruction(&ctx); + ctx.instruction_idx = 0; + for (ins = vsir_program_iterator_head(&it); ins && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; + ins = vsir_program_iterator_next(&it)) + { + ctx.location = ins->location; + vsir_validate_instruction(&ctx, ins); + ++ctx.instruction_idx; + }
ctx.invalid_instruction_idx = true; + ctx.location = (struct vkd3d_shader_location){.source_name = source_name};
if (ctx.depth != 0) validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "%zu nested blocks were not closed.", ctx.depth); @@ -12104,71 +13418,706 @@ static void vsir_transform_( } }
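The hunks below replace the old transform entry points and introduce a small optimizer: vsir_program_copy_propagation() forwards the sources of SSA MOVs into their readers, vsir_program_dce() nulls dead destinations and turns side-effect-free instructions into NOPs, and vsir_program_optimize() alternates the two until neither reports progress. As a toy illustration of the interplay (hypothetical VSIR assembly, for exposition only):

    /* Before:
     *     mov sr0, r1.x        ; copy into an SSA register
     *     add sr1, sr0, sr0
     *
     * Copy propagation rewrites the reader to use the MOV source directly:
     *     mov sr0, r1.x        ; now dead
     *     add sr1, r1.x, r1.x
     *
     * DCE then turns the MOV into a NOP, since MOV has no side effects and
     * sr0 has no remaining readers; the next round makes no further
     * progress and the fixed-point loop stops. */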
-/* Transformations which should happen at parse time, i.e. before scan - * information is returned to the user. - * - * In particular, some passes need to modify the signature, and - * vkd3d_shader_scan() should report the modified signature for the given - * target. */ -enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -{ - struct vsir_transformation_context ctx = - { - .result = VKD3D_OK, - .program = program, - .config_flags = config_flags, - .compile_info = compile_info, - .message_context = message_context, - }; - - /* For vsir_program_ensure_diffuse(). */ - if (program->shader_version.major <= 2) - vsir_transform(&ctx, vsir_program_add_diffuse_output); - - /* For vsir_program_insert_fragment_fog(). */ - vsir_transform(&ctx, vsir_program_add_fog_input); - - /* For vsir_program_insert_vertex_fog(). */ - vsir_transform(&ctx, vsir_program_add_fog_output); - - return ctx.result; -} - -enum vkd3d_result vsir_program_lower_d3dbc(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -{ - struct vsir_transformation_context ctx = - { - .result = VKD3D_OK, - .program = program, - .config_flags = config_flags, - .compile_info = compile_info, - .message_context = message_context, - }; - - vsir_transform(&ctx, vsir_program_lower_d3dbc_instructions); - if (program->shader_version.major == 1 && program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) - vsir_transform(&ctx, vsir_program_normalise_ps1_output); - - if (TRACE_ON()) - vsir_program_trace(program); - - return ctx.result; -} - -enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +static bool vsir_instruction_has_side_effects(const struct vkd3d_shader_instruction *ins) { - struct vsir_transformation_context ctx = + switch (ins->opcode) { - .result = VKD3D_OK, - .program = program, - .config_flags = config_flags, - .compile_info = compile_info, - .message_context = message_context, - }; + case VSIR_OP_ABS: + case VSIR_OP_ACOS: + case VSIR_OP_ADD: + case VSIR_OP_AND: + case VSIR_OP_ASIN: + case VSIR_OP_ATAN: + case VSIR_OP_BEM: + case VSIR_OP_BFI: + case VSIR_OP_BFREV: + case VSIR_OP_BUFINFO: + case VSIR_OP_CHECK_ACCESS_FULLY_MAPPED: + case VSIR_OP_CMP: + case VSIR_OP_CND: + case VSIR_OP_COS: + case VSIR_OP_COUNTBITS: + case VSIR_OP_CRS: + case VSIR_OP_DADD: + case VSIR_OP_DDIV: + case VSIR_OP_DEF: + case VSIR_OP_DEFB: + case VSIR_OP_DEFI: + case VSIR_OP_DEQO: + case VSIR_OP_DFMA: + case VSIR_OP_DGEO: + case VSIR_OP_DIV: + case VSIR_OP_DLT: + case VSIR_OP_DMAX: + case VSIR_OP_DMIN: + case VSIR_OP_DMOV: + case VSIR_OP_DMOVC: + case VSIR_OP_DMUL: + case VSIR_OP_DNE: + case VSIR_OP_DP2: + case VSIR_OP_DP2ADD: + case VSIR_OP_DP3: + case VSIR_OP_DP4: + case VSIR_OP_DRCP: + case VSIR_OP_DST: + case VSIR_OP_DSX: + case VSIR_OP_DSX_COARSE: + case VSIR_OP_DSX_FINE: + case VSIR_OP_DSY: + case VSIR_OP_DSY_COARSE: + case VSIR_OP_DSY_FINE: + case VSIR_OP_DTOF: + case VSIR_OP_DTOI: + case VSIR_OP_DTOU: + case VSIR_OP_EQO: + case VSIR_OP_EQU: + case VSIR_OP_EVAL_CENTROID: + case VSIR_OP_EVAL_SAMPLE_INDEX: + case VSIR_OP_EXP: + case VSIR_OP_EXPP: + case VSIR_OP_F16TOF32: + case VSIR_OP_F32TOF16: + case VSIR_OP_FIRSTBIT_HI: + case 
VSIR_OP_FIRSTBIT_LO: + case VSIR_OP_FIRSTBIT_SHI: + case VSIR_OP_FRC: + case VSIR_OP_FREM: + case VSIR_OP_FTOD: + case VSIR_OP_FTOI: + case VSIR_OP_FTOU: + case VSIR_OP_GATHER4: + case VSIR_OP_GATHER4_C: + case VSIR_OP_GATHER4_C_S: + case VSIR_OP_GATHER4_PO: + case VSIR_OP_GATHER4_PO_C: + case VSIR_OP_GATHER4_PO_C_S: + case VSIR_OP_GATHER4_PO_S: + case VSIR_OP_GATHER4_S: + case VSIR_OP_GEO: + case VSIR_OP_GEU: + case VSIR_OP_HCOS: + case VSIR_OP_HSIN: + case VSIR_OP_HTAN: + case VSIR_OP_IADD: + case VSIR_OP_IBFE: + case VSIR_OP_IDIV: + case VSIR_OP_IEQ: + case VSIR_OP_IGE: + case VSIR_OP_ILT: + case VSIR_OP_IMAD: + case VSIR_OP_IMAX: + case VSIR_OP_IMIN: + case VSIR_OP_IMUL: + case VSIR_OP_IMUL_LOW: + case VSIR_OP_INE: + case VSIR_OP_INEG: + case VSIR_OP_IREM: + case VSIR_OP_ISFINITE: + case VSIR_OP_ISHL: + case VSIR_OP_ISHR: + case VSIR_OP_ISINF: + case VSIR_OP_ISNAN: + case VSIR_OP_ITOD: + case VSIR_OP_ITOF: + case VSIR_OP_ITOI: + case VSIR_OP_LD: + case VSIR_OP_LD2DMS: + case VSIR_OP_LD2DMS_S: + case VSIR_OP_LD_RAW: + case VSIR_OP_LD_RAW_S: + case VSIR_OP_LD_S: + case VSIR_OP_LD_STRUCTURED: + case VSIR_OP_LD_STRUCTURED_S: + case VSIR_OP_LD_UAV_TYPED: + case VSIR_OP_LD_UAV_TYPED_S: + case VSIR_OP_LIT: + case VSIR_OP_LOD: + case VSIR_OP_LOG: + case VSIR_OP_LOGP: + case VSIR_OP_LRP: + case VSIR_OP_LTO: + case VSIR_OP_LTU: + case VSIR_OP_M3x2: + case VSIR_OP_M3x3: + case VSIR_OP_M3x4: + case VSIR_OP_M4x3: + case VSIR_OP_M4x4: + case VSIR_OP_MAD: + case VSIR_OP_MAX: + case VSIR_OP_MIN: + case VSIR_OP_MOV: + case VSIR_OP_MOVA: + case VSIR_OP_MOVC: + case VSIR_OP_MSAD: + case VSIR_OP_MUL: + case VSIR_OP_NEG: + case VSIR_OP_NEO: + case VSIR_OP_NEU: + case VSIR_OP_NOP: + case VSIR_OP_NOT: + case VSIR_OP_NRM: + case VSIR_OP_OR: + case VSIR_OP_ORD: + case VSIR_OP_PHI: + case VSIR_OP_POW: + case VSIR_OP_QUAD_READ_ACROSS_D: + case VSIR_OP_QUAD_READ_ACROSS_X: + case VSIR_OP_QUAD_READ_ACROSS_Y: + case VSIR_OP_QUAD_READ_LANE_AT: + case VSIR_OP_RCP: + case VSIR_OP_RESINFO: + case VSIR_OP_ROUND_NE: + case VSIR_OP_ROUND_NI: + case VSIR_OP_ROUND_PI: + case VSIR_OP_ROUND_Z: + case VSIR_OP_RSQ: + case VSIR_OP_SAMPLE: + case VSIR_OP_SAMPLE_B: + case VSIR_OP_SAMPLE_B_CL_S: + case VSIR_OP_SAMPLE_C: + case VSIR_OP_SAMPLE_C_CL_S: + case VSIR_OP_SAMPLE_C_LZ: + case VSIR_OP_SAMPLE_C_LZ_S: + case VSIR_OP_SAMPLE_CL_S: + case VSIR_OP_SAMPLE_GRAD: + case VSIR_OP_SAMPLE_GRAD_CL_S: + case VSIR_OP_SAMPLE_INFO: + case VSIR_OP_SAMPLE_LOD: + case VSIR_OP_SAMPLE_LOD_S: + case VSIR_OP_SAMPLE_POS: + case VSIR_OP_SATURATE: + case VSIR_OP_SETP: + case VSIR_OP_SGE: + case VSIR_OP_SGN: + case VSIR_OP_SIN: + case VSIR_OP_SINCOS: + case VSIR_OP_SLT: + case VSIR_OP_SQRT: + case VSIR_OP_SUB: + case VSIR_OP_SWAPC: + case VSIR_OP_TAN: + case VSIR_OP_TEX: + case VSIR_OP_TEXBEM: + case VSIR_OP_TEXBEML: + case VSIR_OP_TEXCOORD: + case VSIR_OP_TEXCRD: + case VSIR_OP_TEXDEPTH: + case VSIR_OP_TEXDP3: + case VSIR_OP_TEXDP3TEX: + case VSIR_OP_TEXLD: + case VSIR_OP_TEXLDD: + case VSIR_OP_TEXLDL: + case VSIR_OP_TEXM3x2DEPTH: + case VSIR_OP_TEXM3x2PAD: + case VSIR_OP_TEXM3x2TEX: + case VSIR_OP_TEXM3x3: + case VSIR_OP_TEXM3x3DIFF: + case VSIR_OP_TEXM3x3PAD: + case VSIR_OP_TEXM3x3SPEC: + case VSIR_OP_TEXM3x3TEX: + case VSIR_OP_TEXM3x3VSPEC: + case VSIR_OP_TEXREG2AR: + case VSIR_OP_TEXREG2GB: + case VSIR_OP_TEXREG2RGB: + case VSIR_OP_UBFE: + case VSIR_OP_UDIV: + case VSIR_OP_UDIV_SIMPLE: + case VSIR_OP_UGE: + case VSIR_OP_ULT: + case VSIR_OP_UMAX: + case VSIR_OP_UMIN: + case VSIR_OP_UMUL: + case VSIR_OP_UNO: + case VSIR_OP_UREM: + case VSIR_OP_USHR: + 
case VSIR_OP_UTOD: + case VSIR_OP_UTOF: + case VSIR_OP_UTOU: + case VSIR_OP_WAVE_ACTIVE_ALL_EQUAL: + case VSIR_OP_WAVE_ACTIVE_BALLOT: + case VSIR_OP_WAVE_ACTIVE_BIT_AND: + case VSIR_OP_WAVE_ACTIVE_BIT_OR: + case VSIR_OP_WAVE_ACTIVE_BIT_XOR: + case VSIR_OP_WAVE_ALL_BIT_COUNT: + case VSIR_OP_WAVE_ALL_TRUE: + case VSIR_OP_WAVE_ANY_TRUE: + case VSIR_OP_WAVE_IS_FIRST_LANE: + case VSIR_OP_WAVE_OP_ADD: + case VSIR_OP_WAVE_OP_IMAX: + case VSIR_OP_WAVE_OP_IMIN: + case VSIR_OP_WAVE_OP_MAX: + case VSIR_OP_WAVE_OP_MIN: + case VSIR_OP_WAVE_OP_MUL: + case VSIR_OP_WAVE_OP_UMAX: + case VSIR_OP_WAVE_OP_UMIN: + case VSIR_OP_WAVE_PREFIX_BIT_COUNT: + case VSIR_OP_WAVE_READ_LANE_AT: + case VSIR_OP_WAVE_READ_LANE_FIRST: + case VSIR_OP_XOR: + return false; + + case VSIR_OP_ATOMIC_AND: + case VSIR_OP_ATOMIC_CMP_STORE: + case VSIR_OP_ATOMIC_IADD: + case VSIR_OP_ATOMIC_IMAX: + case VSIR_OP_ATOMIC_IMIN: + case VSIR_OP_ATOMIC_OR: + case VSIR_OP_ATOMIC_UMAX: + case VSIR_OP_ATOMIC_UMIN: + case VSIR_OP_ATOMIC_XOR: + case VSIR_OP_BRANCH: + case VSIR_OP_BREAK: + case VSIR_OP_BREAKC: + case VSIR_OP_BREAKP: + case VSIR_OP_CALL: + case VSIR_OP_CALLNZ: + case VSIR_OP_CASE: + case VSIR_OP_CONTINUE: + case VSIR_OP_CONTINUEP: + case VSIR_OP_CUT: + case VSIR_OP_CUT_STREAM: + case VSIR_OP_DCL: + case VSIR_OP_DCL_CONSTANT_BUFFER: + case VSIR_OP_DCL_FUNCTION_BODY: + case VSIR_OP_DCL_FUNCTION_TABLE: + case VSIR_OP_DCL_GLOBAL_FLAGS: + case VSIR_OP_DCL_GS_INSTANCES: + case VSIR_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case VSIR_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + case VSIR_OP_DCL_HS_MAX_TESSFACTOR: + case VSIR_OP_DCL_IMMEDIATE_CONSTANT_BUFFER: + case VSIR_OP_DCL_INDEX_RANGE: + case VSIR_OP_DCL_INDEXABLE_TEMP: + case VSIR_OP_DCL_INPUT: + case VSIR_OP_DCL_INPUT_CONTROL_POINT_COUNT: + case VSIR_OP_DCL_INPUT_PRIMITIVE: + case VSIR_OP_DCL_INPUT_PS: + case VSIR_OP_DCL_INPUT_PS_SGV: + case VSIR_OP_DCL_INPUT_PS_SIV: + case VSIR_OP_DCL_INPUT_SGV: + case VSIR_OP_DCL_INPUT_SIV: + case VSIR_OP_DCL_INTERFACE: + case VSIR_OP_DCL_OUTPUT: + case VSIR_OP_DCL_OUTPUT_CONTROL_POINT_COUNT: + case VSIR_OP_DCL_OUTPUT_SGV: + case VSIR_OP_DCL_OUTPUT_SIV: + case VSIR_OP_DCL_OUTPUT_TOPOLOGY: + case VSIR_OP_DCL_RESOURCE_RAW: + case VSIR_OP_DCL_RESOURCE_STRUCTURED: + case VSIR_OP_DCL_SAMPLER: + case VSIR_OP_DCL_STREAM: + case VSIR_OP_DCL_TEMPS: + case VSIR_OP_DCL_TESSELLATOR_DOMAIN: + case VSIR_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + case VSIR_OP_DCL_TESSELLATOR_PARTITIONING: + case VSIR_OP_DCL_TGSM_RAW: + case VSIR_OP_DCL_TGSM_STRUCTURED: + case VSIR_OP_DCL_THREAD_GROUP: + case VSIR_OP_DCL_UAV_RAW: + case VSIR_OP_DCL_UAV_STRUCTURED: + case VSIR_OP_DCL_UAV_TYPED: + case VSIR_OP_DCL_VERTICES_OUT: + case VSIR_OP_DEFAULT: + case VSIR_OP_DISCARD: + case VSIR_OP_ELSE: + case VSIR_OP_EMIT: + case VSIR_OP_EMIT_STREAM: + case VSIR_OP_ENDIF: + case VSIR_OP_ENDLOOP: + case VSIR_OP_ENDREP: + case VSIR_OP_ENDSWITCH: + case VSIR_OP_FCALL: + case VSIR_OP_HS_CONTROL_POINT_PHASE: + case VSIR_OP_HS_DECLS: + case VSIR_OP_HS_FORK_PHASE: + case VSIR_OP_HS_JOIN_PHASE: + case VSIR_OP_IF: + case VSIR_OP_IFC: + case VSIR_OP_IMM_ATOMIC_ALLOC: + case VSIR_OP_IMM_ATOMIC_AND: + case VSIR_OP_IMM_ATOMIC_CMP_EXCH: + case VSIR_OP_IMM_ATOMIC_CONSUME: + case VSIR_OP_IMM_ATOMIC_EXCH: + case VSIR_OP_IMM_ATOMIC_IADD: + case VSIR_OP_IMM_ATOMIC_IMAX: + case VSIR_OP_IMM_ATOMIC_IMIN: + case VSIR_OP_IMM_ATOMIC_OR: + case VSIR_OP_IMM_ATOMIC_UMAX: + case VSIR_OP_IMM_ATOMIC_UMIN: + case VSIR_OP_IMM_ATOMIC_XOR: + case VSIR_OP_LABEL: + case VSIR_OP_LOOP: + case VSIR_OP_PHASE: + case VSIR_OP_REP: + case 
VSIR_OP_RET: + case VSIR_OP_RETP: + case VSIR_OP_STORE_RAW: + case VSIR_OP_STORE_STRUCTURED: + case VSIR_OP_STORE_UAV_TYPED: + case VSIR_OP_SWITCH: + case VSIR_OP_SWITCH_MONOLITHIC: + case VSIR_OP_SYNC: + case VSIR_OP_TEXKILL: + return true; + + case VSIR_OP_INVALID: + case VSIR_OP_COUNT: + break; + } + + vkd3d_unreachable(); +} + +static enum vkd3d_result vsir_program_dce(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + struct liveness_tracker tracker; + enum vkd3d_result ret; + unsigned int i; + + if ((ret = track_liveness(program, &tracker))) + return ret; + + for (ins = vsir_program_iterator_head(&it), i = 0; ins; ins = vsir_program_iterator_next(&it), ++i) + { + unsigned int used_dst_count = 0; + + /* FIXME: IMM_ATOMIC_* can still be effectively DCE'd by turning them + * into non-immediate ATOMIC_* instructions. */ + if (vsir_instruction_has_side_effects(ins)) + continue; + + for (unsigned int j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + if (dst->reg.type == VKD3DSPR_SSA && !tracker.ssa_regs[dst->reg.idx[0].offset].last_read) + { + vsir_dst_param_init_null(dst); + ctx->progress = true; + } + else if (dst->reg.type == VKD3DSPR_TEMP + && tracker.temp_regs[dst->reg.idx[0].offset].last_read <= i + && !(program->shader_version.major == 1 && dst->reg.idx[0].offset == 0)) + { + vsir_dst_param_init_null(dst); + ctx->progress = true; + } + else if (dst->reg.type != VKD3DSPR_NULL) + { + ++used_dst_count; + } + } + + if (!used_dst_count) + vkd3d_shader_instruction_make_nop(ins); + } + + liveness_tracker_cleanup(&tracker); + return VKD3D_OK; +} + +/* + * This pass attempts to reduce redundant MOVs (copies) by combining them with + * adjacent instructions. The resulting MOVs will subsequently be removed by + * DCE if no longer used. + * + * We attempt to combine two instructions, not necessarily consecutive, + * of the form + * + * mov aaa.bbb, ccc + * XXX ..., aaa.ddd + * + * into + * + * XXX ..., ccc + * + * There are many constraints, including: + * + * - The ddd components of aaa must not have been modified between the + * two instructions. + * Currently, only SSA is supported, so this is trivial. + * + * - The relevant components of ccc must not have been modified between the + * two instructions. + * Currently, we require ccc to be a read-only register, so this is trivial. + * + * - ddd must be a subset of bbb. This is again trivial for SSA. + */ + +struct vsir_copy_propagation_state +{ + /* The sources for each SSA register, if it was written by a + * MOV instruction, or NULL if not. + * + * We do not add or remove instructions in this pass, only modifying their + * content, so these pointers are safe to store. + */ + const struct vkd3d_shader_instruction **ssa_sources; +}; + +static bool is_read_only(const struct vsir_program *program, enum vkd3d_shader_register_type type) +{ + switch (type) + { + case VKD3DSPR_ADDR: + case VKD3DSPR_IDXTEMP: + case VKD3DSPR_LOOP: + case VKD3DSPR_TEMP: + case VKD3DSPR_TEMPFLOAT16: + return false; + + case VKD3DSPR_TEXTURE: + return vkd3d_shader_ver_ge(&program->shader_version, 1, 4); + + /* Not applicable since they're not numeric or can't be sources. 
*/ + case VKD3DSPR_ATTROUT: + case VKD3DSPR_COLOROUT: + case VKD3DSPR_COMBINED_SAMPLER: + case VKD3DSPR_COUNT: + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_FUNCTIONBODY: + case VKD3DSPR_FUNCTIONPOINTER: + case VKD3DSPR_GROUPSHAREDMEM: + case VKD3DSPR_INVALID: + case VKD3DSPR_LABEL: + case VKD3DSPR_NULL: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_OUTSTENCILREF: + case VKD3DSPR_PREDICATE: + case VKD3DSPR_RASTERIZER: + case VKD3DSPR_RASTOUT: + case VKD3DSPR_RESOURCE: + case VKD3DSPR_SAMPLER: + case VKD3DSPR_STREAM: + case VKD3DSPR_TEXCRDOUT: + case VKD3DSPR_UAV: + return false; + + case VKD3DSPR_CONST: + case VKD3DSPR_CONSTBOOL: + case VKD3DSPR_CONSTBUFFER: + case VKD3DSPR_CONSTINT: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_IMMCONST: + case VKD3DSPR_IMMCONST64: + case VKD3DSPR_IMMCONSTBUFFER: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_INPUT: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_MISCTYPE: + case VKD3DSPR_OUTCONTROLPOINT: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_PARAMETER: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_POINT_COORD: + case VKD3DSPR_PRIMID: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_SSA: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_THREADID: + case VKD3DSPR_UNDEF: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + return true; + } + + vkd3d_unreachable(); +} + +static bool can_propagate_ssa_source(const struct vsir_program *program, const struct vkd3d_shader_instruction *ins) +{ + if (ins->opcode != VSIR_OP_MOV) + return false; + /* TODO: Propagate copies for other register types. */ + if (ins->dst[0].reg.type != VKD3DSPR_SSA) + return false; + if (ins->dst[0].modifiers || ins->dst[0].shift) + return false; + + /* TODO: We can perform copy-prop for read-write register types, but we + * have to be sure that the register wasn't modified between the two + * instructions. */ + if (!is_read_only(program, ins->src[0].reg.type)) + return false; + for (unsigned int k = 0; k < ins->src[0].reg.idx_count; ++k) + { + if (ins->src[0].reg.idx[k].rel_addr && !is_read_only(program, ins->src[0].reg.idx[k].rel_addr->reg.type)) + return false; + } + + /* Don't bother with other source modifiers for now; the HLSL compiler + * doesn't emit them. 
*/ + switch (ins->src[0].modifiers) + { + case VKD3DSPSM_ABS: + case VKD3DSPSM_ABSNEG: + case VKD3DSPSM_NEG: + case VKD3DSPSM_NONE: + break; + + default: + return false; + } + return true; +} + +static enum vkd3d_result vsir_program_copy_propagation(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vsir_copy_propagation_state state = {0}; + struct vkd3d_shader_instruction *ins; + + if (!(state.ssa_sources = vkd3d_calloc(program->ssa_count, sizeof(*state.ssa_sources)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + for (unsigned int j = 0; j < ins->src_count; ++j) + { + struct vkd3d_shader_src_param *src = &ins->src[j]; + const struct vkd3d_shader_src_param *mov_src; + const struct vkd3d_shader_instruction *mov; + enum vsir_data_type data_type; + uint32_t new_swizzle = 0; + + if (src->reg.type != VKD3DSPR_SSA) + continue; + if (data_type_is_64_bit(src->reg.data_type)) + continue; + if (!(mov = state.ssa_sources[src->reg.idx[0].offset])) + continue; + mov_src = &mov->src[0]; + data_type = src->reg.data_type; + + src->reg = mov_src->reg; + src->reg.data_type = data_type; + + if (!shader_register_clone_relative_addresses(&src->reg, program)) + { + vkd3d_free(state.ssa_sources); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (unsigned int k = 0; k < 4; ++k) + { + unsigned int s = vsir_swizzle_get_component(src->swizzle, k); + + if (mov_src->reg.type == VKD3DSPR_IMMCONST) + src->reg.u.immconst_u32[k] = mov_src->reg.u.immconst_u32[s]; + else + vsir_swizzle_set_component(&new_swizzle, k, vsir_swizzle_get_component(mov_src->swizzle, s)); + } + if (mov_src->reg.type != VKD3DSPR_IMMCONST) + src->swizzle = new_swizzle; + + if (src->modifiers == VKD3DSPSM_NONE) + src->modifiers = mov_src->modifiers; + else if (src->modifiers == VKD3DSPSM_NEG && mov_src->modifiers == VKD3DSPSM_ABS) + src->modifiers = VKD3DSPSM_ABSNEG; + else if (src->modifiers == VKD3DSPSM_NEG && mov_src->modifiers == VKD3DSPSM_ABSNEG) + src->modifiers = VKD3DSPSM_ABS; + else if (src->modifiers == VKD3DSPSM_NEG && mov_src->modifiers == VKD3DSPSM_NEG) + src->modifiers = VKD3DSPSM_NONE; + /* Otherwise no change is necessary. */ + + ctx->progress = true; + } + + if (can_propagate_ssa_source(program, ins)) + state.ssa_sources[ins->dst[0].reg.idx[0].offset] = ins; + } + + vkd3d_free(state.ssa_sources); + return VKD3D_OK; +} + +enum vkd3d_result vsir_program_optimize(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx; + + vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); + + do + { + ctx.progress = false; + vsir_transform(&ctx, vsir_program_copy_propagation); + vsir_transform(&ctx, vsir_program_dce); + } + while (ctx.progress); + + if (TRACE_ON()) + vsir_program_trace(program); + + return ctx.result; +} + +/* Transformations which should happen at parse time, i.e. before scan + * information is returned to the user. + * + * In particular, some passes need to modify the signature, and + * vkd3d_shader_scan() should report the modified signature for the given + * target. 
*/ +enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx; + + vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); + + /* For vsir_program_ensure_diffuse(). */ + if (program->shader_version.major <= 2) + vsir_transform(&ctx, vsir_program_add_diffuse_output); + + /* For vsir_program_insert_fragment_fog(). */ + vsir_transform(&ctx, vsir_program_add_fog_input); + + /* For vsir_program_insert_vertex_fog(). */ + vsir_transform(&ctx, vsir_program_add_fog_output); + + return ctx.result; +} + +enum vkd3d_result vsir_program_lower_d3dbc(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx; + + vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); + vsir_transform(&ctx, vsir_program_lower_d3dbc_instructions); + + if (program->shader_version.major == 1 && program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) + { + if (program->shader_version.minor < 4) + vsir_transform(&ctx, vsir_program_lower_texture_writes); + + vsir_transform(&ctx, vsir_program_normalise_ps1_output); + } + + if (TRACE_ON()) + vsir_program_trace(program); + + return ctx.result; +} + +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx;
+ vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); + vsir_transform(&ctx, vsir_program_lower_modifiers); vsir_transform(&ctx, vsir_program_lower_instructions);
if (program->shader_version.major >= 6) diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index c6e048adb20..d34133d6d4c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -167,6 +167,7 @@ static void msl_print_register_datatype(struct vkd3d_string_buffer *buffer, case VSIR_DATA_F32: vkd3d_string_buffer_printf(buffer, "f"); break; + case VSIR_DATA_BOOL: case VSIR_DATA_I32: vkd3d_string_buffer_printf(buffer, "i"); break; @@ -208,132 +209,71 @@ static bool msl_check_shader_visibility(const struct msl_generator *gen, } }
-static const struct vkd3d_shader_descriptor_binding *msl_get_cbv_binding(const struct msl_generator *gen, - unsigned int register_space, unsigned int register_idx) +static bool msl_get_binding(const struct msl_generator *gen, const struct vkd3d_shader_descriptor_info1 *descriptor, + unsigned int register_idx, enum vkd3d_shader_binding_flag flags, unsigned int *idx) { const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; unsigned int i;
if (!interface_info) - return NULL; + return false;
for (i = 0; i < interface_info->binding_count; ++i) { const struct vkd3d_shader_resource_binding *binding = &interface_info->bindings[i];
- if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) - continue; - if (binding->register_space != register_space) + if (binding->type != descriptor->type) continue; - if (binding->register_index != register_idx) + if (binding->register_space != descriptor->register_space) continue; - if (!msl_check_shader_visibility(gen, binding->shader_visibility)) + if (binding->register_index > descriptor->register_index) continue; - if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) + if (descriptor->count != ~0u && binding->binding.count < descriptor->count) continue; - - return &binding->binding; - } - - return NULL; -} - -static const struct vkd3d_shader_descriptor_binding *msl_get_sampler_binding(const struct msl_generator *gen, - unsigned int register_space, unsigned int register_idx) -{ - const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; - const struct vkd3d_shader_resource_binding *binding; - unsigned int i; - - if (!interface_info) - return NULL; - - for (i = 0; i < interface_info->binding_count; ++i) - { - binding = &interface_info->bindings[i]; - - if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER) + if (descriptor->count != ~0u + && binding->binding.count - descriptor->count < descriptor->register_index - binding->register_index) continue; - if (binding->register_space != register_space) - continue; - if (binding->register_index != register_idx) + if (descriptor->count == ~0u + && binding->binding.count <= descriptor->register_index - binding->register_index) continue; if (!msl_check_shader_visibility(gen, binding->shader_visibility)) continue; + if ((binding->flags & flags) != flags) + continue;
- return &binding->binding; + *idx = register_idx + binding->binding.binding - binding->register_index; + return true; }
- return NULL; + return false; }
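To see the range arithmetic above with concrete numbers (illustrative values, not from the patch): a descriptor with register_index 3 and count 4 covers registers 3-6, and a binding with register_index 1 and binding.count 8 covers 1-8, so every check passes and a lookup for register_idx 5 resolves as follows:

    /* binding->register_index (1) <= descriptor->register_index (3);
     * binding->binding.count (8) >= descriptor->count (4);
     * 8 - 4 = 4 >= 3 - 1 = 2, so the descriptor range fits in the binding;
     * *idx = 5 + binding->binding.binding - 1. */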
-static const struct vkd3d_shader_descriptor_binding *msl_get_srv_binding(const struct msl_generator *gen, - unsigned int register_space, unsigned int register_idx, enum vkd3d_shader_resource_type resource_type) +static bool msl_get_cbv_binding(const struct msl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *descriptor, unsigned int register_idx, unsigned int *idx) { - const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; - enum vkd3d_shader_binding_flag resource_type_flag; - unsigned int i; - - if (!interface_info) - return NULL; - - resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER - ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; - - for (i = 0; i < interface_info->binding_count; ++i) - { - const struct vkd3d_shader_resource_binding *binding = &interface_info->bindings[i]; - - if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV) - continue; - if (binding->register_space != register_space) - continue; - if (binding->register_index != register_idx) - continue; - if (!msl_check_shader_visibility(gen, binding->shader_visibility)) - continue; - if (!(binding->flags & resource_type_flag)) - continue; - - return &binding->binding; - } - - return NULL; + return msl_get_binding(gen, descriptor, register_idx, VKD3D_SHADER_BINDING_FLAG_BUFFER, idx); }
-static const struct vkd3d_shader_descriptor_binding *msl_get_uav_binding(const struct msl_generator *gen, - unsigned int register_space, unsigned int register_idx, enum vkd3d_shader_resource_type resource_type) +static bool msl_get_sampler_binding(const struct msl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *descriptor, unsigned int register_idx, unsigned int *idx) { - const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; - const struct vkd3d_shader_resource_binding *binding; - enum vkd3d_shader_binding_flag resource_type_flag; - unsigned int i; - - if (!interface_info) - return NULL; - - resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER - ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; - - for (i = 0; i < interface_info->binding_count; ++i) - { - binding = &interface_info->bindings[i]; - - if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - continue; - if (binding->register_space != register_space) - continue; - if (binding->register_index != register_idx) - continue; - if (!msl_check_shader_visibility(gen, binding->shader_visibility)) - continue; - if (!(binding->flags & resource_type_flag)) - continue; + return msl_get_binding(gen, descriptor, register_idx, 0, idx); +}
- return &binding->binding; - } +static bool msl_get_srv_binding(const struct msl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *descriptor, unsigned int register_idx, unsigned int *idx) +{ + return msl_get_binding(gen, descriptor, register_idx, + descriptor->resource_type == VKD3D_SHADER_RESOURCE_BUFFER + ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE, idx); +}
- return NULL; +static bool msl_get_uav_binding(const struct msl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *descriptor, unsigned int register_idx, unsigned int *idx) +{ + return msl_get_binding(gen, descriptor, register_idx, + descriptor->resource_type == VKD3D_SHADER_RESOURCE_BUFFER + ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE, idx); }
static void msl_print_cbv_name(struct vkd3d_string_buffer *buffer, unsigned int binding) @@ -369,6 +309,9 @@ static void msl_print_uav_name(struct vkd3d_string_buffer *buffer, struct msl_ge static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *buffer, struct msl_generator *gen, const struct vkd3d_shader_register *reg) { + const struct vkd3d_shader_descriptor_info1 *descriptor; + unsigned int binding, cbv_id, cbv_idx; + switch (reg->type) { case VKD3DSPR_TEMP: @@ -440,38 +383,52 @@ static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *bu }
case VKD3DSPR_CONSTBUFFER: + if (reg->idx_count != 3) { - const struct vkd3d_shader_descriptor_binding *binding; + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register index count %u.", + reg->idx_count); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + return MSL_DATA_UNION; + }
- if (reg->idx_count != 3) - { - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled constant buffer register index count %u.", - reg->idx_count); - vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); - return MSL_DATA_UNION; - } - if (reg->idx[0].rel_addr || reg->idx[1].rel_addr) - { - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled constant buffer register indirect addressing."); - vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); - return MSL_DATA_UNION; - } - /* FIXME: This should use vkd3d_shader_find_descriptor() to - * find the resource index/space from the resource ID. */ - if (!(binding = msl_get_cbv_binding(gen, 0, reg->idx[1].offset))) - { - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, - "No descriptor binding specified for CBV %u.", reg->idx[0].offset); - vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); - return MSL_DATA_UNION; - } - msl_print_cbv_name(buffer, binding->binding); - msl_print_subscript(buffer, gen, reg->idx[2].rel_addr, reg->idx[2].offset); + if (reg->idx[0].rel_addr || reg->idx[1].rel_addr) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + return MSL_DATA_UNION; + } + + cbv_id = reg->idx[0].offset; + cbv_idx = reg->idx[1].offset; + + if (!(descriptor = vkd3d_shader_find_descriptor(&gen->program->descriptors, + VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, cbv_id))) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Undeclared CBV descriptor %u.", cbv_id); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); return MSL_DATA_UNION; }
+ if (!msl_get_cbv_binding(gen, descriptor, cbv_idx, &binding)) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, + "No descriptor binding specified for CBV %u.", cbv_id); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + return MSL_DATA_UNION; + } + + msl_print_cbv_name(buffer, binding); + msl_print_subscript(buffer, gen, reg->idx[2].rel_addr, reg->idx[2].offset); + return MSL_DATA_UNION; + + case VKD3DSPR_IMMCONSTBUFFER: + vkd3d_string_buffer_printf(buffer, "icb%u", reg->idx[0].offset); + msl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); + return MSL_DATA_UINT; + case VKD3DSPR_IDXTEMP: vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); msl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); @@ -485,6 +442,40 @@ static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *bu vkd3d_string_buffer_printf(buffer, "o_mask"); return MSL_DATA_UNION;
+ case VKD3DSPR_THREADID: + vkd3d_string_buffer_printf(buffer, "v_thread_id"); + return MSL_DATA_UNION; + + case VKD3DSPR_THREADGROUPID: + vkd3d_string_buffer_printf(buffer, "v_thread_group_id"); + return MSL_DATA_UNION; + + case VKD3DSPR_LOCALTHREADID: + vkd3d_string_buffer_printf(buffer, "v_local_thread_id"); + return MSL_DATA_UNION; + + case VKD3DSPR_LOCALTHREADINDEX: + vkd3d_string_buffer_printf(buffer, "v_local_thread_index"); + return MSL_DATA_UNION; + + case VKD3DSPR_UNDEF: + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: + vkd3d_string_buffer_printf(buffer, "0u"); + return MSL_DATA_UINT; + + case VSIR_DIMENSION_VEC4: + vkd3d_string_buffer_printf(buffer, "uint4(0u, 0u, 0u, 0u)"); + return MSL_DATA_UINT; + + default: + vkd3d_string_buffer_printf(buffer, "<unhandled_dimension %#x>", reg->dimension); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled dimension %#x.", reg->dimension); + return MSL_DATA_UINT; + } + default: msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled register type %#x.", reg->type); @@ -566,7 +557,7 @@ static void msl_print_src_with_type(struct vkd3d_string_buffer *buffer, struct m const struct vkd3d_shader_src_param *vsir_src, uint32_t mask, enum vsir_data_type data_type) { const struct vkd3d_shader_register *reg = &vsir_src->reg; - struct vkd3d_string_buffer *register_name, *str; + struct vkd3d_string_buffer *register_name; enum msl_data_type src_data_type;
register_name = vkd3d_string_buffer_get(&gen->string_buffers); @@ -575,39 +566,10 @@ static void msl_print_src_with_type(struct vkd3d_string_buffer *buffer, struct m msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled 'non-uniform' modifier.");
- if (!vsir_src->modifiers) - str = buffer; - else - str = vkd3d_string_buffer_get(&gen->string_buffers); - src_data_type = msl_print_register_name(register_name, gen, reg); - msl_print_bitcast(str, gen, register_name->buffer, data_type, src_data_type, reg->dimension); + msl_print_bitcast(buffer, gen, register_name->buffer, data_type, src_data_type, reg->dimension); if (reg->dimension == VSIR_DIMENSION_VEC4) - msl_print_swizzle(str, vsir_src->swizzle, mask); - - switch (vsir_src->modifiers) - { - case VKD3DSPSM_NONE: - break; - case VKD3DSPSM_NEG: - vkd3d_string_buffer_printf(buffer, "-%s", str->buffer); - break; - case VKD3DSPSM_ABS: - vkd3d_string_buffer_printf(buffer, "abs(%s)", str->buffer); - break; - case VKD3DSPSM_ABSNEG: - vkd3d_string_buffer_printf(buffer, "-abs(%s)", str->buffer); - break; - default: - vkd3d_string_buffer_printf(buffer, "<unhandled modifier %#x>(%s)", - vsir_src->modifiers, str->buffer); - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - break; - } - - if (str != buffer) - vkd3d_string_buffer_release(&gen->string_buffers, str); + msl_print_swizzle(buffer, vsir_src->swizzle, mask); }
static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, @@ -671,32 +633,19 @@ static void msl_print_subscript(struct vkd3d_string_buffer *buffer, struct msl_g static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) { - uint32_t modifiers = dst->vsir->modifiers; va_list args;
- /* It is always legitimate to ignore _pp. */ - modifiers &= ~VKD3DSPDM_PARTIALPRECISION; - if (dst->vsir->shift) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); - if (modifiers & ~VKD3DSPDM_SATURATE) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers);
msl_print_indent(gen->buffer, gen->indent); vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer);
- if (modifiers & VKD3DSPDM_SATURATE) - vkd3d_string_buffer_printf(gen->buffer, "saturate("); - va_start(args, format); vkd3d_string_buffer_vprintf(gen->buffer, format, args); va_end(args);
- if (modifiers & VKD3DSPDM_SATURATE) - vkd3d_string_buffer_printf(gen->buffer, ")"); - vkd3d_string_buffer_printf(gen->buffer, ";\n"); }
@@ -749,6 +698,28 @@ static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruc msl_dst_cleanup(&dst, &gen->string_buffers); }
+static void msl_firstbit(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const char *op = ins->opcode == VSIR_OP_FIRSTBIT_LO ? "ctz" : "clz"; + unsigned int mask_size; + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + if ((mask_size = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "select(uint%u(0xffffffffu), %s(%s), bool%u(%s))", + mask_size, op, src.str->buffer, mask_size, src.str->buffer); + else + msl_print_assignment(gen, &dst, "%s ? %s(%s) : 0xffffffffu", + src.str->buffer, op, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) { struct vkd3d_string_buffer *args; @@ -799,6 +770,7 @@ static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instr static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor) { unsigned int component_count; + const char *negate; struct msl_src src; struct msl_dst dst; uint32_t mask; @@ -806,10 +778,11 @@ static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instru mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); msl_src_init(&src, gen, &ins->src[0], mask);
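msl_firstbit() maps VSIR_OP_FIRSTBIT_LO to Metal's ctz() and VSIR_OP_FIRSTBIT_HI to clz(), but has to guard against zero inputs: the Metal builtins return the operand's bit width (32) for 0, while the D3D firstbit instructions are expected to yield ~0u. Assembled from the format strings above, the emitted expressions look like this ("src" standing for the printed source operand):

    select(uint2(0xffffffffu), ctz(src), bool2(src))    /* two-component write mask */
    src ? ctz(src) : 0xffffffffu                        /* scalar write mask */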
+ negate = ins->opcode == VSIR_OP_UTOF && data_type_is_bool(ins->src[0].reg.data_type) ? "-" : ""; if ((component_count = vsir_write_mask_component_count(mask)) > 1) - msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer); + msl_print_assignment(gen, &dst, "%s%u(%s%s)", constructor, component_count, negate, src.str->buffer); else - msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer); + msl_print_assignment(gen, &dst, "%s(%s%s)", constructor, negate, src.str->buffer);
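The extra negation when casting from a bool source relies on vsir booleans being all-ones masks: unsigned negation maps ~0u to 1u before the constructor is applied, so true converts to 1.0f rather than 4294967295.0f. In plain C terms:

    unsigned int b = 0xffffffffu;   /* vsir "true" */
    float f = (float)-b;            /* -b wraps to 1u, so f == 1.0f */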
msl_src_cleanup(&src, &gen->string_buffers); msl_dst_cleanup(&dst, &gen->string_buffers); @@ -943,15 +916,14 @@ static void msl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct ms
static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { + unsigned int resource_id, resource_idx, resource_space, sample_count; const struct msl_resource_type_info *resource_type_info; - unsigned int resource_id, resource_idx, resource_space; const struct vkd3d_shader_descriptor_info1 *descriptor; - const struct vkd3d_shader_descriptor_binding *binding; enum vkd3d_shader_resource_type resource_type; uint32_t coord_mask, write_mask_size; struct vkd3d_string_buffer *read; enum vsir_data_type data_type; - unsigned int srv_binding; + unsigned int srv_binding = 0; struct msl_dst dst;
if (vkd3d_shader_instruction_has_texel_offset(ins)) @@ -969,6 +941,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct { resource_type = descriptor->resource_type; resource_space = descriptor->register_space; + sample_count = descriptor->sample_count; data_type = descriptor->resource_data_type; } else @@ -977,6 +950,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct "Internal compiler error: Undeclared resource descriptor %u.", resource_id); resource_space = 0; resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + sample_count = 1; data_type = VSIR_DATA_F32; }
@@ -988,6 +962,16 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, "Texel fetches from resource type %#x are not supported.", resource_type);
+ if (sample_count == 1) + { + /* Similar to the SPIR-V and GLSL targets, we map multi-sample + * textures with sample count 1 to their single-sample equivalents. */ + if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + } + if (!(resource_type_info = msl_get_resource_type_info(resource_type))) { msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, @@ -996,17 +980,10 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct } coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
- if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type))) - { - srv_binding = binding->binding; - } - else - { + if (descriptor && !msl_get_srv_binding(gen, descriptor, resource_idx, &srv_binding)) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, "No descriptor binding specified for SRV %u (index %u, space %u).", resource_id, resource_idx, resource_space); - srv_binding = 0; - }
msl_dst_init(&dst, gen, ins, &ins->dst[0]); read = vkd3d_string_buffer_get(&gen->string_buffers); @@ -1030,6 +1007,10 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct vkd3d_string_buffer_printf(read, ", "); if (ins->opcode != VSIR_OP_LD2DMS) msl_print_src_with_type(read, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, VSIR_DATA_U32); + else if (sample_count == 1) + /* If the resource isn't a true multisample resource, this is the + * "lod" parameter instead of the "sample" parameter. */ + vkd3d_string_buffer_printf(read, "0"); else msl_print_src_with_type(read, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, VSIR_DATA_U32); } @@ -1049,11 +1030,10 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst const struct msl_resource_type_info *resource_type_info; const struct vkd3d_shader_src_param *resource, *sampler; unsigned int resource_id, resource_idx, resource_space; - const struct vkd3d_shader_descriptor_binding *binding; unsigned int sampler_id, sampler_idx, sampler_space; + unsigned int srv_binding = 0, sampler_binding = 0; const struct vkd3d_shader_descriptor_info1 *d; enum vkd3d_shader_resource_type resource_type; - unsigned int srv_binding, sampler_binding; uint32_t coord_mask, write_mask_size; struct vkd3d_string_buffer *sample; enum vsir_data_type data_type; @@ -1121,17 +1101,10 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst } coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
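Together with the 2DMS-to-2D mapping earlier in msl_ld(), this keeps the generated call well-typed: a ld2dms on a resource declared with sample count 1 reads from a plain texture2d, whose trailing read() argument is a mip level rather than a sample index, along the lines of (names hypothetical):

    srv2.read(uint2(coord), 0);    /* texture2d: the trailing 0 is the lod */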
- if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type))) - { - srv_binding = binding->binding; - } - else - { + if (d && !msl_get_srv_binding(gen, d, resource_idx, &srv_binding)) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, "No descriptor binding specified for SRV %u (index %u, space %u).", resource_id, resource_idx, resource_space); - srv_binding = 0; - }
sampler_id = sampler->reg.idx[0].offset; sampler_idx = sampler->reg.idx[1].offset; @@ -1161,17 +1134,10 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst sampler_space = 0; }
- if ((binding = msl_get_sampler_binding(gen, sampler_space, sampler_idx))) - { - sampler_binding = binding->binding; - } - else - { + if (d && !msl_get_sampler_binding(gen, d, sampler_idx, &sampler_binding)) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, "No descriptor binding specified for sampler %u (index %u, space %u).", sampler_id, sampler_idx, sampler_space); - sampler_binding = 0; - }
msl_dst_init(&dst, gen, ins, &ins->dst[0]); sample = vkd3d_string_buffer_get(&gen->string_buffers); @@ -1263,13 +1229,12 @@ static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_inst static void msl_store_uav_typed(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { const struct msl_resource_type_info *resource_type_info; - const struct vkd3d_shader_descriptor_binding *binding; const struct vkd3d_shader_descriptor_info1 *d; enum vkd3d_shader_resource_type resource_type; unsigned int uav_id, uav_idx, uav_space; struct vkd3d_string_buffer *image_data; enum vsir_data_type data_type; - unsigned int uav_binding; + unsigned int uav_binding = 0; uint32_t coord_mask;
if (ins->dst[0].reg.idx[0].rel_addr || ins->dst[0].reg.idx[1].rel_addr) @@ -1294,6 +1259,11 @@ static void msl_store_uav_typed(struct msl_generator *gen, const struct vkd3d_sh data_type = VSIR_DATA_F32; }
+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Storing to resource type %#x is not supported.", resource_type); + if (!(resource_type_info = msl_get_resource_type_info(resource_type))) { msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, @@ -1302,17 +1272,10 @@ static void msl_store_uav_typed(struct msl_generator *gen, const struct vkd3d_sh } coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
- if ((binding = msl_get_uav_binding(gen, uav_space, uav_idx, resource_type))) - { - uav_binding = binding->binding; - } - else - { + if (d && !msl_get_uav_binding(gen, d, uav_idx, &uav_binding)) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, "No descriptor binding specified for UAV %u (index %u, space %u).", uav_id, uav_idx, uav_space); - uav_binding = 0; - }
image_data = vkd3d_string_buffer_get(&gen->string_buffers);
@@ -1414,18 +1377,70 @@ static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruc static void msl_dcl_indexable_temp(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { const char *type = ins->declaration.indexable_temp.component_count == 4 ? "vkd3d_vec4" : "vkd3d_scalar"; + + if (ins->declaration.indexable_temp.initialiser) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled initialiser for indexable temporary %u.", + ins->declaration.indexable_temp.register_idx); + msl_print_indent(gen->buffer, gen->indent); vkd3d_string_buffer_printf(gen->buffer, "%s x%u[%u];\n", type, ins->declaration.indexable_temp.register_idx, ins->declaration.indexable_temp.register_size); }
+static void msl_barrier(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + uint32_t flags = ins->flags; + + if (flags & (VKD3DSSF_GLOBAL_UAV | VKD3DSSF_THREAD_GROUP_UAV)) + { + const char *scope = flags & VKD3DSSF_GLOBAL_UAV ? "thread_scope_device" : "thread_scope_threadgroup"; + const char *mem_flags = "mem_flags::mem_device | mem_flags::mem_texture"; + + if (flags & VKD3DSSF_GROUP_SHARED_MEMORY) + { + mem_flags = "mem_flags::mem_device | mem_flags::mem_texture | mem_flags::mem_threadgroup"; + flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY; + } + + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, + "atomic_thread_fence(%s, memory_order_seq_cst, %s);\n", mem_flags, scope); + flags &= ~(VKD3DSSF_GLOBAL_UAV | VKD3DSSF_THREAD_GROUP_UAV); + } + else if (flags & VKD3DSSF_GROUP_SHARED_MEMORY) + { + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, + "atomic_thread_fence(mem_flags::mem_threadgroup, memory_order_seq_cst, thread_scope_threadgroup);\n"); + flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY; + } + + if (flags & VKD3DSSF_THREAD_GROUP) + { + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "threadgroup_barrier(mem_flags::mem_none);\n"); + flags &= ~VKD3DSSF_THREAD_GROUP; + } + + if (flags) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled synchronisation flags %#x.", flags); +} + static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { gen->location = ins->location;
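For reference, the MSL that msl_barrier() emits for the common flag combinations, assembled from the format strings above (VKD3DSSF_GROUP_SHARED_MEMORY adds mem_flags::mem_threadgroup to the first form when combined with a UAV flag):

    /* VKD3DSSF_GLOBAL_UAV: */
    atomic_thread_fence(mem_flags::mem_device | mem_flags::mem_texture,
            memory_order_seq_cst, thread_scope_device);
    /* VKD3DSSF_GROUP_SHARED_MEMORY alone: */
    atomic_thread_fence(mem_flags::mem_threadgroup,
            memory_order_seq_cst, thread_scope_threadgroup);
    /* VKD3DSSF_THREAD_GROUP: */
    threadgroup_barrier(mem_flags::mem_none);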
switch (ins->opcode) { + case VSIR_OP_ABS: + msl_intrinsic(gen, ins, "abs"); + break; + case VSIR_OP_ACOS: + msl_intrinsic(gen, ins, "acos"); + break; case VSIR_OP_ADD: case VSIR_OP_IADD: msl_binop(gen, ins, "+"); @@ -1433,6 +1448,12 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_AND: msl_binop(gen, ins, "&"); break; + case VSIR_OP_ASIN: + msl_intrinsic(gen, ins, "asin"); + break; + case VSIR_OP_ATAN: + msl_intrinsic(gen, ins, "atan"); + break; case VSIR_OP_BREAK: msl_break(gen); break; @@ -1442,6 +1463,12 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_CONTINUE: msl_continue(gen); break; + case VSIR_OP_COS: + msl_intrinsic(gen, ins, "cos"); + break; + case VSIR_OP_COUNTBITS: + msl_intrinsic(gen, ins, "popcount"); + break; case VSIR_OP_DCL_INDEXABLE_TEMP: msl_dcl_indexable_temp(gen, ins); break; @@ -1494,9 +1521,16 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_EXP: msl_intrinsic(gen, ins, "exp2"); break; + case VSIR_OP_FIRSTBIT_HI: + case VSIR_OP_FIRSTBIT_LO: + msl_firstbit(gen, ins); + break; case VSIR_OP_FRC: msl_intrinsic(gen, ins, "fract"); break; + case VSIR_OP_FREM: + msl_intrinsic(gen, ins, "fmod"); + break; case VSIR_OP_FTOI: msl_cast(gen, ins, "int"); break; @@ -1516,11 +1550,29 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d break; case VSIR_OP_GEO: case VSIR_OP_IGE: + case VSIR_OP_UGE: msl_relop(gen, ins, ">="); break; + case VSIR_OP_HCOS: + msl_intrinsic(gen, ins, "cosh"); + break; + case VSIR_OP_HSIN: + msl_intrinsic(gen, ins, "sinh"); + break; + case VSIR_OP_HTAN: + msl_intrinsic(gen, ins, "tanh"); + break; + case VSIR_OP_IDIV: + case VSIR_OP_UDIV_SIMPLE: + msl_binop(gen, ins, "/"); + break; case VSIR_OP_IF: msl_if(gen, ins); break; + case VSIR_OP_IREM: + case VSIR_OP_UREM: + msl_binop(gen, ins, "%"); + break; case VSIR_OP_ISHL: msl_binop(gen, ins, "<<"); break; @@ -1553,6 +1605,7 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d msl_relop(gen, ins, "!="); break; case VSIR_OP_INEG: + case VSIR_OP_NEG: msl_unary_op(gen, ins, "-"); break; case VSIR_OP_ITOF: @@ -1602,6 +1655,12 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_RSQ: msl_intrinsic(gen, ins, "rsqrt"); break; + case VSIR_OP_SATURATE: + msl_intrinsic(gen, ins, "saturate"); + break; + case VSIR_OP_SIN: + msl_intrinsic(gen, ins, "sin"); + break; case VSIR_OP_SQRT: msl_intrinsic(gen, ins, "sqrt"); break; @@ -1611,6 +1670,12 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_SWITCH: msl_switch(gen, ins); break; + case VSIR_OP_SYNC: + msl_barrier(gen, ins); + break; + case VSIR_OP_TAN: + msl_intrinsic(gen, ins, "tan"); + break; case VSIR_OP_XOR: msl_binop(gen, ins, "^"); break; @@ -1895,6 +1960,59 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) vkd3d_string_buffer_printf(buffer, "};\n\n"); }
+static void msl_generate_immediate_constant_buffers(struct msl_generator *gen)
+{
+    const struct vkd3d_shader_immediate_constant_buffer *icb;
+    size_t i, j;
+
+    for (i = 0; i < gen->program->icb_count; ++i)
+    {
+        icb = gen->program->icbs[i];
+
+        if (data_type_is_64_bit(icb->data_type))
+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+                    "Internal compiler error: Immediate constant buffer %zu has unhandled data type \"%s\" (%#x).",
+                    i, vsir_data_type_get_name(icb->data_type, "<unknown>"), icb->data_type);
+
+        msl_print_indent(gen->buffer, gen->indent);
+        vkd3d_string_buffer_printf(gen->buffer, "constant %s icb%u[%u] =",
+                icb->component_count == 4 ? "uint4" : "uint", icb->register_idx, icb->element_count);
+
+        if (icb->is_null || data_type_is_64_bit(icb->data_type))
+        {
+            vkd3d_string_buffer_printf(gen->buffer, " {};\n\n");
+            continue;
+        }
+
+        vkd3d_string_buffer_printf(gen->buffer, "\n");
+        msl_print_indent(gen->buffer, gen->indent);
+        vkd3d_string_buffer_printf(gen->buffer, "{\n");
+
+        if (icb->component_count == 4)
+        {
+            for (j = 0; j < icb->element_count; ++j)
+            {
+                msl_print_indent(gen->buffer, gen->indent + 1);
+                vkd3d_string_buffer_printf(gen->buffer, "{0x%08x, 0x%08x, 0x%08x, 0x%08x},\n",
+                        icb->data[4 * j + 0], icb->data[4 * j + 1], icb->data[4 * j + 2], icb->data[4 * j + 3]);
+            }
+        }
+        else
+        {
+            for (j = 0; j < icb->element_count; ++j)
+            {
+                if (!(j & 3))
+                    msl_print_indent(gen->buffer, gen->indent + 1);
+                vkd3d_string_buffer_printf(gen->buffer, "0x%08x,%s", icb->data[j],
+                        j == icb->element_count - 1 || (j & 3) == 3 ? "\n" : " ");
+            }
+        }
+
+        msl_print_indent(gen->buffer, gen->indent);
+        vkd3d_string_buffer_printf(gen->buffer, "};\n\n");
+    }
+}
+
 static void msl_generate_entrypoint_prologue(struct msl_generator *gen)
 {
     const struct shader_signature *signature = &gen->program->input_signature;
@@ -1951,6 +2069,30 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen)
         msl_print_write_mask(buffer, e->mask);
         vkd3d_string_buffer_printf(buffer, ";\n");
     }
+
+    if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADID))
+    {
+        msl_print_indent(gen->buffer, 1);
+        vkd3d_string_buffer_printf(buffer, "v_thread_id.u = uint4(thread_id, 0u);\n");
+    }
+
+    if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADGROUPID))
+    {
+        msl_print_indent(gen->buffer, 1);
+        vkd3d_string_buffer_printf(buffer, "v_thread_group_id.u = uint4(thread_group_id, 0u);\n");
+    }
+
+    if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADID))
+    {
+        msl_print_indent(gen->buffer, 1);
+        vkd3d_string_buffer_printf(buffer, "v_local_thread_id.u = uint4(local_thread_id, 0u);\n");
+    }
+
+    if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX))
+    {
+        msl_print_indent(gen->buffer, 1);
+        vkd3d_string_buffer_printf(buffer, "v_local_thread_index.u = uint4(local_thread_index, 0u, 0u, 0u);\n");
+    }
 }
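As a concrete example of the output of msl_generate_immediate_constant_buffers() above, a four-component icb with two elements would be emitted along these lines (register index and values hypothetical):

    constant uint4 icb0[2] =
    {
        {0x3f800000, 0x00000000, 0x00000000, 0x3f800000},
        {0x00000000, 0x3f800000, 0x00000000, 0x3f800000},
    };

VKD3DSPR_IMMCONSTBUFFER registers then read it as icb0[...], with the register handler returning MSL_DATA_UINT so that consumers bitcast the stored bits to whatever type they need.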
static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) @@ -1995,6 +2137,7 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) static void msl_generate_entrypoint(struct msl_generator *gen) { enum vkd3d_shader_type type = gen->program->shader_version.type; + bool output = true;
switch (type) { @@ -2006,13 +2149,21 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, "[[early_fragment_tests]]\n"); vkd3d_string_buffer_printf(gen->buffer, "fragment "); break; + case VKD3D_SHADER_TYPE_COMPUTE: + vkd3d_string_buffer_printf(gen->buffer, "kernel "); + output = false; + break; default: msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled shader type %#x.", type); return; }
- vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); + if (output) + vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out ", gen->prefix); + else + vkd3d_string_buffer_printf(gen->buffer, "void "); + vkd3d_string_buffer_printf(gen->buffer, "shader_entry(\n");
if (gen->program->descriptors.descriptor_count) { @@ -2028,6 +2179,30 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, "uint vertex_id [[vertex_id]],\n"); }
+ if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADID)) + { + msl_print_indent(gen->buffer, 2); + vkd3d_string_buffer_printf(gen->buffer, "uint3 thread_id [[thread_position_in_grid]],\n"); + } + + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADGROUPID)) + { + msl_print_indent(gen->buffer, 2); + vkd3d_string_buffer_printf(gen->buffer, "uint3 thread_group_id [[threadgroup_position_in_grid]],\n"); + } + + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADID)) + { + msl_print_indent(gen->buffer, 2); + vkd3d_string_buffer_printf(gen->buffer, "uint3 local_thread_id [[thread_position_in_threadgroup]],\n"); + } + + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX)) + { + msl_print_indent(gen->buffer, 2); + vkd3d_string_buffer_printf(gen->buffer, "uint local_thread_index [[thread_index_in_threadgroup]],\n"); + } + msl_print_indent(gen->buffer, 2); vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_in input [[stage_in]])\n{\n", gen->prefix);
@@ -2037,6 +2212,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_SAMPLEMASK)) vkd3d_string_buffer_printf(gen->buffer, " vkd3d_scalar o_mask;\n"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADID)) + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 v_thread_id;\n"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADGROUPID)) + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 v_thread_group_id;\n"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADID)) + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 v_local_thread_id;\n"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX)) + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 v_local_thread_index;\n"); vkd3d_string_buffer_printf(gen->buffer, "\n");
msl_generate_entrypoint_prologue(gen); @@ -2048,13 +2231,23 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, ", output.shader_out_depth"); if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_SAMPLEMASK)) vkd3d_string_buffer_printf(gen->buffer, ", o_mask"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADID)) + vkd3d_string_buffer_printf(gen->buffer, ", v_thread_id"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADGROUPID)) + vkd3d_string_buffer_printf(gen->buffer, ", v_thread_group_id"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADID)) + vkd3d_string_buffer_printf(gen->buffer, ", v_local_thread_id"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX)) + vkd3d_string_buffer_printf(gen->buffer, ", v_local_thread_index"); if (gen->program->descriptors.descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); vkd3d_string_buffer_printf(gen->buffer, ");\n\n");
msl_generate_entrypoint_epilogue(gen);
- vkd3d_string_buffer_printf(gen->buffer, " return output;\n}\n"); + if (output) + vkd3d_string_buffer_printf(gen->buffer, " return output;\n"); + vkd3d_string_buffer_printf(gen->buffer, "}\n"); }
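Assembled from the fragments above, a compute shader declaring SV_DispatchThreadID now gets an entry point roughly like this (trimmed sketch; the prefix, parameter order, and remaining parameters depend on the shader):

    kernel void shader_entry(
            uint3 thread_id [[thread_position_in_grid]],
            vkd3d_cs_in input [[stage_in]])
    {
        vkd3d_vec4 v_thread_id;

        v_thread_id.u = uint4(thread_id, 0u);
        /* ...prologue for the remaining inputs, then the cs_main() call,
         * which takes v_thread_id by reference; kernels return void, so
         * there is no output struct and no return statement. */
    }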
static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader_code *out) @@ -2065,8 +2258,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); - vkd3d_string_buffer_printf(gen->buffer, "#include <metal_common>\n"); - vkd3d_string_buffer_printf(gen->buffer, "#include <metal_texture>\n\n"); + vkd3d_string_buffer_printf(gen->buffer, "#include <metal_stdlib>\n"); vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n");
if (gen->program->global_flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL)) @@ -2107,6 +2299,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader
msl_generate_input_struct_declarations(gen); msl_generate_output_struct_declarations(gen); + msl_generate_immediate_constant_buffers(gen);
vkd3d_string_buffer_printf(gen->buffer, "static void %s_main(thread vkd3d_vec4 *v, " @@ -2119,6 +2312,14 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader vkd3d_string_buffer_printf(gen->buffer, ", thread float &o_depth"); if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_SAMPLEMASK)) vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_scalar &o_mask"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADID)) + vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_vec4 &v_thread_id"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_THREADGROUPID)) + vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_vec4 &v_thread_group_id"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADID)) + vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_vec4 &v_local_thread_id"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX)) + vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_vec4 &v_local_thread_index"); if (gen->program->descriptors.descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", constant descriptor *descriptors"); vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); @@ -2201,6 +2402,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags,
VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); VKD3D_ASSERT(program->has_descriptor_info); + VKD3D_ASSERT(program->has_no_modifiers);
if ((ret = msl_generator_init(&generator, program, compile_info, message_context)) < 0) return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index a8c0db358bc..8913e57283a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -823,7 +823,6 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, static const struct vkd3d_shader_preprocess_info default_preprocess_info = {0}; struct preproc_ctx ctx = {0}; char *source_name = NULL; - void *output_code; unsigned int i;
vkd3d_string_buffer_init(&ctx.buffer); @@ -900,16 +899,9 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, return VKD3D_ERROR_INVALID_SHADER; }
- if (!(output_code = vkd3d_malloc(ctx.buffer.content_size))) - { - vkd3d_string_buffer_cleanup(&ctx.buffer); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - memcpy(output_code, ctx.buffer.buffer, ctx.buffer.content_size); - out->size = ctx.buffer.content_size; - out->code = output_code; vkd3d_string_buffer_trace(&ctx.buffer); - vkd3d_string_buffer_cleanup(&ctx.buffer); + + vkd3d_shader_code_from_string_buffer(out, &ctx.buffer); return VKD3D_OK;
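The new helper replaces the explicit vkd3d_malloc()/memcpy()/cleanup sequence with a single call that hands the preprocessed text to the output blob. Its definition is part of this patch (in vkd3d_shader_main.c); a sketch of the semantics the call site relies on, assuming the helper takes ownership of the buffer's storage:

    /* Assumed shape; see vkd3d_shader_main.c in this patch for the
     * real definition. */
    void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code,
            struct vkd3d_string_buffer *buffer)
    {
        code->code = buffer->buffer;
        code->size = buffer->content_size;
        /* The caller must not reuse or clean up the buffer afterwards. */
    }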
fail: diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 97c0d0e73a8..0d260d63542 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -38,7 +38,7 @@ #define VKD3D_SPIRV_VERSION_1_0 0x00010000 #define VKD3D_SPIRV_VERSION_1_3 0x00010300 #define VKD3D_SPIRV_GENERATOR_ID 18 -#define VKD3D_SPIRV_GENERATOR_VERSION 17 +#define VKD3D_SPIRV_GENERATOR_VERSION 18 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) #ifndef VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER # define VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER 0 @@ -989,7 +989,7 @@ struct vkd3d_spirv_builder uint32_t type_bool_id; uint32_t type_void_id; uint32_t scope_subgroup_id; - uint32_t numeric_type_ids[VKD3D_SHADER_COMPONENT_TYPE_COUNT][VKD3D_VEC4_SIZE]; + uint32_t numeric_type_ids[VSIR_DATA_TYPE_COUNT][VKD3D_VEC4_SIZE];
struct vkd3d_spirv_stream debug_stream; /* debug instructions */ struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ @@ -2116,20 +2116,6 @@ static uint32_t vkd3d_spirv_build_op_fdiv(struct vkd3d_spirv_builder *builder, SpvOpFDiv, result_type, operand0, operand1); }
-static uint32_t vkd3d_spirv_build_op_fnegate(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t operand) -{ - return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, - SpvOpFNegate, result_type, operand); -} - -static uint32_t vkd3d_spirv_build_op_snegate(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t operand) -{ - return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, - SpvOpSNegate, result_type, operand); -} - static uint32_t vkd3d_spirv_build_op_and(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t operand0, uint32_t operand1) { @@ -2478,19 +2464,6 @@ static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast_first(struct vkd result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); }
-static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, - enum GLSLstd450 op, uint32_t result_type, uint32_t operand) -{ - uint32_t id = vkd3d_spirv_get_glsl_std450_instr_set(builder); - return vkd3d_spirv_build_op_ext_inst(builder, result_type, id, op, &operand, 1); -} - -static uint32_t vkd3d_spirv_build_op_glsl_std450_fabs(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t operand) -{ - return vkd3d_spirv_build_op_glsl_std450_tr1(builder, GLSLstd450FAbs, result_type, operand); -} - static uint32_t vkd3d_spirv_build_op_glsl_std450_max(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t x, uint32_t y) { @@ -2518,70 +2491,6 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_build GLSLstd450NClamp, operands, ARRAY_SIZE(operands)); }
-static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - enum vkd3d_shader_component_type component_type, unsigned int component_count) -{ - uint32_t scalar_id, type_id; - - VKD3D_ASSERT(component_type < VKD3D_SHADER_COMPONENT_TYPE_COUNT); - if (!component_count || component_count > VKD3D_VEC4_SIZE) - { - ERR("Invalid component count %u.\n", component_count); - return 0; - } - - if ((type_id = builder->numeric_type_ids[component_type][component_count - 1])) - return type_id; - - if (component_count == 1) - { - switch (component_type) - { - case VKD3D_SHADER_COMPONENT_VOID: - type_id = vkd3d_spirv_get_op_type_void(builder); - break; - case VKD3D_SHADER_COMPONENT_FLOAT: - type_id = vkd3d_spirv_get_op_type_float(builder, 32); - break; - case VKD3D_SHADER_COMPONENT_INT: - case VKD3D_SHADER_COMPONENT_UINT: - type_id = vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT); - break; - case VKD3D_SHADER_COMPONENT_BOOL: - type_id = vkd3d_spirv_get_op_type_bool(builder); - break; - case VKD3D_SHADER_COMPONENT_DOUBLE: - type_id = vkd3d_spirv_get_op_type_float(builder, 64); - break; - case VKD3D_SHADER_COMPONENT_UINT64: - type_id = vkd3d_spirv_get_op_type_int(builder, 64, 0); - break; - default: - FIXME("Unhandled component type %#x.\n", component_type); - return 0; - } - } - else - { - VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); - scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - type_id = vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); - } - - builder->numeric_type_ids[component_type][component_count - 1] = type_id; - - return type_id; -} - -static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, - enum vsir_data_type data_type, unsigned int component_count) -{ - enum vkd3d_shader_component_type component_type; - - component_type = vkd3d_component_type_from_data_type(data_type); - return vkd3d_spirv_get_type_id(builder, component_type, component_count); -} - static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point, const char *source_name) { @@ -2794,7 +2703,7 @@ struct vkd3d_symbol_register_data { SpvStorageClass storage_class; uint32_t member_idx; - enum vkd3d_shader_component_type component_type; + enum vsir_data_type data_type; unsigned int write_mask; unsigned int structure_stride; unsigned int binding_base_idx; @@ -2804,7 +2713,7 @@ struct vkd3d_symbol_register_data struct vkd3d_symbol_resource_data { struct vkd3d_shader_register_range range; - enum vkd3d_shader_component_type sampled_type; + enum vsir_data_type sampled_type; uint32_t type_id; const struct vkd3d_spirv_resource_type *resource_type_info; unsigned int structure_stride; @@ -2919,15 +2828,14 @@ static void vkd3d_symbol_make_io(struct vkd3d_symbol *symbol, symbol->key.reg.idx = index; }
-static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, - uint32_t val_id, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, uint32_t write_mask) +static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, uint32_t val_id, + SpvStorageClass storage_class, enum vsir_data_type data_type, uint32_t write_mask) { symbol->id = val_id; symbol->descriptor_array = NULL; symbol->info.reg.storage_class = storage_class; symbol->info.reg.member_idx = 0; - symbol->info.reg.component_type = component_type; + symbol->info.reg.data_type = data_type; symbol->info.reg.write_mask = write_mask; symbol->info.reg.structure_stride = 0; symbol->info.reg.binding_base_idx = 0; @@ -3063,7 +2971,7 @@ struct spirv_compiler struct vkd3d_shader_output_info { uint32_t id; - enum vkd3d_shader_component_type component_type; + enum vsir_data_type data_type; uint32_t array_element_mask; } *output_info; uint32_t private_output_variable[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ @@ -3253,6 +3161,14 @@ static struct spirv_compiler *spirv_compiler_create(struct vsir_program *program compiler->features = option->value; break;
+ case VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER: + case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: + case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: + case VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION: + case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS: + /* Explicitly ignored for this target. */ + break; + default: WARN("Ignoring unrecognised option %#x with value %#x.\n", option->name, option->value); break; @@ -3491,6 +3407,74 @@ static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spi return buffer; }
+static uint32_t spirv_get_type_id(struct spirv_compiler *compiler,
+        enum vsir_data_type data_type, unsigned int component_count)
+{
+    struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
+    uint32_t scalar_id, type_id;
+
+    VKD3D_ASSERT(data_type < VSIR_DATA_TYPE_COUNT);
+    if (!component_count || component_count > VKD3D_VEC4_SIZE)
+    {
+        ERR("Invalid component count %u.\n", component_count);
+        return 0;
+    }
+
+    if ((type_id = builder->numeric_type_ids[data_type][component_count - 1]))
+        return type_id;
+
+    if (component_count == 1)
+    {
+        switch (data_type)
+        {
+            case VSIR_DATA_BOOL:
+                type_id = vkd3d_spirv_get_op_type_bool(builder);
+                break;
+
+            case VSIR_DATA_F32:
+            case VSIR_DATA_SNORM:
+            case VSIR_DATA_UNORM:
+                type_id = vkd3d_spirv_get_op_type_float(builder, 32);
+                break;
+
+            case VSIR_DATA_F64:
+                type_id = vkd3d_spirv_get_op_type_float(builder, 64);
+                break;
+
+            case VSIR_DATA_I32:
+            case VSIR_DATA_U32:
+            case VSIR_DATA_MIXED:
+                type_id = vkd3d_spirv_get_op_type_int(builder, 32, data_type == VSIR_DATA_I32);
+                break;
+
+            case VSIR_DATA_I64:
+            case VSIR_DATA_U64:
+                type_id = vkd3d_spirv_get_op_type_int(builder, 64, data_type == VSIR_DATA_I64);
+                break;
+
+            case VSIR_DATA_UNUSED:
+                type_id = vkd3d_spirv_get_op_type_void(builder);
+                break;
+
+            default:
+                spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED,
+                        "Unhandled data type \"%s\" (%#x).",
+                        vsir_data_type_get_name(data_type, "<unknown>"), data_type);
+                return 0;
+        }
+    }
+    else
+    {
+        VKD3D_ASSERT(data_type != VSIR_DATA_UNUSED);
+        scalar_id = spirv_get_type_id(compiler, data_type, 1);
+        type_id = vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count);
+    }
+
+    builder->numeric_type_ids[data_type][component_count - 1] = type_id;
+
+    return type_id;
+}
+
 static uint32_t spirv_compiler_get_label_id(struct spirv_compiler *compiler, unsigned int block_id)
 {
     --block_id;
@@ -3649,132 +3633,134 @@ static const struct vkd3d_symbol *spirv_compiler_put_symbol(struct spirv_compile
 }
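Callers now pass vsir data types straight through instead of first converting to vkd3d_shader_component_type, and the resulting ids are memoised per data type and component count. E.g., a hypothetical pair of calls:

    uint32_t v4f = spirv_get_type_id(compiler, VSIR_DATA_F32, 4);
    /* The second request is served from
     * builder->numeric_type_ids[VSIR_DATA_F32][3] without emitting
     * another OpTypeVector. */
    uint32_t again = spirv_get_type_id(compiler, VSIR_DATA_F32, 4);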
static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type, unsigned int component_count, const uint32_t *values) + enum vsir_data_type data_type, unsigned int component_count, const uint32_t *values) { uint32_t type_id, scalar_type_id, component_ids[VKD3D_VEC4_SIZE]; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int i;
VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_VEC4_SIZE); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + type_id = spirv_get_type_id(compiler, data_type, component_count);
-    switch (component_type)
+    switch (data_type)
     {
-        case VKD3D_SHADER_COMPONENT_UINT:
-        case VKD3D_SHADER_COMPONENT_INT:
-        case VKD3D_SHADER_COMPONENT_FLOAT:
-            break;
-        case VKD3D_SHADER_COMPONENT_BOOL:
+        case VSIR_DATA_BOOL:
             if (component_count == 1)
                 return vkd3d_spirv_get_op_constant_bool(builder, type_id, *values);
-            FIXME("Unsupported vector of bool.\n");
             spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_TYPE,
                     "Vectors of bool type are not supported.");
             return vkd3d_spirv_get_op_undef(builder, type_id);
+
+        case VSIR_DATA_F16:
+        case VSIR_DATA_F32:
+        case VSIR_DATA_I32:
+        case VSIR_DATA_U16:
+        case VSIR_DATA_U32:
+        case VSIR_DATA_SNORM:
+        case VSIR_DATA_UNORM:
+            break;
+
         default:
             spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_TYPE,
-                    "Unhandled component_type %#x.", component_type);
+                    "Unhandled data type \"%s\" (%#x).",
+                    vsir_data_type_get_name(data_type, "<unknown>"), data_type);
             return vkd3d_spirv_get_op_undef(builder, type_id);
     }
if (component_count == 1) - { return vkd3d_spirv_get_op_constant(builder, type_id, *values); - } - else + + scalar_type_id = spirv_get_type_id(compiler, data_type, 1); + for (i = 0; i < component_count; ++i) { - scalar_type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - for (i = 0; i < component_count; ++i) - component_ids[i] = vkd3d_spirv_get_op_constant(builder, scalar_type_id, values[i]); - return vkd3d_spirv_get_op_constant_composite(builder, type_id, component_ids, component_count); + component_ids[i] = vkd3d_spirv_get_op_constant(builder, scalar_type_id, values[i]); } + + return vkd3d_spirv_get_op_constant_composite(builder, type_id, component_ids, component_count); }
static uint32_t spirv_compiler_get_constant64(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type, unsigned int component_count, const uint64_t *values) + enum vsir_data_type data_type, unsigned int component_count, const uint64_t *values) { uint32_t type_id, scalar_type_id, component_ids[VKD3D_DVEC2_SIZE]; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int i;
VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + type_id = spirv_get_type_id(compiler, data_type, component_count);
-    if (component_type != VKD3D_SHADER_COMPONENT_DOUBLE && component_type != VKD3D_SHADER_COMPONENT_UINT64)
+    if (data_type != VSIR_DATA_F64 && data_type != VSIR_DATA_I64 && data_type != VSIR_DATA_U64)
     {
-        FIXME("Unhandled component_type %#x.\n", component_type);
+        spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_TYPE,
+                "Unhandled data type \"%s\" (%#x).",
+                vsir_data_type_get_name(data_type, "<unknown>"), data_type);
         return vkd3d_spirv_get_op_undef(builder, type_id);
     }
if (component_count == 1) - { return vkd3d_spirv_get_op_constant64(builder, type_id, *values); - } - else + + scalar_type_id = spirv_get_type_id(compiler, data_type, 1); + for (i = 0; i < component_count; ++i) { - scalar_type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - for (i = 0; i < component_count; ++i) - component_ids[i] = vkd3d_spirv_get_op_constant64(builder, scalar_type_id, values[i]); - return vkd3d_spirv_get_op_constant_composite(builder, type_id, component_ids, component_count); + component_ids[i] = vkd3d_spirv_get_op_constant64(builder, scalar_type_id, values[i]); } + + return vkd3d_spirv_get_op_constant_composite(builder, type_id, component_ids, component_count); }
-static uint32_t spirv_compiler_get_constant_uint(struct spirv_compiler *compiler, - uint32_t value) +static uint32_t spirv_compiler_get_constant_uint(struct spirv_compiler *compiler, uint32_t value) { - return spirv_compiler_get_constant(compiler, VKD3D_SHADER_COMPONENT_UINT, 1, &value); + return spirv_compiler_get_constant(compiler, VSIR_DATA_U32, 1, &value); }
-static uint32_t spirv_compiler_get_constant_float(struct spirv_compiler *compiler, - float value) +static uint32_t spirv_compiler_get_constant_float(struct spirv_compiler *compiler, float value) { - return spirv_compiler_get_constant(compiler, VKD3D_SHADER_COMPONENT_FLOAT, 1, (uint32_t *)&value); + return spirv_compiler_get_constant(compiler, VSIR_DATA_F32, 1, (uint32_t *)&value); }
static uint32_t spirv_compiler_get_constant_vector(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type, unsigned int component_count, uint32_t value) + enum vsir_data_type data_type, unsigned int component_count, uint32_t value) { const uint32_t values[] = {value, value, value, value}; - return spirv_compiler_get_constant(compiler, component_type, component_count, values); + + return spirv_compiler_get_constant(compiler, data_type, component_count, values); }
static uint32_t spirv_compiler_get_constant_uint_vector(struct spirv_compiler *compiler, uint32_t value, unsigned int component_count) { - return spirv_compiler_get_constant_vector(compiler, VKD3D_SHADER_COMPONENT_UINT, component_count, value); + return spirv_compiler_get_constant_vector(compiler, VSIR_DATA_U32, component_count, value); }
static uint32_t spirv_compiler_get_constant_float_vector(struct spirv_compiler *compiler, float value, unsigned int component_count) { const float values[] = {value, value, value, value}; - return spirv_compiler_get_constant(compiler, VKD3D_SHADER_COMPONENT_FLOAT, - component_count, (const uint32_t *)values); + + return spirv_compiler_get_constant(compiler, VSIR_DATA_F32, component_count, (const uint32_t *)values); }
static uint32_t spirv_compiler_get_constant_double_vector(struct spirv_compiler *compiler, double value, unsigned int component_count) { const double values[] = {value, value}; - return spirv_compiler_get_constant64(compiler, VKD3D_SHADER_COMPONENT_DOUBLE, - component_count, (const uint64_t *)values); + + return spirv_compiler_get_constant64(compiler, VSIR_DATA_F64, component_count, (const uint64_t *)values); }
static uint32_t spirv_compiler_get_constant_uint64_vector(struct spirv_compiler *compiler, uint64_t value, unsigned int component_count) { const uint64_t values[] = {value, value}; - return spirv_compiler_get_constant64(compiler, VKD3D_SHADER_COMPONENT_UINT64, component_count, values); + + return spirv_compiler_get_constant64(compiler, VSIR_DATA_U64, component_count, values); }
static uint32_t spirv_compiler_get_type_id_for_reg(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t write_mask) { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - - return vkd3d_spirv_get_type_id(builder, - vkd3d_component_type_from_data_type(reg->data_type), - vsir_write_mask_component_count(write_mask)); + return spirv_get_type_id(compiler, reg->data_type, vsir_write_mask_component_count(write_mask)); }
static uint32_t spirv_compiler_get_type_id_for_dst(struct spirv_compiler *compiler, @@ -3893,15 +3879,14 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder * }
static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, - struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, unsigned int component_count, - const unsigned int *array_lengths, unsigned int length_count) + struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, enum vsir_data_type data_type, + unsigned int component_count, const unsigned int *array_lengths, unsigned int length_count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, length_id, ptr_type_id; unsigned int i;
-    type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
+    type_id = spirv_get_type_id(compiler, data_type, component_count);
     for (i = 0; i < length_count; ++i)
     {
         if (!array_lengths[i])
@@ -3909,17 +3894,16 @@ static uint32_t spirv_compiler_emit_array_variable(struct vkd3d_spirv_compil
             break;
         length_id = spirv_compiler_get_constant_uint(compiler, array_lengths[i]);
         type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
     }
     ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id);
+
     return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0);
 }
-static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, - struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, unsigned int component_count) +static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, struct vkd3d_spirv_stream *stream, + SpvStorageClass storage_class, enum vsir_data_type data_type, unsigned int component_count) { - return spirv_compiler_emit_array_variable(compiler, stream, storage_class, - component_type, component_count, NULL, 0); + return spirv_compiler_emit_array_variable(compiler, stream, + storage_class, data_type, component_count, NULL, 0); }
static const struct vkd3d_spec_constant_info @@ -4000,8 +3984,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile info = get_spec_constant_info(name); default_value = info ? info->default_value.u : 0;
- scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); - vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); + scalar_type_id = spirv_get_type_id(compiler, type, 1); + vector_type_id = spirv_get_type_id(compiler, type, component_count);
for (unsigned int i = 0; i < component_count; ++i) { @@ -4050,7 +4034,7 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi unsigned int index = parameter - compiler->program->parameters; uint32_t type_id, ptr_id, ptr_type_id;
- type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); + type_id = spirv_get_type_id(compiler, type, component_count); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, compiler->spirv_parameter_info[index].buffer_id, @@ -4087,8 +4071,8 @@ static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *comp type, component_count, name, parameter->data_type);
     if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT)
-        return spirv_compiler_get_constant(compiler, vkd3d_component_type_from_data_type(type),
-                component_count, (const uint32_t *)&parameter->u.immediate_constant);
+        return spirv_compiler_get_constant(compiler, type, component_count,
+                (const uint32_t *)&parameter->u.immediate_constant);
if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) return spirv_compiler_get_spec_constant(compiler, name, @@ -4104,32 +4088,32 @@ default_parameter: }
static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type, unsigned int component_count, - uint32_t val_id, unsigned int val_component_idx, unsigned int val_component_count) + enum vsir_data_type data_type, unsigned int component_count, uint32_t val_id, + unsigned int val_component_idx, unsigned int val_component_count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t components[VKD3D_VEC4_SIZE]; - uint32_t type_id, result_id; + uint32_t type_id; unsigned int i;
VKD3D_ASSERT(val_component_idx < val_component_count);
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + type_id = spirv_get_type_id(compiler, data_type, component_count); if (val_component_count == 1) { for (i = 0; i < component_count; ++i) + { components[i] = val_id; - result_id = vkd3d_spirv_build_op_composite_construct(builder, - type_id, components, component_count); + } + return vkd3d_spirv_build_op_composite_construct(builder, type_id, components, component_count); } - else + + for (i = 0; i < component_count; ++i) { - for (i = 0; i < component_count; ++i) - components[i] = val_component_idx; - result_id = vkd3d_spirv_build_op_vector_shuffle(builder, - type_id, val_id, val_id, components, component_count); + components[i] = val_component_idx; } - return result_id; + + return vkd3d_spirv_build_op_vector_shuffle(builder, type_id, val_id, val_id, components, component_count); }
static uint32_t spirv_compiler_emit_load_src(struct spirv_compiler *compiler, @@ -4147,10 +4131,11 @@ static uint32_t spirv_compiler_emit_register_addressing(struct spirv_compiler *c addr_id = spirv_compiler_emit_load_src(compiler, reg_index->rel_addr, VKD3DSP_WRITEMASK_0); if (reg_index->offset) { - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - addr_id = vkd3d_spirv_build_op_iadd(builder, type_id, - addr_id, spirv_compiler_get_constant_uint(compiler, reg_index->offset)); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); + addr_id = vkd3d_spirv_build_op_iadd(builder, type_id, addr_id, + spirv_compiler_get_constant_uint(compiler, reg_index->offset)); } + return addr_id; }
@@ -4159,7 +4144,7 @@ struct vkd3d_shader_register_info uint32_t id; const struct vkd3d_symbol *descriptor_array; SpvStorageClass storage_class; - enum vkd3d_shader_component_type component_type; + enum vsir_data_type data_type; unsigned int write_mask; uint32_t member_idx; unsigned int structure_stride; @@ -4182,7 +4167,7 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, register_info->storage_class = SpvStorageClassPrivate; register_info->descriptor_array = NULL; register_info->member_idx = 0; - register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + register_info->data_type = VSIR_DATA_F32; register_info->write_mask = VKD3DSP_WRITEMASK_ALL; register_info->structure_stride = 0; register_info->binding_base_idx = 0; @@ -4203,7 +4188,7 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, memset(register_info, 0, sizeof(*register_info)); register_info->id = ssa->id; register_info->storage_class = SpvStorageClassMax; - register_info->component_type = vkd3d_component_type_from_data_type(ssa->data_type); + register_info->data_type = ssa->data_type; register_info->write_mask = ssa->write_mask; return true; } @@ -4222,7 +4207,7 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, register_info->descriptor_array = symbol->descriptor_array; register_info->storage_class = symbol->info.reg.storage_class; register_info->member_idx = symbol->info.reg.member_idx; - register_info->component_type = symbol->info.reg.component_type; + register_info->data_type = symbol->info.reg.data_type; register_info->write_mask = symbol->info.reg.write_mask; register_info->structure_stride = symbol->info.reg.structure_stride; register_info->binding_base_idx = symbol->info.reg.binding_base_idx; @@ -4296,7 +4281,7 @@ static uint32_t spirv_compiler_get_descriptor_index(struct spirv_compiler *compi struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, ptr_type_id, ptr_id, offset_id, index_ids[2];
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); if (!(offset_id = compiler->descriptor_offset_ids[push_constant_index])) { index_ids[0] = compiler->descriptor_offsets_member_id; @@ -4369,7 +4354,7 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp if (index_count) { component_count = vsir_write_mask_component_count(register_info->write_mask); - type_id = vkd3d_spirv_get_type_id(builder, register_info->component_type, component_count); + type_id = spirv_get_type_id(compiler, register_info->data_type, component_count); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, register_info->storage_class, type_id); register_info->id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, register_info->id, indexes, index_count); @@ -4391,7 +4376,7 @@ static uint32_t spirv_compiler_get_register_id(struct spirv_compiler *compiler, }
return spirv_compiler_emit_variable(compiler, &builder->global_stream, - SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + SpvStorageClassPrivate, VSIR_DATA_F32, VKD3D_VEC4_SIZE); }
static bool vkd3d_swizzle_is_equal(uint32_t dst_write_mask, uint32_t swizzle, uint32_t write_mask) @@ -4413,9 +4398,8 @@ static bool vkd3d_swizzle_is_scalar(uint32_t swizzle, const struct vkd3d_shader_ && vsir_swizzle_get_component(swizzle, 3) == component_idx; }
-static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, - uint32_t val_id, uint32_t val_write_mask, enum vkd3d_shader_component_type component_type, - uint32_t swizzle, uint32_t write_mask) +static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, uint32_t val_id, + uint32_t val_write_mask, enum vsir_data_type data_type, uint32_t swizzle, uint32_t write_mask) { unsigned int i, component_idx, component_count, val_component_count; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -4428,7 +4412,7 @@ static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, && (component_count == 1 || vkd3d_swizzle_is_equal(val_write_mask, swizzle, write_mask))) return val_id;
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + type_id = spirv_get_type_id(compiler, data_type, component_count);
if (component_count == 1) { @@ -4456,13 +4440,14 @@ static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) components[component_idx++] = vsir_swizzle_get_component(swizzle, i); } - return vkd3d_spirv_build_op_vector_shuffle(builder, - type_id, val_id, val_id, components, component_count); + + return vkd3d_spirv_build_op_vector_shuffle(builder, type_id, + val_id, val_id, components, component_count); }
static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compiler, uint32_t vector1_id, uint32_t vector2_id, uint32_t swizzle, uint32_t write_mask, - enum vkd3d_shader_component_type component_type, unsigned int component_count) + enum vsir_data_type data_type, unsigned int component_count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t components[VKD3D_VEC4_SIZE]; @@ -4478,10 +4463,10 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil else components[i] = VKD3D_VEC4_SIZE + vsir_swizzle_get_component(swizzle, i); } + type_id = spirv_get_type_id(compiler, data_type, component_count);
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - return vkd3d_spirv_build_op_vector_shuffle(builder, - type_id, vector1_id, vector2_id, components, component_count); + return vkd3d_spirv_build_op_vector_shuffle(builder, type_id, + vector1_id, vector2_id, components, component_count); }
static uint32_t spirv_compiler_emit_int_to_bool(struct spirv_compiler *compiler, @@ -4494,10 +4479,11 @@ static uint32_t spirv_compiler_emit_int_to_bool(struct spirv_compiler *compiler,
VKD3D_ASSERT(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z)));
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_BOOL, component_count); op = condition & VKD3D_SHADER_CONDITIONAL_OP_Z ? SpvOpIEqual : SpvOpINotEqual; + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, val_id, - data_type == VSIR_DATA_U64 + data_type_is_64_bit(data_type) ? spirv_compiler_get_constant_uint64_vector(compiler, 0, component_count) : spirv_compiler_get_constant_uint_vector(compiler, 0, component_count)); } @@ -4510,7 +4496,8 @@ static uint32_t spirv_compiler_emit_bool_to_int(struct spirv_compiler *compiler,
true_id = spirv_compiler_get_constant_uint_vector(compiler, signedness ? 0xffffffff : 1, component_count); false_id = spirv_compiler_get_constant_uint_vector(compiler, 0, component_count); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, component_count); + return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); }
@@ -4523,7 +4510,8 @@ static uint32_t spirv_compiler_emit_bool_to_int64(struct spirv_compiler *compile true_id = spirv_compiler_get_constant_uint64_vector(compiler, signedness ? UINT64_MAX : 1, component_count); false_id = spirv_compiler_get_constant_uint64_vector(compiler, 0, component_count); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT64, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U64, component_count); + return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); }
@@ -4535,7 +4523,8 @@ static uint32_t spirv_compiler_emit_bool_to_float(struct spirv_compiler *compile
true_id = spirv_compiler_get_constant_float_vector(compiler, signedness ? -1.0f : 1.0f, component_count); false_id = spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, component_count); + return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); }
@@ -4547,7 +4536,8 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil
true_id = spirv_compiler_get_constant_double_vector(compiler, signedness ? -1.0 : 1.0, component_count); false_id = spirv_compiler_get_constant_double_vector(compiler, 0.0, component_count); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_DOUBLE, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F64, component_count); + return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); }
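/* Editor's note, not part of the patch: the four bool_to_* helpers above all reduce to a
 * single OpSelect between a per-type "true" constant and zero, with the signedness flag
 * picking -1 (0xffffffff / UINT64_MAX) over 1; the inverse direction,
 * spirv_compiler_emit_int_to_bool(), compares against a zero vector with SpvOpIEqual or
 * SpvOpINotEqual. Illustrative SPIR-V for the signed 32-bit case (ids invented):
 *
 *     %t = OpConstant %uint 0xffffffff
 *     %f = OpConstant %uint 0
 *     %r = OpSelect %uint %cond %t %f
 */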
@@ -4574,8 +4564,7 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile } }
- return spirv_compiler_get_constant(compiler, - vkd3d_component_type_from_data_type(reg->data_type), component_count, values); + return spirv_compiler_get_constant(compiler, reg->data_type, component_count, values); }
static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compiler, @@ -4601,8 +4590,7 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi } }
- return spirv_compiler_get_constant64(compiler, - vkd3d_component_type_from_data_type(reg->data_type), component_count, values); + return spirv_compiler_get_constant64(compiler, reg->data_type, component_count, values); }
static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, @@ -4614,18 +4602,18 @@ static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler,
VKD3D_ASSERT(reg->type == VKD3DSPR_UNDEF);
- type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); + type_id = spirv_get_type_id(compiler, reg->data_type, component_count); + return vkd3d_spirv_get_op_undef(builder, type_id); }
static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask, - const struct vkd3d_shader_register_info *reg_info) + const struct vkd3d_shader_register *reg, uint32_t swizzle, + uint32_t write_mask, const struct vkd3d_shader_register_info *reg_info) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, ptr_type_id, index, reg_id, val_id; unsigned int component_idx, reg_component_count; - enum vkd3d_shader_component_type component_type; uint32_t skipped_component_mask;
VKD3D_ASSERT(!register_is_constant_or_undef(reg)); @@ -4636,7 +4624,6 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, skipped_component_mask = ~reg_info->write_mask & ((VKD3DSP_WRITEMASK_0 << component_idx) - 1); if (skipped_component_mask) component_idx -= vsir_write_mask_component_count(skipped_component_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type);
reg_component_count = vsir_write_mask_component_count(reg_info->write_mask);
@@ -4646,7 +4633,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, component_idx, reg->type, reg->idx[0].offset, reg_info->write_mask); }
- type_id = vkd3d_spirv_get_type_id(builder, reg_info->component_type, 1); + type_id = spirv_get_type_id(compiler, reg_info->data_type, 1); reg_id = reg_info->id; if (reg_component_count != 1) { @@ -4656,27 +4643,22 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, }
val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_id, SpvMemoryAccessMaskNone); + if (reg->data_type == reg_info->data_type) + return val_id;
- if (component_type != reg_info->component_type) + if (reg->data_type != VSIR_DATA_BOOL) { - if (component_type == VKD3D_SHADER_COMPONENT_BOOL) - { - if (reg_info->component_type != VKD3D_SHADER_COMPONENT_UINT) - { - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } - val_id = spirv_compiler_emit_int_to_bool(compiler, - VKD3D_SHADER_CONDITIONAL_OP_NZ, VSIR_DATA_U32, 1, val_id); - } - else - { - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } + type_id = spirv_get_type_id(compiler, reg->data_type, 1); + return vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); }
- return val_id; + if (reg_info->data_type != VSIR_DATA_U32) + { + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + + return spirv_compiler_emit_int_to_bool(compiler, VKD3D_SHADER_CONDITIONAL_OP_NZ, VSIR_DATA_U32, 1, val_id); }
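/* Editor's note, not part of the patch: the rewritten tail above flattens the old nested
 * conversion logic. A load into any non-bool type is a single bitcast to the register's
 * declared type; a load into VSIR_DATA_BOOL first normalizes the stored value to u32,
 * bitcasting only when the backing storage is not already u32, and then emits the
 * compare-against-zero from spirv_compiler_emit_int_to_bool(). */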
static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compiler, @@ -4684,14 +4666,12 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil { uint32_t *elements, elem_type_id, length_id, type_id, const_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type component_type; unsigned int i, element_count, component_count;
element_count = icb->element_count;
- component_type = vkd3d_component_type_from_data_type(icb->data_type); component_count = icb->component_count; - elem_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, icb->data_type, component_count); + elem_type_id = spirv_get_type_id(compiler, icb->data_type, component_count); length_id = spirv_compiler_get_constant_uint(compiler, element_count); type_id = vkd3d_spirv_get_op_type_array(builder, elem_type_id, length_id);
@@ -4706,7 +4686,6 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil
if (!(elements = vkd3d_calloc(element_count, sizeof(*elements)))) { - ERR("Failed to allocate %u elements.", element_count); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_OUT_OF_MEMORY, "Failed to allocate %u constant array elements.", element_count); return 0; @@ -4718,22 +4697,27 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compil case VSIR_DATA_I32: case VSIR_DATA_U32: for (i = 0; i < element_count; ++i) - elements[i] = spirv_compiler_get_constant(compiler, component_type, component_count, - &icb->data[component_count * i]); + { + elements[i] = spirv_compiler_get_constant(compiler, icb->data_type, + component_count, &icb->data[component_count * i]); + } break; + case VSIR_DATA_F64: + case VSIR_DATA_I64: case VSIR_DATA_U64: { uint64_t *data = (uint64_t *)icb->data; for (i = 0; i < element_count; ++i) - elements[i] = spirv_compiler_get_constant64(compiler, component_type, component_count, - &data[component_count * i]); + elements[i] = spirv_compiler_get_constant64(compiler, icb->data_type, + component_count, &data[component_count * i]); break; } + default: - FIXME("Unhandled data type %u.\n", icb->data_type); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_TYPE, - "Immediate constant buffer data type %u is unhandled.", icb->data_type); + "Immediate constant buffer data type \"%s\" (%#x) is unhandled.", + vsir_data_type_get_name(icb->data_type, "<unknown>"), icb->data_type); break; }
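/* Editor's note, not part of the patch: icb->data is 32-bit storage, so the new
 * VSIR_DATA_F64/I64/U64 cases above read each constant back through the uint64_t view,
 * i.e. two 32-bit words per component, before handing it to
 * spirv_compiler_get_constant64(). */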
@@ -4756,7 +4740,6 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type component_type; struct vkd3d_shader_register_info reg_info; unsigned int component_count; uint32_t type_id, val_id; @@ -4769,20 +4752,25 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, else if (reg->type == VKD3DSPR_UNDEF) return spirv_compiler_emit_load_undef(compiler, reg, write_mask); else if (reg->type == VKD3DSPR_PARAMETER) - return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, + { + val_id = spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type, reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1); + if (reg->dimension != VSIR_DIMENSION_VEC4) + return val_id; + return spirv_compiler_emit_swizzle(compiler, val_id, + VKD3DSP_WRITEMASK_ALL, reg->data_type, swizzle, write_mask); + }
component_count = vsir_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type);
if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) { - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + type_id = spirv_get_type_id(compiler, reg->data_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); } spirv_compiler_emit_dereference_register(compiler, reg, &reg_info);
- val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) + val_write_mask = (data_type_is_64_bit(reg->data_type) && !data_type_is_64_bit(reg_info.data_type)) ? vsir_write_mask_32_from_64(write_mask) : write_mask;
/* Intermediate value (no storage class). */ @@ -4796,35 +4784,31 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, } else { - type_id = vkd3d_spirv_get_type_id(builder, - reg_info.component_type, vsir_write_mask_component_count(reg_info.write_mask)); + type_id = spirv_get_type_id(compiler, reg_info.data_type, + vsir_write_mask_component_count(reg_info.write_mask)); val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone); swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; }
- val_id = spirv_compiler_emit_swizzle(compiler, - val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); + val_id = spirv_compiler_emit_swizzle(compiler, val_id, + reg_info.write_mask, reg_info.data_type, swizzle, val_write_mask); + if (reg->data_type == reg_info.data_type) + return val_id;
- if (component_type != reg_info.component_type) + if (reg->data_type != VSIR_DATA_BOOL) { - if (component_type == VKD3D_SHADER_COMPONENT_BOOL) - { - if (reg_info.component_type != VKD3D_SHADER_COMPONENT_UINT) - { - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, component_count); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } - val_id = spirv_compiler_emit_int_to_bool(compiler, - VKD3D_SHADER_CONDITIONAL_OP_NZ, VSIR_DATA_U32, component_count, val_id); - } - else - { - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } + type_id = spirv_get_type_id(compiler, reg->data_type, component_count); + return vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); }
- return val_id; + if (reg_info.data_type != VSIR_DATA_U32) + { + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, component_count); + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + } + + return spirv_compiler_emit_int_to_bool(compiler, VKD3D_SHADER_CONDITIONAL_OP_NZ, + VSIR_DATA_U32, component_count, val_id); }
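/* Editor's sketch, not part of the patch, assuming vsir_write_mask_32_from_64() maps
 * each 64-bit component to a pair of 32-bit components: when a 64-bit typed register is
 * backed by 32-bit storage, val_write_mask widens the request before the swizzle, e.g.
 *
 *     write_mask 0x3 (xy of a double2)  ->  val_write_mask 0xf (xyzw)
 */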
static void spirv_compiler_emit_execution_mode(struct spirv_compiler *compiler, @@ -4842,79 +4826,24 @@ static void spirv_compiler_emit_execution_mode1(struct spirv_compiler *compiler, spirv_compiler_emit_execution_mode(compiler, mode, &literal, 1); }
-static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id; - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); - - FIXME("Unhandled data type %#x.\n", reg->data_type); - return val_id; -} - -static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id; - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); - else if (data_type_is_integer(reg->data_type)) - return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); - - FIXME("Unhandled data type %#x.\n", reg->data_type); - return val_id; -} - -static uint32_t spirv_compiler_emit_src_modifier(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask, - enum vkd3d_shader_src_modifier modifier, uint32_t val_id) -{ - switch (modifier) - { - case VKD3DSPSM_NONE: - break; - case VKD3DSPSM_NEG: - return spirv_compiler_emit_neg(compiler, reg, write_mask, val_id); - case VKD3DSPSM_ABS: - return spirv_compiler_emit_abs(compiler, reg, write_mask, val_id); - case VKD3DSPSM_ABSNEG: - val_id = spirv_compiler_emit_abs(compiler, reg, write_mask, val_id); - return spirv_compiler_emit_neg(compiler, reg, write_mask, val_id); - default: - FIXME("Unhandled src modifier %#x.\n", modifier); - break; - } - - return val_id; -} - static uint32_t spirv_compiler_emit_load_src(struct spirv_compiler *compiler, const struct vkd3d_shader_src_param *src, uint32_t write_mask) { - uint32_t val_id; - - val_id = spirv_compiler_emit_load_reg(compiler, &src->reg, src->swizzle, write_mask); - return spirv_compiler_emit_src_modifier(compiler, &src->reg, write_mask, src->modifiers, val_id); + return spirv_compiler_emit_load_reg(compiler, &src->reg, src->swizzle, write_mask); }
static uint32_t spirv_compiler_emit_load_src_with_type(struct spirv_compiler *compiler, - const struct vkd3d_shader_src_param *src, uint32_t write_mask, enum vkd3d_shader_component_type component_type) + const struct vkd3d_shader_src_param *src, uint32_t write_mask, enum vsir_data_type data_type) { struct vkd3d_shader_src_param src_param = *src;
- src_param.reg.data_type = vsir_data_type_from_component_type(component_type); + src_param.reg.data_type = data_type; + return spirv_compiler_emit_load_src(compiler, &src_param, write_mask); }
static void spirv_compiler_emit_store_scalar(struct spirv_compiler *compiler, - uint32_t dst_id, uint32_t dst_write_mask, enum vkd3d_shader_component_type component_type, + uint32_t dst_id, uint32_t dst_write_mask, enum vsir_data_type data_type, SpvStorageClass storage_class, uint32_t write_mask, uint32_t val_id) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -4923,7 +4852,7 @@ static void spirv_compiler_emit_store_scalar(struct spirv_compiler *compiler,
if (vsir_write_mask_component_count(dst_write_mask) > 1) { - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + type_id = spirv_get_type_id(compiler, data_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); component_idx = vsir_write_mask_get_component_idx(write_mask); component_idx -= vsir_write_mask_get_component_idx(dst_write_mask); @@ -4934,9 +4863,8 @@ static void spirv_compiler_emit_store_scalar(struct spirv_compiler *compiler, vkd3d_spirv_build_op_store(builder, dst_id, val_id, SpvMemoryAccessMaskNone); }
-static void spirv_compiler_emit_store(struct spirv_compiler *compiler, - uint32_t dst_id, uint32_t dst_write_mask, enum vkd3d_shader_component_type component_type, - SpvStorageClass storage_class, uint32_t write_mask, uint32_t val_id) +static void spirv_compiler_emit_store(struct spirv_compiler *compiler, uint32_t dst_id, uint32_t dst_write_mask, + enum vsir_data_type data_type, SpvStorageClass storage_class, uint32_t write_mask, uint32_t val_id) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int component_count, dst_component_count; @@ -4951,7 +4879,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler,
if (dst_component_count == 1 && component_count != 1) { - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + type_id = spirv_get_type_id(compiler, data_type, 1); val_id = vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, vsir_write_mask_get_component_idx(dst_write_mask)); write_mask &= dst_write_mask; @@ -4959,14 +4887,12 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, }
if (component_count == 1) - { - return spirv_compiler_emit_store_scalar(compiler, - dst_id, dst_write_mask, component_type, storage_class, write_mask, val_id); - } + return spirv_compiler_emit_store_scalar(compiler, dst_id, + dst_write_mask, data_type, storage_class, write_mask, val_id);
if (dst_component_count != component_count) { - type_id = vkd3d_spirv_get_type_id(builder, component_type, dst_component_count); + type_id = spirv_get_type_id(compiler, data_type, dst_component_count); dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone);
VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); @@ -4993,9 +4919,9 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type component_type; struct vkd3d_shader_register_info reg_info; uint32_t src_write_mask = write_mask; + enum vsir_data_type data_type; uint32_t type_id;
VKD3D_ASSERT(!register_is_constant_or_undef(reg)); @@ -5010,22 +4936,22 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, return; spirv_compiler_emit_dereference_register(compiler, reg, &reg_info);
- component_type = vkd3d_component_type_from_data_type(reg->data_type); - if (component_type != reg_info.component_type) + data_type = reg->data_type; + if (data_type != reg_info.data_type) { - if (data_type_is_64_bit(reg->data_type)) + if (data_type_is_64_bit(data_type)) src_write_mask = vsir_write_mask_32_from_64(write_mask); - if (component_type == VKD3D_SHADER_COMPONENT_BOOL) + if (data_type == VSIR_DATA_BOOL) val_id = spirv_compiler_emit_bool_to_int(compiler, vsir_write_mask_component_count(src_write_mask), val_id, false); - type_id = vkd3d_spirv_get_type_id(builder, reg_info.component_type, + type_id = spirv_get_type_id(compiler, reg_info.data_type, vsir_write_mask_component_count(src_write_mask)); val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - component_type = reg_info.component_type; + data_type = reg_info.data_type; }
- spirv_compiler_emit_store(compiler, - reg_info.id, reg_info.write_mask, component_type, reg_info.storage_class, src_write_mask, val_id); + spirv_compiler_emit_store(compiler, reg_info.id, reg_info.write_mask, + data_type, reg_info.storage_class, src_write_mask, val_id); }
static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, @@ -5057,62 +4983,36 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, uint32_t val_id) { - uint32_t modifiers = dst->modifiers; - - /* It is always legitimate to ignore _pp. */ - modifiers &= ~VKD3DSPDM_PARTIALPRECISION; - - if (modifiers & VKD3DSPDM_SATURATE) - { - val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); - modifiers &= ~VKD3DSPDM_SATURATE; - } - - if (dst->modifiers & VKD3DSPDM_MSAMPCENTROID) - { - FIXME("Ignoring _centroid modifier.\n"); - modifiers &= ~VKD3DSPDM_MSAMPCENTROID; - } - - VKD3D_ASSERT(!modifiers); - spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, val_id); }
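/* Editor's note, not part of the patch: with the two removals above,
 * spirv_compiler_emit_load_src() and spirv_compiler_emit_store_dst() no longer apply
 * abs/neg source modifiers or the _sat destination modifier themselves; the SPIR-V
 * backend now assumes any such modifiers have been lowered out of the IR before code
 * generation. */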
static void spirv_compiler_emit_store_dst_swizzled(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst, uint32_t val_id, - enum vkd3d_shader_component_type component_type, uint32_t swizzle) + const struct vkd3d_shader_dst_param *dst, uint32_t val_id, enum vsir_data_type data_type, uint32_t swizzle) { struct vkd3d_shader_dst_param typed_dst = *dst; - val_id = spirv_compiler_emit_swizzle(compiler, - val_id, VKD3DSP_WRITEMASK_ALL, component_type, swizzle, dst->write_mask); + + val_id = spirv_compiler_emit_swizzle(compiler, val_id, + VKD3DSP_WRITEMASK_ALL, data_type, swizzle, dst->write_mask); /* XXX: The register data type could be fixed by the shader parser. For SM5 - * shaders the data types are stored in instructions modifiers. - */ - typed_dst.reg.data_type = vsir_data_type_from_component_type(component_type); + * shaders the data types are stored in instructions modifiers. */ + typed_dst.reg.data_type = data_type; spirv_compiler_emit_store_dst(compiler, &typed_dst, val_id); }
static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_component_type component_type, - uint32_t *component_ids) + const struct vkd3d_shader_dst_param *dst, enum vsir_data_type data_type, uint32_t *component_ids) { unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, dst_type_id, val_id;
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + type_id = spirv_get_type_id(compiler, data_type, component_count); if (component_count > 1) - { - val_id = vkd3d_spirv_build_op_composite_construct(builder, - type_id, component_ids, component_count); - } + val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, component_ids, component_count); else - { val_id = *component_ids; - }
- dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); + dst_type_id = spirv_get_type_id(compiler, dst->reg.data_type, component_count); if (dst_type_id != type_id) val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id);
@@ -5120,8 +5020,7 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp }
static void spirv_compiler_emit_store_dst_scalar(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst, uint32_t val_id, - enum vkd3d_shader_component_type component_type, uint32_t swizzle) + const struct vkd3d_shader_dst_param *dst, uint32_t val_id, enum vsir_data_type data_type, uint32_t swizzle) { unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); uint32_t component_ids[VKD3D_VEC4_SIZE]; @@ -5135,7 +5034,7 @@ static void spirv_compiler_emit_store_dst_scalar(struct spirv_compiler *compiler
component_ids[i] = val_id; } - spirv_compiler_emit_store_dst_components(compiler, dst, component_type, component_ids); + spirv_compiler_emit_store_dst_components(compiler, dst, data_type, component_ids); }
static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, @@ -5227,7 +5126,7 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, }
static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type, uint32_t id, enum vkd3d_shader_interpolation_mode mode) + enum vsir_data_type data_type, uint32_t id, enum vkd3d_shader_interpolation_mode mode) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
@@ -5236,7 +5135,7 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler case VKD3DSIM_NONE: /* VUID-StandaloneSpirv-Flat-04744: integer or double types must be * decorated 'Flat' for fragment shaders. */ - if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL || component_type == VKD3D_SHADER_COMPONENT_FLOAT) + if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL || data_type == VSIR_DATA_F32) break; /* fall through */ case VKD3DSIM_CONSTANT: @@ -5276,14 +5175,13 @@ static uint32_t spirv_compiler_emit_draw_parameter_fixup(struct spirv_compiler *
vkd3d_spirv_enable_capability(builder, SpvCapabilityDrawParameters);
- base_var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, - SpvStorageClassInput, VKD3D_SHADER_COMPONENT_INT, 1); + base_var_id = spirv_compiler_emit_variable(compiler, + &builder->global_stream, SpvStorageClassInput, VSIR_DATA_I32, 1); vkd3d_spirv_add_iface_variable(builder, base_var_id); spirv_compiler_decorate_builtin(compiler, base_var_id, base);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1); - base_id = vkd3d_spirv_build_op_load(builder, - type_id, base_var_id, SpvMemoryAccessMaskNone); + type_id = spirv_get_type_id(compiler, VSIR_DATA_I32, 1); + base_id = vkd3d_spirv_build_op_load(builder, type_id, base_var_id, SpvMemoryAccessMaskNone);
return vkd3d_spirv_build_op_isub(builder, type_id, index_id, base_id); } @@ -5311,23 +5209,22 @@ static uint32_t sv_front_face_fixup(struct spirv_compiler *compiler, }
/* frag_coord.w = 1.0f / frag_coord.w */ -static uint32_t frag_coord_fixup(struct spirv_compiler *compiler, - uint32_t frag_coord_id) +static uint32_t frag_coord_fixup(struct spirv_compiler *compiler, uint32_t frag_coord_id) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, w_id;
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, 1); w_id = vkd3d_spirv_build_op_composite_extract1(builder, type_id, frag_coord_id, 3); - w_id = vkd3d_spirv_build_op_fdiv(builder, type_id, - spirv_compiler_get_constant_float(compiler, 1.0f), w_id); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + w_id = vkd3d_spirv_build_op_fdiv(builder, type_id, spirv_compiler_get_constant_float(compiler, 1.0f), w_id); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, VKD3D_VEC4_SIZE); + return vkd3d_spirv_build_op_composite_insert1(builder, type_id, w_id, frag_coord_id, 3); }
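/* Editor's note, not part of the patch: the reciprocal above bridges two conventions for
 * the position builtin; Vulkan's FragCoord.w holds 1/w_clip while D3D's SV_Position.w
 * exposes w_clip, so the fixup extracts component 3, divides it into 1.0f, and inserts
 * the result back. */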
struct vkd3d_spirv_builtin { - enum vkd3d_shader_component_type component_type; + enum vsir_data_type data_type; unsigned int component_count; SpvBuiltIn spirv_builtin; vkd3d_spirv_builtin_fixup_pfn fixup_pfn; @@ -5347,43 +5244,43 @@ static const struct } vkd3d_system_value_builtins[] = { - {VKD3D_SHADER_SV_VERTEX_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInVertexId}, + {VKD3D_SHADER_SV_VERTEX_ID, {VSIR_DATA_I32, 1, SpvBuiltInVertexId}, VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5}, - {VKD3D_SHADER_SV_INSTANCE_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInstanceId}, + {VKD3D_SHADER_SV_INSTANCE_ID, {VSIR_DATA_I32, 1, SpvBuiltInInstanceId}, VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5},
- {VKD3D_SHADER_SV_POSITION, {VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInPosition}}, - {VKD3D_SHADER_SV_VERTEX_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInVertexIndex, sv_vertex_id_fixup}}, - {VKD3D_SHADER_SV_INSTANCE_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInstanceIndex, sv_instance_id_fixup}}, + {VKD3D_SHADER_SV_POSITION, {VSIR_DATA_F32, 4, SpvBuiltInPosition}}, + {VKD3D_SHADER_SV_VERTEX_ID, {VSIR_DATA_I32, 1, SpvBuiltInVertexIndex, sv_vertex_id_fixup}}, + {VKD3D_SHADER_SV_INSTANCE_ID, {VSIR_DATA_I32, 1, SpvBuiltInInstanceIndex, sv_instance_id_fixup}},
- {VKD3D_SHADER_SV_PRIMITIVE_ID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, + {VKD3D_SHADER_SV_PRIMITIVE_ID, {VSIR_DATA_I32, 1, SpvBuiltInPrimitiveId}},
- {VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLayer}}, - {VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInViewportIndex}}, + {VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX, {VSIR_DATA_I32, 1, SpvBuiltInLayer}}, + {VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX, {VSIR_DATA_I32, 1, SpvBuiltInViewportIndex}},
- {VKD3D_SHADER_SV_IS_FRONT_FACE, {VKD3D_SHADER_COMPONENT_BOOL, 1, SpvBuiltInFrontFacing, sv_front_face_fixup}}, + {VKD3D_SHADER_SV_IS_FRONT_FACE, {VSIR_DATA_BOOL, 1, SpvBuiltInFrontFacing, sv_front_face_fixup}},
- {VKD3D_SHADER_SV_SAMPLE_INDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleId}}, + {VKD3D_SHADER_SV_SAMPLE_INDEX, {VSIR_DATA_U32, 1, SpvBuiltInSampleId}},
- {VKD3D_SHADER_SV_CLIP_DISTANCE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInClipDistance, NULL, 1}}, - {VKD3D_SHADER_SV_CULL_DISTANCE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInCullDistance, NULL, 1}}, + {VKD3D_SHADER_SV_CLIP_DISTANCE, {VSIR_DATA_F32, 1, SpvBuiltInClipDistance, NULL, 1}}, + {VKD3D_SHADER_SV_CULL_DISTANCE, {VSIR_DATA_F32, 1, SpvBuiltInCullDistance, NULL, 1}},
- {VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4}}, - {VKD3D_SHADER_SV_TESS_FACTOR_QUADINT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelInner, NULL, 2}}, + {VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE, {VSIR_DATA_F32, 1, SpvBuiltInTessLevelOuter, NULL, 4}}, + {VKD3D_SHADER_SV_TESS_FACTOR_QUADINT, {VSIR_DATA_F32, 1, SpvBuiltInTessLevelInner, NULL, 2}},
- {VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4}}, - {VKD3D_SHADER_SV_TESS_FACTOR_TRIINT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelInner, NULL, 2}}, + {VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE, {VSIR_DATA_F32, 1, SpvBuiltInTessLevelOuter, NULL, 4}}, + {VKD3D_SHADER_SV_TESS_FACTOR_TRIINT, {VSIR_DATA_F32, 1, SpvBuiltInTessLevelInner, NULL, 2}},
- {VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 0}}, - {VKD3D_SHADER_SV_TESS_FACTOR_LINEDET, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInTessLevelOuter, NULL, 4, 1}}, + {VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN, {VSIR_DATA_F32, 1, SpvBuiltInTessLevelOuter, NULL, 4, 0}}, + {VKD3D_SHADER_SV_TESS_FACTOR_LINEDET, {VSIR_DATA_F32, 1, SpvBuiltInTessLevelOuter, NULL, 4, 1}}, }; static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = { - VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup, + VSIR_DATA_F32, 4, SpvBuiltInFragCoord, frag_coord_fixup, }; static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = { - VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize, + VSIR_DATA_F32, 1, SpvBuiltInPointSize, }; static const struct { @@ -5393,31 +5290,31 @@ static const struct } vkd3d_register_builtins[] = { - {VKD3DSPR_THREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, - {VKD3DSPR_LOCALTHREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, - {VKD3DSPR_LOCALTHREADINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, - {VKD3DSPR_THREADGROUPID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, + {VKD3DSPR_THREADID, SpvStorageClassInput, {VSIR_DATA_I32, 3, SpvBuiltInGlobalInvocationId}}, + {VKD3DSPR_LOCALTHREADID, SpvStorageClassInput, {VSIR_DATA_I32, 3, SpvBuiltInLocalInvocationId}}, + {VKD3DSPR_LOCALTHREADINDEX, SpvStorageClassInput, {VSIR_DATA_I32, 1, SpvBuiltInLocalInvocationIndex}}, + {VKD3DSPR_THREADGROUPID, SpvStorageClassInput, {VSIR_DATA_I32, 3, SpvBuiltInWorkgroupId}},
- {VKD3DSPR_GSINSTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, - {VKD3DSPR_OUTPOINTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + {VKD3DSPR_GSINSTID, SpvStorageClassInput, {VSIR_DATA_I32, 1, SpvBuiltInInvocationId}}, + {VKD3DSPR_OUTPOINTID, SpvStorageClassInput, {VSIR_DATA_I32, 1, SpvBuiltInInvocationId}},
- {VKD3DSPR_PRIMID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, + {VKD3DSPR_PRIMID, SpvStorageClassInput, {VSIR_DATA_I32, 1, SpvBuiltInPrimitiveId}},
- {VKD3DSPR_TESSCOORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, + {VKD3DSPR_TESSCOORD, SpvStorageClassInput, {VSIR_DATA_F32, 3, SpvBuiltInTessCoord}},
- {VKD3DSPR_POINT_COORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, + {VKD3DSPR_POINT_COORD, SpvStorageClassInput, {VSIR_DATA_F32, 2, SpvBuiltInPointCoord}},
- {VKD3DSPR_COVERAGE, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - {VKD3DSPR_SAMPLEMASK, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_COVERAGE, SpvStorageClassInput, {VSIR_DATA_U32, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_SAMPLEMASK, SpvStorageClassOutput, {VSIR_DATA_U32, 1, SpvBuiltInSampleMask, NULL, 1}},
- {VKD3DSPR_DEPTHOUT, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - {VKD3DSPR_DEPTHOUTGE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - {VKD3DSPR_DEPTHOUTLE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUT, SpvStorageClassOutput, {VSIR_DATA_F32, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUTGE, SpvStorageClassOutput, {VSIR_DATA_F32, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUTLE, SpvStorageClassOutput, {VSIR_DATA_F32, 1, SpvBuiltInFragDepth}},
- {VKD3DSPR_OUTSTENCILREF, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, + {VKD3DSPR_OUTSTENCILREF, SpvStorageClassOutput, {VSIR_DATA_U32, 1, SpvBuiltInFragStencilRefEXT}},
- {VKD3DSPR_WAVELANECOUNT, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, - {VKD3DSPR_WAVELANEINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, + {VKD3DSPR_WAVELANECOUNT, SpvStorageClassInput, {VSIR_DATA_U32, 1, SpvBuiltInSubgroupSize}}, + {VKD3DSPR_WAVELANEINDEX, SpvStorageClassInput, {VSIR_DATA_U32, 1, SpvBuiltInSubgroupLocalInvocationId}}, };
static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, @@ -5527,7 +5424,8 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co uint32_t type_id, id;
id = spirv_compiler_get_invocation_id(compiler); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_I32, 1); + return vkd3d_spirv_build_op_load(builder, type_id, id, SpvMemoryAccessMaskNone); }
@@ -5641,13 +5539,12 @@ static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *co sizes[0] = max(sizes[0], builtin->spirv_array_size);
id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, - builtin->component_type, builtin->component_count, array_sizes, size_count); + builtin->data_type, builtin->component_count, array_sizes, size_count); vkd3d_spirv_add_iface_variable(builder, id); spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin);
if (compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && storage_class == SpvStorageClassInput - && builtin->component_type != VKD3D_SHADER_COMPONENT_FLOAT - && builtin->component_type != VKD3D_SHADER_COMPONENT_BOOL) + && builtin->data_type != VSIR_DATA_F32 && builtin->data_type != VSIR_DATA_BOOL) vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationFlat, NULL, 0);
return id; @@ -5711,8 +5608,7 @@ static const struct vkd3d_symbol *spirv_compiler_emit_io_register(struct spirv_c spirv_compiler_emit_register_debug_name(builder, id, reg);
write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); - vkd3d_symbol_set_register_info(&reg_symbol, id, - storage_class, builtin->component_type, write_mask); + vkd3d_symbol_set_register_info(&reg_symbol, id, storage_class, builtin->data_type, write_mask); reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size;
return spirv_compiler_put_symbol(compiler, ®_symbol); @@ -5725,7 +5621,6 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, unsigned int component_idx, input_component_count; const struct signature_element *signature_element; const struct shader_signature *shader_signature; - enum vkd3d_shader_component_type component_type; enum vkd3d_shader_register_type sysval_reg_type; const struct vkd3d_spirv_builtin *builtin; enum vkd3d_shader_sysval_semantic sysval; @@ -5733,6 +5628,7 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, uint32_t val_id, input_id, var_id; uint32_t type_id, float_type_id; struct vkd3d_symbol reg_symbol; + enum vsir_data_type data_type; SpvStorageClass storage_class; bool use_private_var = false; unsigned int array_sizes[2]; @@ -5781,12 +5677,12 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler,
if (builtin) { - component_type = builtin->component_type; + data_type = builtin->data_type; input_component_count = builtin->component_count; } else { - component_type = signature_element->component_type; + data_type = vsir_data_type_from_component_type(signature_element->component_type); input_component_count = vsir_write_mask_component_count(signature_element->mask); }
@@ -5828,7 +5724,7 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, unsigned int location = signature_element->target_location;
input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, input_component_count, array_sizes, 2); + storage_class, data_type, input_component_count, array_sizes, 2); vkd3d_spirv_add_iface_variable(builder, input_id); if (reg_type == VKD3DSPR_PATCHCONST) { @@ -5839,8 +5735,8 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, if (component_idx) vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx);
- spirv_compiler_emit_interpolation_decorations(compiler, component_type, input_id, - signature_element->interpolation_mode); + spirv_compiler_emit_interpolation_decorations(compiler, data_type, + input_id, signature_element->interpolation_mode); }
var_id = input_id; @@ -5848,11 +5744,11 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, { storage_class = SpvStorageClassPrivate; var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_sizes, 2); + storage_class, VSIR_DATA_F32, VKD3D_VEC4_SIZE, array_sizes, 2); }
vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_var ? VSIR_DATA_F32 : data_type, use_private_var ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); @@ -5867,22 +5763,22 @@ vsir_register_init(&dst_reg, reg_type, VSIR_DATA_F32, 1); dst_reg.idx[0].offset = element_idx;
- type_id = vkd3d_spirv_get_type_id(builder, component_type, input_component_count); + type_id = spirv_get_type_id(compiler, data_type, input_component_count);
val_id = vkd3d_spirv_build_op_load(builder, type_id, input_id, SpvMemoryAccessMaskNone);
if (builtin && builtin->fixup_pfn) val_id = builtin->fixup_pfn(compiler, val_id);
- if (component_type != VKD3D_SHADER_COMPONENT_FLOAT) + if (data_type != VSIR_DATA_F32) { - float_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, input_component_count); + float_type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, input_component_count); val_id = vkd3d_spirv_build_op_bitcast(builder, float_type_id, val_id); }
val_id = spirv_compiler_emit_swizzle(compiler, val_id, - vkd3d_write_mask_from_component_count(input_component_count), - VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_SHADER_NO_SWIZZLE, signature_element->mask >> component_idx); + vkd3d_write_mask_from_component_count(input_component_count), VSIR_DATA_F32, + VKD3D_SHADER_NO_SWIZZLE, signature_element->mask >> component_idx);
spirv_compiler_emit_store_reg(compiler, &dst_reg, signature_element->mask >> component_idx, val_id); } @@ -5917,7 +5813,7 @@ static void calculate_clip_or_cull_distance_mask(const struct signature_element return; }
- write_mask = e->mask >> vsir_write_mask_get_component_idx(e->mask); + write_mask = e->mask; *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); }
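/* Editor's worked example, not part of the patch (values invented): with the change
 * above, an element with mask 0x0c (z and w) and semantic_index 1 contributes
 * (0x0c & 0xf) << 4 == 0xc0 to *mask, preserving component positions; the old code first
 * shifted the mask down to its lowest set component, which would have produced 0x30
 * instead. */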
@@ -5973,13 +5869,13 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * { case VKD3D_SHADER_SV_CLIP_DISTANCE: compiler->output_info[i].id = clip_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; + compiler->output_info[i].data_type = VSIR_DATA_F32; compiler->output_info[i].array_element_mask = clip_distance_mask; break;
case VKD3D_SHADER_SV_CULL_DISTANCE: compiler->output_info[i].id = cull_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; + compiler->output_info[i].data_type = VSIR_DATA_F32; compiler->output_info[i].array_element_mask = cull_distance_mask; break;
@@ -6020,17 +5916,17 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int component_idx, output_component_count; const struct signature_element *signature_element; - enum vkd3d_shader_component_type component_type; const struct shader_signature *shader_signature; const struct vkd3d_spirv_builtin *builtin; enum vkd3d_shader_sysval_semantic sysval; - uint32_t write_mask; bool use_private_variable = false; struct vkd3d_symbol reg_symbol; + enum vsir_data_type data_type; SpvStorageClass storage_class; unsigned int array_sizes[2]; bool is_patch_constant; uint32_t id, var_id; + uint32_t write_mask;
is_patch_constant = (reg_type == VKD3DSPR_PATCHCONST);
@@ -6055,13 +5951,13 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, output_component_count = vsir_write_mask_component_count(write_mask); if (builtin) { - component_type = builtin->component_type; + data_type = builtin->data_type; if (!builtin->spirv_array_size) output_component_count = builtin->component_count; } else { - component_type = signature_element->component_type; + data_type = vsir_data_type_from_component_type(signature_element->component_type); }
storage_class = SpvStorageClassOutput; @@ -6102,7 +5998,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, { storage_class = SpvStorageClassPrivate; id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, output_component_count, array_sizes, 2); + storage_class, data_type, output_component_count, array_sizes, 2); } else { @@ -6115,7 +6011,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, location = signature_element->semantic_index;
id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, output_component_count, array_sizes, 2); + storage_class, data_type, output_component_count, array_sizes, 2); vkd3d_spirv_add_iface_variable(builder, id);
if (is_dual_source_blending(compiler) && location < 2) @@ -6140,7 +6036,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, if (!is_patch_constant) { compiler->output_info[element_idx].id = id; - compiler->output_info[element_idx].component_type = component_type; + compiler->output_info[element_idx].data_type = data_type; }
var_id = id; @@ -6148,11 +6044,11 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, { storage_class = SpvStorageClassPrivate; var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, - storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + storage_class, VSIR_DATA_F32, VKD3D_VEC4_SIZE); }
vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_variable ? VSIR_DATA_F32 : data_type, use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask); reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); @@ -6164,7 +6060,7 @@ if (use_private_variable) { compiler->private_output_variable[element_idx] = var_id; - compiler->private_output_variable_write_mask[element_idx] |= write_mask >> component_idx; + compiler->private_output_variable_write_mask[element_idx] |= write_mask; if (!compiler->epilogue_function_id) compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); } @@ -6198,6 +6094,7 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; const struct signature_element *element; unsigned int i, index, array_idx; + enum vsir_data_type data_type; uint32_t output_id;
dst_write_mask = output->mask; @@ -6215,17 +6112,16 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi use_mask |= element->used_mask; } } - index = vsir_write_mask_get_component_idx(output->mask); - dst_write_mask >>= index; - use_mask >>= index; write_mask &= dst_write_mask;
if (!write_mask) return;
- if (output_info->component_type != VKD3D_SHADER_COMPONENT_FLOAT) + data_type = output_info->data_type; + + if (data_type != VSIR_DATA_F32) { - type_id = vkd3d_spirv_get_type_id(builder, output_info->component_type, VKD3D_VEC4_SIZE); + type_id = spirv_get_type_id(compiler, data_type, VKD3D_VEC4_SIZE); val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); }
@@ -6235,35 +6131,32 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi { /* Set values to 0 for not initialized shader output components. */ write_mask |= uninit_mask; - zero_id = spirv_compiler_get_constant_vector(compiler, - output_info->component_type, VKD3D_VEC4_SIZE, 0); - val_id = spirv_compiler_emit_vector_shuffle(compiler, - zero_id, val_id, swizzle, uninit_mask, output_info->component_type, - vsir_write_mask_component_count(write_mask)); + zero_id = spirv_compiler_get_constant_vector(compiler, data_type, VKD3D_VEC4_SIZE, 0); + val_id = spirv_compiler_emit_vector_shuffle(compiler, zero_id, val_id, swizzle, + uninit_mask, data_type, vsir_write_mask_component_count(write_mask)); } else { - val_id = spirv_compiler_emit_swizzle(compiler, - val_id, VKD3DSP_WRITEMASK_ALL, output_info->component_type, swizzle, write_mask); + val_id = spirv_compiler_emit_swizzle(compiler, val_id, + VKD3DSP_WRITEMASK_ALL, data_type, swizzle, write_mask); }
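/* Editor's sketch, not part of the patch (ids invented): components the shader never
 * wrote are filled from a zero vector in a single shuffle, e.g. for uninit_mask == 0x8
 * (w only) with an identity swizzle:
 *
 *     %zero = OpConstantComposite %v4float %f0 %f0 %f0 %f0
 *     %out  = OpVectorShuffle %v4float %zero %val 4 5 6 3
 *
 * where selector lanes 4-6 take x, y, z from %val and lane 3 takes w from %zero. */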
output_id = output_info->id; if (output_index_id) { - type_id = vkd3d_spirv_get_type_id(builder, - output_info->component_type, vsir_write_mask_component_count(dst_write_mask)); + type_id = spirv_get_type_id(compiler, data_type, vsir_write_mask_component_count(dst_write_mask)); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); output_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, output_id, output_index_id); }
if (!output_info->array_element_mask) { - spirv_compiler_emit_store(compiler, - output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); + spirv_compiler_emit_store(compiler, output_id, dst_write_mask, + data_type, SpvStorageClassOutput, write_mask, val_id); return; }
- type_id = vkd3d_spirv_get_type_id(builder, output_info->component_type, 1); + type_id = spirv_get_type_id(compiler, data_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); mask = output_info->array_element_mask; array_idx = spirv_compiler_get_output_array_index(compiler, output); @@ -6276,9 +6169,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi chain_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, output_id, spirv_compiler_get_constant_uint(compiler, index)); object_id = spirv_compiler_emit_swizzle(compiler, val_id, write_mask, - output_info->component_type, VKD3D_SHADER_NO_SWIZZLE, VKD3DSP_WRITEMASK_0 << i); - spirv_compiler_emit_store(compiler, chain_id, VKD3DSP_WRITEMASK_0, - output_info->component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_0 << i, object_id); + data_type, VKD3D_SHADER_NO_SWIZZLE, VKD3DSP_WRITEMASK_0 << i); + spirv_compiler_emit_store(compiler, chain_id, VKD3DSP_WRITEMASK_0, data_type, + SpvStorageClassOutput, VKD3DSP_WRITEMASK_0 << i, object_id); ++index; } } @@ -6305,7 +6198,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * function_id = compiler->epilogue_function_id;
void_id = vkd3d_spirv_get_op_type_void(builder); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, 4); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); for (i = 0, count = 0; i < ARRAY_SIZE(compiler->private_output_variable); ++i) { @@ -6485,7 +6378,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t for (i = 0; i < compiler->temp_count; ++i) { id = spirv_compiler_emit_variable(compiler, &builder->global_stream, - SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + SpvStorageClassPrivate, VSIR_DATA_F32, VKD3D_VEC4_SIZE); if (!i) compiler->temp_id = id; VKD3D_ASSERT(id == compiler->temp_id + i); @@ -6514,7 +6407,6 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil const struct vkd3d_shader_indexable_temp *temp = &instruction->declaration.indexable_temp; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t id, type_id, length_id, ptr_type_id, init_id = 0; - enum vkd3d_shader_component_type component_type; struct vkd3d_shader_register reg; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; @@ -6535,8 +6427,7 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil function_location = spirv_compiler_get_current_function_location(compiler); vkd3d_spirv_begin_function_stream_insertion(builder, function_location);
- component_type = vkd3d_component_type_from_data_type(temp->data_type); - type_id = vkd3d_spirv_get_type_id(builder, component_type, temp->component_count); + type_id = spirv_get_type_id(compiler, temp->data_type, temp->component_count); length_id = spirv_compiler_get_constant_uint(compiler, temp->register_size); type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); @@ -6549,8 +6440,8 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil vkd3d_spirv_end_function_stream_insertion(builder);
vkd3d_symbol_make_register(&reg_symbol, &reg); - vkd3d_symbol_set_register_info(&reg_symbol, id, storage_class, - component_type, vkd3d_write_mask_from_component_count(temp->component_count)); + vkd3d_symbol_set_register_info(&reg_symbol, id, storage_class, temp->data_type, + vkd3d_write_mask_from_component_count(temp->component_count)); spirv_compiler_put_symbol(compiler, &reg_symbol); }
@@ -6577,7 +6468,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com if (!(member_ids = vkd3d_calloc(count, sizeof(*member_ids)))) return;
- vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + vec4_id = spirv_get_type_id(compiler, VSIR_DATA_F32, VKD3D_VEC4_SIZE);
for (i = 0, j = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) { @@ -6594,7 +6485,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com
if (compiler->offset_info.descriptor_table_count) { - uint32_t type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + uint32_t type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); length_id = spirv_compiler_get_constant_uint(compiler, compiler->offset_info.descriptor_table_count); member_ids[j] = vkd3d_spirv_build_op_type_array(builder, type_id, length_id); vkd3d_spirv_build_op_decorate1(builder, member_ids[j], SpvDecorationArrayStride, 4); @@ -6626,8 +6517,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com vkd3d_spirv_build_op_member_name(builder, struct_id, j, "cb%u", reg_idx);
vkd3d_symbol_make_register(&reg_symbol, &cb->reg); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VSIR_DATA_F32, VKD3DSP_WRITEMASK_ALL); reg_symbol.info.reg.member_idx = j; spirv_compiler_put_symbol(compiler, &reg_symbol);
@@ -6778,17 +6668,22 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, /* Push constant buffers are handled in * spirv_compiler_emit_push_constant_buffers(). */ - push_cb->reg = reg; - push_cb->size = size; if (size_in_bytes > push_cb->pc.size) { - WARN("Constant buffer size %u exceeds push constant size %u.\n", - size_in_bytes, push_cb->pc.size); + spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SIZE, + "Constant buffer cb%u, space %u, has size %u which exceeds the push constant size %u.", + push_cb->pc.register_index, push_cb->pc.register_space, size_in_bytes, push_cb->pc.size); + size_in_bytes = push_cb->pc.size; + size = align(size_in_bytes, VKD3D_VEC4_SIZE * sizeof(uint32_t)); + size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); } + + push_cb->reg = reg; + push_cb->size = size; return; }
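An aside on the clamp above: the buffer size is tracked both in bytes (size_in_bytes) and in vec4 registers (size), so after clamping to the push constant range the register count is recomputed. A minimal sketch of the same arithmetic, assuming align() rounds up to the given multiple:

    /* One vec4 register is VKD3D_VEC4_SIZE (4) 32-bit words, i.e. 16 bytes,
     * so clamping to a 36-byte push constant range gives
     * align(36, 16) == 48, and 48 / 16 == 3 vec4 registers. */
    const unsigned int vec4_bytes = VKD3D_VEC4_SIZE * sizeof(uint32_t);
    size = align(size_in_bytes, vec4_bytes) / vec4_bytes;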
- vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + vec4_id = spirv_get_type_id(compiler, VSIR_DATA_F32, VKD3D_VEC4_SIZE); length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16); @@ -6802,38 +6697,12 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, descriptor, false, &var_info);
vkd3d_symbol_make_register(&reg_symbol, &reg); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VSIR_DATA_F32, VKD3DSP_WRITEMASK_ALL); reg_symbol.descriptor_array = var_info.array_symbol; reg_symbol.info.reg.binding_base_idx = var_info.binding_base_idx; spirv_compiler_put_symbol(compiler, &reg_symbol); }
-static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_immediate_constant_buffer *icb = instruction->declaration.icb; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, const_id, ptr_type_id, icb_id; - struct vkd3d_shader_register reg; - struct vkd3d_symbol reg_symbol; - - const_id = spirv_compiler_emit_constant_array(compiler, icb, &type_id); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); - icb_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, - ptr_type_id, SpvStorageClassPrivate, const_id); - vkd3d_spirv_build_op_name(builder, icb_id, "icb"); - - /* Set an index count of 2 so vkd3d_symbol_make_register() uses idx[0] as a buffer id. */ - vsir_register_init(&reg, VKD3DSPR_IMMCONSTBUFFER, VSIR_DATA_F32, 2); - reg.idx[0].offset = icb->register_idx; - vkd3d_symbol_make_register(&reg_symbol, &reg); - vkd3d_symbol_set_register_info(&reg_symbol, icb_id, SpvStorageClassPrivate, - vkd3d_component_type_from_data_type(icb->data_type), - vkd3d_write_mask_from_component_count(icb->component_count)); - spirv_compiler_put_symbol(compiler, &reg_symbol); -} - static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { @@ -6859,8 +6728,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compi &reg, range, VKD3D_SHADER_RESOURCE_NONE, descriptor, false, &var_info);
vkd3d_symbol_make_register(&reg_symbol, &reg); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VSIR_DATA_F32, VKD3DSP_WRITEMASK_ALL); reg_symbol.descriptor_array = var_info.array_symbol; reg_symbol.info.reg.binding_base_idx = var_info.binding_base_idx; spirv_compiler_put_symbol(compiler, &reg_symbol); @@ -6883,7 +6751,7 @@ static const struct vkd3d_spirv_resource_type *spirv_compiler_enable_resource_ty return resource_type_info; }
-static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_type data_type) +static SpvImageFormat image_format_for_image_read(enum vsir_data_type data_type) { /* The following formats are supported by Direct3D 11 hardware for UAV * typed loads. Newer hardware may support more formats for UAV typed @@ -6891,11 +6759,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty */ switch (data_type) { - case VKD3D_SHADER_COMPONENT_FLOAT: + case VSIR_DATA_F32: + case VSIR_DATA_SNORM: + case VSIR_DATA_UNORM: return SpvImageFormatR32f; - case VKD3D_SHADER_COMPONENT_INT: + case VSIR_DATA_I32: return SpvImageFormatR32i; - case VKD3D_SHADER_COMPONENT_UINT: + case VSIR_DATA_U32: return SpvImageFormatR32ui; default: FIXME("Unhandled type %#x.\n", data_type); @@ -6905,8 +6775,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty
static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, - const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, - bool raw_structured) + const struct vkd3d_spirv_resource_type *resource_type_info, enum vsir_data_type data_type, bool raw_structured) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_descriptor_info1 *d; @@ -6927,15 +6796,15 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler vkd3d_spirv_enable_capability(builder, SpvCapabilityStorageImageReadWithoutFormat); }
- sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1); - return vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim, - 2, resource_type_info->arrayed, resource_type_info->ms, - reg->type == VKD3DSPR_UAV ? 2 : 1, format); + sampled_type_id = spirv_get_type_id(compiler, data_type, 1); + + return vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim, 2, + resource_type_info->arrayed, resource_type_info->ms, reg->type == VKD3DSPR_UAV ? 2 : 1, format); }
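For context on image_format_for_image_read() above: Direct3D 11 hardware is only required to support typed UAV loads from single-component 32-bit formats, so every readable data type collapses onto an R32 variant; SNORM and UNORM resources are sampled as float and therefore share R32f. A hypothetical use of the helper, illustration only:

    SpvImageFormat format = image_format_for_image_read(VSIR_DATA_UNORM);
    /* format == SpvImageFormatR32f, the same as for VSIR_DATA_F32. */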
static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compiler *compiler, const struct vkd3d_shader_register *resource, const struct vkd3d_shader_register_range *resource_range, - enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_component_type sampled_type, + enum vkd3d_shader_resource_type resource_type, enum vsir_data_type sampled_type, unsigned int structure_stride, bool raw, const struct vkd3d_spirv_resource_type *resource_type_info) { const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; @@ -7021,9 +6890,9 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; const struct vkd3d_spirv_resource_type *resource_type_info; unsigned int sample_count = descriptor->sample_count; - enum vkd3d_shader_component_type sampled_type; struct vkd3d_symbol resource_symbol; struct vkd3d_shader_register reg; + enum vsir_data_type sampled_type;
vsir_register_init(&reg, is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VSIR_DATA_F32, 1); reg.idx[0].offset = descriptor->register_id; @@ -7040,7 +6909,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_comp return; }
- sampled_type = vkd3d_component_type_from_data_type(descriptor->resource_data_type); + sampled_type = descriptor->resource_data_type;
if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) { @@ -7053,7 +6922,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp { uint32_t array_type_id, struct_id;
- type_id = vkd3d_spirv_get_type_id(builder, sampled_type, 1); + type_id = spirv_get_type_id(compiler, sampled_type, 1);
array_type_id = vkd3d_spirv_get_op_type_runtime_array(builder, type_id); vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 4); @@ -7094,7 +6963,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp { VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */
- counter_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + counter_type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); if (spirv_compiler_is_opengl_target(compiler)) { vkd3d_spirv_enable_capability(builder, SpvCapabilityAtomicStorage); @@ -7158,7 +7027,7 @@ static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler if (alignment) TRACE("Ignoring alignment %u.\n", alignment);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
@@ -7170,8 +7039,7 @@ static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler spirv_compiler_emit_register_debug_name(builder, var_id, reg);
vkd3d_symbol_make_register(&reg_symbol, reg); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0); + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VSIR_DATA_U32, VKD3DSP_WRITEMASK_0); reg_symbol.info.reg.structure_stride = structure_stride; spirv_compiler_put_symbol(compiler, &reg_symbol); } @@ -7209,10 +7077,9 @@ static void spirv_compiler_emit_output_vertex_count(struct spirv_compi SpvExecutionModeOutputVertices, instruction->declaration.count); }
-static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_input_primitive(struct spirv_compiler *compiler) { - enum vkd3d_primitive_type primitive_type = instruction->declaration.primitive_type.type; + enum vkd3d_primitive_type primitive_type = compiler->program->input_primitive; SpvExecutionMode mode;
switch (primitive_type) @@ -7233,7 +7100,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi mode = SpvExecutionModeInputTrianglesAdjacency; break; default: - FIXME("Unhandled primitive type %#x.\n", primitive_type); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unhandled input primitive type %#x.", primitive_type); return; }
@@ -7263,10 +7131,9 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) } }
-static void spirv_compiler_emit_dcl_output_topology(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_output_topology(struct spirv_compiler *compiler) { - enum vkd3d_primitive_type primitive_type = instruction->declaration.primitive_type.type; + enum vkd3d_primitive_type primitive_type = compiler->program->output_topology; SpvExecutionMode mode;
switch (primitive_type) @@ -7282,7 +7149,8 @@ static void spirv_compiler_emit_dcl_output_topology(struct spirv_compiler *compi mode = SpvExecutionModeOutputTriangleStrip; break; default: - ERR("Unexpected primitive type %#x.\n", primitive_type); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unhandled output topology %#x.", primitive_type); return; }
@@ -7569,6 +7437,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru {VSIR_OP_ITOF, SpvOpConvertSToF}, {VSIR_OP_ITOI, SpvOpSConvert}, {VSIR_OP_MUL, SpvOpFMul}, + {VSIR_OP_NEG, SpvOpFNegate}, {VSIR_OP_NOT, SpvOpNot}, {VSIR_OP_OR, SpvOpBitwiseOr}, {VSIR_OP_UDIV_SIMPLE, SpvOpUDiv}, @@ -7624,11 +7493,12 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VSIR_OP_ITOF); } - else if (dst->reg.data_type == VSIR_DATA_U16 || dst->reg.data_type == VSIR_DATA_U32) + else if (dst->reg.data_type == VSIR_DATA_I16 || dst->reg.data_type == VSIR_DATA_I32 + || dst->reg.data_type == VSIR_DATA_U16 || dst->reg.data_type == VSIR_DATA_U32) { val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VSIR_OP_ITOI); } - else if (dst->reg.data_type == VSIR_DATA_U64) + else if (dst->reg.data_type == VSIR_DATA_I64 || dst->reg.data_type == VSIR_DATA_U64) { val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VSIR_OP_ITOI); } @@ -7645,14 +7515,13 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - uint32_t src_ids[SPIRV_MAX_SRC_COUNT], condition_id = 0, uint_max_id = 0; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; - unsigned int i, component_count; + uint32_t src_ids[SPIRV_MAX_SRC_COUNT]; uint32_t type_id, val_id; SpvOp op = SpvOpMax; - bool check_zero; + unsigned int i;
if (src->reg.data_type == VSIR_DATA_U64 && instruction->opcode == VSIR_OP_COUNTBITS) { @@ -7692,44 +7561,14 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil return VKD3D_ERROR_INVALID_SHADER; }
- /* SPIR-V doesn't mandate a behaviour when a denominator is zero, - * so we have an explicit check. */ - switch (instruction->opcode) - { - case VSIR_OP_IDIV: - case VSIR_OP_IREM: - case VSIR_OP_UDIV_SIMPLE: - case VSIR_OP_UREM: - check_zero = true; - break; - - default: - check_zero = false; - break; - } - VKD3D_ASSERT(instruction->dst_count == 1); VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); - if (check_zero) - VKD3D_ASSERT(instruction->src_count == 2);
- component_count = vsir_write_mask_component_count(dst[0].write_mask); type_id = spirv_compiler_get_type_id_for_dst(compiler, dst);
for (i = 0; i < instruction->src_count; ++i) src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask);
- if (check_zero) - { - condition_id = spirv_compiler_emit_int_to_bool(compiler, - VKD3D_SHADER_CONDITIONAL_OP_NZ, src[1].reg.data_type, component_count, src_ids[1]); - - if (dst[0].reg.data_type == VSIR_DATA_U64) - uint_max_id = spirv_compiler_get_constant_uint64_vector(compiler, UINT64_MAX, component_count); - else - uint_max_id = spirv_compiler_get_constant_uint_vector(compiler, UINT_MAX, component_count); - } - /* The SPIR-V specification states, "The resulting value is undefined if * Shift is greater than or equal to the bit width of the components of * Base." Direct3D applies only the lowest 5 bits of the shift. @@ -7741,7 +7580,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil || instruction->opcode == VSIR_OP_ISHR || instruction->opcode == VSIR_OP_USHR)) { uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, - VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); + VSIR_DATA_U32, vsir_write_mask_component_count(dst->write_mask), 0x1f); src_ids[1] = vkd3d_spirv_build_op_and(builder, type_id, src_ids[1], mask_id); }
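A worked example of the masking above, illustration only: Direct3D applies a 32-bit shift modulo 32, while SPIR-V leaves an out-of-range shift undefined, so a shift amount of 33 must behave like a shift by 1:

    uint32_t value = 0x10, shift = 33;
    uint32_t result = value << (shift & 0x1f); /* 0x20, as if shifting by 1 */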
@@ -7750,13 +7589,22 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil if (instruction->flags & VKD3DSI_PRECISE_XYZW) vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0);
- if (check_zero) - val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, val_id, uint_max_id); - spirv_compiler_emit_store_dst(compiler, dst, val_id); return VKD3D_OK; }
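For reference, the guard deleted above gave integer division a defined result on a zero denominator, which SPIR-V leaves undefined. In scalar C terms the removed select amounted to, illustration only:

    /* Direct3D defines division by zero as all ones for these opcodes. */
    uint32_t quotient = divisor ? dividend / divisor : UINT_MAX;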
+static void spirv_compiler_emit_saturate(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t val_id; + + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + static void spirv_compiler_emit_isfinite(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -7839,7 +7687,7 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp unsigned int i, component_count; enum GLSLstd450 glsl_inst;
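The new spirv_compiler_emit_saturate() helper above loads the source, routes it through spirv_compiler_emit_sat(), and stores the result. As a scalar model, NaN handling aside, saturation clamps to the [0, 1] range, illustration only:

    static float saturate(float x)
    {
        return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
    }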
- if (src[0].reg.data_type == VSIR_DATA_U64 && (instruction->opcode == VSIR_OP_FIRSTBIT_HI + if (data_type_is_64_bit(src[0].reg.data_type) && (instruction->opcode == VSIR_OP_FIRSTBIT_HI || instruction->opcode == VSIR_OP_FIRSTBIT_LO || instruction->opcode == VSIR_OP_FIRSTBIT_SHI)) { /* At least some drivers support this anyway, but if validation is enabled it will fail. */ @@ -7878,7 +7726,7 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp component_count = vsir_write_mask_component_count(dst->write_mask); uint_max_id = spirv_compiler_get_constant_uint_vector(compiler, UINT32_MAX, component_count); condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpIEqual, - vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), val_id, uint_max_id); + spirv_get_type_id(compiler, VSIR_DATA_BOOL, component_count), val_id, uint_max_id); rev_val_id = vkd3d_spirv_build_op_isub(builder, type_id, spirv_compiler_get_constant_uint_vector(compiler, 31, component_count), val_id); val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, val_id, rev_val_id); @@ -7905,7 +7753,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); spirv_compiler_get_register_info(compiler, &src->reg, &src_reg_info);
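A worked example of the FIRSTBIT_HI fixup above, illustration only: GLSLstd450 FindUMsb numbers bits from the least significant bit, while Direct3D's firstbit_hi counts from the most significant bit, so the result is flipped unless the source was zero, in which case FindUMsb's ~0u result passes through the select unchanged:

    uint32_t msb = 16;                           /* FindUMsb(0x00010000) */
    uint32_t hi = (msb == ~0u) ? ~0u : 31 - msb; /* firstbit_hi == 15 */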
- if (dst_reg_info.component_type != src_reg_info.component_type + if (dst_reg_info.data_type != src_reg_info.data_type || dst_reg_info.write_mask != src_reg_info.write_mask) goto general_implementation;
@@ -7928,7 +7776,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, dst_id = spirv_compiler_get_register_id(compiler, &dst->reg); src_id = spirv_compiler_get_register_id(compiler, &src->reg);
- type_id = vkd3d_spirv_get_type_id(builder, dst_reg_info.component_type, VKD3D_VEC4_SIZE); + type_id = spirv_get_type_id(compiler, dst_reg_info.data_type, VKD3D_VEC4_SIZE); val_id = vkd3d_spirv_build_op_load(builder, type_id, src_id, SpvMemoryAccessMaskNone); dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone);
@@ -7957,7 +7805,7 @@ general_implementation: val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); if (dst->reg.data_type != src->reg.data_type) { - val_id = vkd3d_spirv_build_op_bitcast(builder, vkd3d_spirv_get_type_id_for_data_type(builder, + val_id = vkd3d_spirv_build_op_bitcast(builder, spirv_get_type_id(compiler, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)), val_id); } spirv_compiler_emit_store_dst(compiler, dst, val_id); @@ -7983,7 +7831,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, { if (instruction->opcode == VSIR_OP_CMP) condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, - vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, + spirv_get_type_id(compiler, VSIR_DATA_BOOL, component_count), condition_id, spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); else condition_id = spirv_compiler_emit_int_to_bool(compiler, @@ -8010,7 +7858,7 @@ static void spirv_compiler_emit_swapc(struct spirv_compiler *compiler, src2_id = spirv_compiler_emit_load_src(compiler, &src[2], dst->write_mask);
component_count = vsir_write_mask_component_count(dst->write_mask); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, component_count);
condition_id = spirv_compiler_emit_int_to_bool(compiler, VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); @@ -8027,13 +7875,13 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; - enum vkd3d_shader_component_type component_type; uint32_t type_id, val_id, src_ids[2]; unsigned int component_count, i; + enum vsir_data_type data_type; uint32_t write_mask;
component_count = vsir_write_mask_component_count(dst->write_mask); - component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); + data_type = dst->reg.data_type;
if (instruction->opcode == VSIR_OP_DP4) write_mask = VKD3DSP_WRITEMASK_ALL; @@ -8046,15 +7894,12 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, for (i = 0; i < ARRAY_SIZE(src_ids); ++i) src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], write_mask);
- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + type_id = spirv_get_type_id(compiler, data_type, 1);
val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpDot, type_id, src_ids[0], src_ids[1]); if (component_count > 1) - { - val_id = spirv_compiler_emit_construct_vector(compiler, - component_type, component_count, val_id, 0, 1); - } + val_id = spirv_compiler_emit_construct_vector(compiler, data_type, component_count, val_id, 0, 1); if (instruction->flags & VKD3DSI_PRECISE_XYZW) vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0);
@@ -8092,7 +7937,7 @@ static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, unsigned int i, component_count;
component_count = vsir_write_mask_component_count(dst->write_mask); - type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); + type_id = spirv_get_type_id(compiler, dst->reg.data_type, component_count);
for (i = 0; i < ARRAY_SIZE(src_ids); ++i) src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask); @@ -8111,7 +7956,6 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t src_type_id, dst_type_id, condition_type_id; - enum vkd3d_shader_component_type component_type; unsigned int component_count; uint32_t write_mask;
@@ -8143,17 +7987,16 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, int_min_id);
/* VSIR allows the destination of a signed conversion to be unsigned. */ - component_type = vkd3d_component_type_from_data_type(dst->reg.data_type);
- int_max_id = spirv_compiler_get_constant_vector(compiler, component_type, component_count, INT_MAX); - condition_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + int_max_id = spirv_compiler_get_constant_vector(compiler, dst->reg.data_type, component_count, INT_MAX); + condition_type_id = spirv_get_type_id(compiler, VSIR_DATA_BOOL, component_count); condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, condition_type_id, val_id, float_max_id);
val_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpConvertFToS, dst_type_id, val_id); val_id = vkd3d_spirv_build_op_select(builder, dst_type_id, condition_id, int_max_id, val_id);
- zero_id = spirv_compiler_get_constant_vector(compiler, component_type, component_count, 0); + zero_id = spirv_compiler_get_constant_vector(compiler, dst->reg.data_type, component_count, 0); condition_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpIsNan, condition_type_id, src_id); val_id = vkd3d_spirv_build_op_select(builder, dst_type_id, condition_id, zero_id, val_id);
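The sequence above implements Direct3D's saturating float-to-signed-integer conversion on top of SPIR-V's undefined out-of-range behaviour. A scalar model, illustration only:

    #include <math.h>
    #include <stdint.h>

    static int32_t ftoi(float x)
    {
        if (isnan(x))
            return 0;               /* NaN converts to zero. */
        if (x <= -2147483648.0f)
            return INT32_MIN;       /* Clamp instead of wrapping. */
        if (x >= 2147483648.0f)
            return INT32_MAX;
        return (int32_t)x;
    }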
@@ -8199,7 +8042,7 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, zero_id);
uint_max_id = spirv_compiler_get_constant_uint_vector(compiler, UINT_MAX, component_count); - condition_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + condition_type_id = spirv_get_type_id(compiler, VSIR_DATA_BOOL, component_count); condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, condition_type_id, val_id, float_max_id);
@@ -8224,7 +8067,7 @@ static void spirv_compiler_emit_dtof(struct spirv_compiler *compiler,
src_id = spirv_compiler_emit_load_src(compiler, src, write_mask);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, component_count); val_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpFConvert, type_id, src_id); if (instruction->flags & VKD3DSI_PRECISE_XYZW) vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); @@ -8239,17 +8082,17 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; - enum vkd3d_shader_component_type component_type; unsigned int i, j, k, src_count, size; + enum vsir_data_type data_type; uint32_t write_mask; SpvOp op;
src_count = instruction->src_count; VKD3D_ASSERT(2 <= src_count && src_count <= ARRAY_SIZE(src_ids));
- component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - size = (src[src_count - 1].reg.data_type == VSIR_DATA_U64) ? 0x40 : 0x20; + data_type = dst->reg.data_type; + type_id = spirv_get_type_id(compiler, data_type, 1); + size = data_type_is_64_bit(src[src_count - 1].reg.data_type) ? 0x40 : 0x20; mask_id = spirv_compiler_get_constant_uint(compiler, size - 1); size_id = spirv_compiler_get_constant_uint(compiler, size);
@@ -8274,7 +8117,7 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp for (j = 0; j < src_count; ++j) { src_ids[src_count - j - 1] = spirv_compiler_emit_load_src_with_type(compiler, - &src[j], write_mask, component_type); + &src[j], write_mask, data_type); }
/* In SPIR-V, the last two operands are Offset and Count. */ @@ -8290,7 +8133,7 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp op, type_id, src_ids, src_count); }
- spirv_compiler_emit_store_dst_components(compiler, dst, component_type, constituents); + spirv_compiler_emit_store_dst_components(compiler, dst, data_type, constituents); }
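For the bitfield helpers above: Direct3D reads the offset and count operands modulo the source bit width, hence the size - 1 mask (0x1f or 0x3f), and SPIR-V expects Offset and Count as the last two operands. A scalar model of a 32-bit extract with a masked offset, illustration only and assuming count stays below 32:

    uint32_t value = 0xabcd1234, count = 8;
    uint32_t offset = 36 & 0x1f;                             /* 4 */
    uint32_t bits = (value >> offset) & ((1u << count) - 1); /* ubfe-style */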
static void spirv_compiler_emit_f16tof32(struct spirv_compiler *compiler, @@ -8305,8 +8148,8 @@ static void spirv_compiler_emit_f16tof32(struct spirv_compiler *compiler, unsigned int i, j;
instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); - scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, 2); + scalar_type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, 1);
/* FIXME: Consider a single UnpackHalf2x16 instruction per 2 components. */ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); @@ -8322,8 +8165,7 @@ static void spirv_compiler_emit_f16tof32(struct spirv_compiler *compiler, scalar_type_id, result_id, 0); }
- spirv_compiler_emit_store_dst_components(compiler, - dst, vkd3d_component_type_from_data_type(dst->reg.data_type), components); + spirv_compiler_emit_store_dst_components(compiler, dst, dst->reg.data_type, components); }
static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, @@ -8338,8 +8180,8 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, unsigned int i, j;
instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); - scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, 2); + scalar_type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); zero_id = spirv_compiler_get_constant_float(compiler, 0.0f);
/* FIXME: Consider a single PackHalf2x16 instruction per 2 components. */ @@ -8358,8 +8200,7 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, instr_set_id, GLSLstd450PackHalf2x16, &src_id, 1); }
- spirv_compiler_emit_store_dst_components(compiler, - dst, vkd3d_component_type_from_data_type(dst->reg.data_type), components); + spirv_compiler_emit_store_dst_components(compiler, dst, dst->reg.data_type, components); }
static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *compiler, @@ -8418,7 +8259,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co src0_id = spirv_compiler_emit_load_src(compiler, &src[0], write_mask); src1_id = spirv_compiler_emit_load_src(compiler, &src[1], write_mask);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_BOOL, component_count); result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id);
@@ -8470,7 +8311,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_BOOL, component_count); result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id);
result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); @@ -8806,7 +8647,7 @@ struct vkd3d_shader_image uint32_t image_id; uint32_t sampled_image_id;
- enum vkd3d_shader_component_type sampled_type; + enum vsir_data_type sampled_type; uint32_t image_type_id; const struct vkd3d_spirv_resource_type *resource_type_info; unsigned int structure_stride; @@ -8926,9 +8767,8 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, image->image_id = 0; }
- image->image_type_id = spirv_compiler_get_image_type_id(compiler, resource_reg, - &symbol->info.resource.range, image->resource_type_info, - image->sampled_type, image->structure_stride || image->raw); + image->image_type_id = spirv_compiler_get_image_type_id(compiler, resource_reg, &symbol->info.resource.range, + image->resource_type_info, image->sampled_type, image->structure_stride || image->raw);
if (sampled) { @@ -8977,8 +8817,7 @@ static uint32_t spirv_compiler_emit_texel_offset(struct spirv_compiler *compiler int32_t data[4] = {offset->u, offset->v, offset->w, 0};
VKD3D_ASSERT(resource_type_info->dim != SpvDimCube); - return spirv_compiler_get_constant(compiler, - VKD3D_SHADER_COMPONENT_INT, component_count, (const uint32_t *)data); + return spirv_compiler_get_constant(compiler, VSIR_DATA_I32, component_count, (const uint32_t *)data); }
static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, @@ -8999,7 +8838,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler,
spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE);
- type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + type_id = spirv_get_type_id(compiler, image.sampled_type, VKD3D_VEC4_SIZE); coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], coordinate_mask); if (image.resource_type_info->resource_type != VKD3D_SHADER_RESOURCE_BUFFER && !multisample) @@ -9024,8 +8863,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_image_fetch(builder, type_id, image.image_id, coordinate_id, operands_mask, image_operands, image_operand_count);
- spirv_compiler_emit_store_dst_swizzled(compiler, - dst, val_id, image.sampled_type, src[1].swizzle); + spirv_compiler_emit_store_dst_swizzled(compiler, dst, val_id, image.sampled_type, src[1].swizzle); }
static void spirv_compiler_emit_lod(struct spirv_compiler *compiler, @@ -9045,13 +8883,12 @@ static void spirv_compiler_emit_lod(struct spirv_compiler *compiler, spirv_compiler_prepare_image(compiler, &image, &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, 2); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); val_id = vkd3d_spirv_build_op_image_query_lod(builder, type_id, image.sampled_image_id, coordinate_id);
- spirv_compiler_emit_store_dst_swizzled(compiler, - dst, val_id, image.sampled_type, resource->swizzle); + spirv_compiler_emit_store_dst_swizzled(compiler, dst, val_id, image.sampled_type, resource->swizzle); }
static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, @@ -9115,14 +8952,13 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, instruction, image.resource_type_info); }
- sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + sampled_type_id = spirv_get_type_id(compiler, image.sampled_type, VKD3D_VEC4_SIZE); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); val_id = vkd3d_spirv_build_op_image_sample(builder, op, sampled_type_id, image.sampled_image_id, coordinate_id, operands_mask, image_operands, image_operand_count);
- spirv_compiler_emit_store_dst_swizzled(compiler, - dst, val_id, image.sampled_type, resource->swizzle); + spirv_compiler_emit_store_dst_swizzled(compiler, dst, val_id, image.sampled_type, resource->swizzle); }
static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, @@ -9160,15 +8996,14 @@ static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, instruction, image.resource_type_info); }
- sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, 1); + sampled_type_id = spirv_get_type_id(compiler, image.sampled_type, 1); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); dref_id = spirv_compiler_emit_load_src(compiler, &src[3], VKD3DSP_WRITEMASK_0); val_id = vkd3d_spirv_build_op_image_sample_dref(builder, op, sampled_type_id, image.sampled_image_id, coordinate_id, dref_id, operands_mask, image_operands, image_operand_count);
- spirv_compiler_emit_store_dst_scalar(compiler, - dst, val_id, image.sampled_type, src[1].swizzle); + spirv_compiler_emit_store_dst_scalar(compiler, dst, val_id, image.sampled_type, src[1].swizzle); }
static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, @@ -9219,7 +9054,7 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, instruction, image.resource_type_info); }
- sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + sampled_type_id = spirv_get_type_id(compiler, image.sampled_type, VKD3D_VEC4_SIZE); coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; coordinate_id = spirv_compiler_emit_load_src(compiler, addr, coordinate_mask); if (image_flags & VKD3D_IMAGE_FLAG_DEPTH) @@ -9234,15 +9069,13 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, { component_idx = vsir_swizzle_get_component(sampler->swizzle, 0); /* Nvidia driver requires signed integer type. */ - component_id = spirv_compiler_get_constant(compiler, - VKD3D_SHADER_COMPONENT_INT, 1, &component_idx); + component_id = spirv_compiler_get_constant(compiler, VSIR_DATA_I32, 1, &component_idx); val_id = vkd3d_spirv_build_op_image_gather(builder, sampled_type_id, image.sampled_image_id, coordinate_id, component_id, operands_mask, image_operands, image_operand_count); }
- spirv_compiler_emit_store_dst_swizzled(compiler, - dst, val_id, image.sampled_type, resource->swizzle); + spirv_compiler_emit_store_dst_swizzled(compiler, dst, val_id, image.sampled_type, resource->swizzle); }
static uint32_t spirv_compiler_emit_raw_structured_addressing( @@ -9301,10 +9134,10 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler
if (storage_buffer_uav) { - texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + texel_type_id = spirv_get_type_id(compiler, resource_symbol->info.resource.sampled_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, resource_symbol->info.resource.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9336,11 +9169,11 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler
spirv_compiler_prepare_image(compiler, &image, &resource->reg, NULL, VKD3D_IMAGE_FLAG_NONE);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0);
- texel_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + texel_type_id = spirv_get_type_id(compiler, image.sampled_type, VKD3D_VEC4_SIZE); VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { @@ -9359,7 +9192,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler type_id, val_id, 0); } } - spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); + spirv_compiler_emit_store_dst_components(compiler, dst, VSIR_DATA_U32, constituents); }
static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, @@ -9379,7 +9212,7 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, if (!spirv_compiler_get_register_info(compiler, &resource->reg, &reg_info)) return;
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9399,7 +9232,7 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } - spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); + spirv_compiler_emit_store_dst_components(compiler, dst, VSIR_DATA_U32, constituents); }
static void spirv_compiler_emit_ld_raw_structured(struct spirv_compiler *compiler, @@ -9438,10 +9271,10 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler *
if (spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) { - type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + type_id = spirv_get_type_id(compiler, resource_symbol->info.resource.sampled_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, resource_symbol->info.resource.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9469,7 +9302,7 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * } else { - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9482,7 +9315,7 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * for (component_idx = 0; component_idx < component_count; ++component_idx) { /* Mesa Vulkan drivers require the texel parameter to be a vector. */ - data_id = spirv_compiler_emit_construct_vector(compiler, VKD3D_SHADER_COMPONENT_UINT, + data_id = spirv_compiler_emit_construct_vector(compiler, VSIR_DATA_U32, VKD3D_VEC4_SIZE, val_id, component_idx, component_count);
coordinate_id = base_coordinate_id; @@ -9512,7 +9345,7 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, if (!spirv_compiler_get_register_info(compiler, &dst->reg, &reg_info)) return;
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9570,7 +9403,7 @@ static void spirv_compiler_emit_ld_uav_typed(struct spirv_compiler *compiler,
if (spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) { - type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + type_id = spirv_get_type_id(compiler, resource_symbol->info.resource.sampled_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_0); indices[0] = spirv_compiler_get_constant_uint(compiler, 0); @@ -9585,15 +9418,14 @@ static void spirv_compiler_emit_ld_uav_typed(struct spirv_compiler *compiler, else { spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); - type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + type_id = spirv_get_type_id(compiler, image.sampled_type, VKD3D_VEC4_SIZE); coordinate_mask = (1u << image.resource_type_info->coordinate_component_count) - 1; coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], coordinate_mask);
val_id = vkd3d_spirv_build_op_image_read(builder, type_id, image.image_id, coordinate_id, SpvImageOperandsMaskNone, NULL, 0);
- spirv_compiler_emit_store_dst_swizzled(compiler, - dst, val_id, image.sampled_type, src[1].swizzle); + spirv_compiler_emit_store_dst_swizzled(compiler, dst, val_id, image.sampled_type, src[1].swizzle); } }
@@ -9613,7 +9445,7 @@ static void spirv_compiler_emit_store_uav_typed(struct spirv_compiler *compiler,
if (spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) { - type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + type_id = spirv_get_type_id(compiler, resource_symbol->info.resource.sampled_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_0); indices[0] = spirv_compiler_get_constant_uint(compiler, 0); @@ -9658,7 +9490,7 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c counter_id = resource_symbol->info.resource.uav_counter_id; VKD3D_ASSERT(counter_id);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1);
if (resource_symbol->info.resource.uav_counter_array) { @@ -9766,12 +9598,11 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil const struct vkd3d_shader_src_param *src = instruction->src; const struct vkd3d_symbol *resource_symbol = NULL; uint32_t ptr_type_id, type_id, val_id, result_id; - enum vkd3d_shader_component_type component_type; const struct vkd3d_shader_dst_param *resource; uint32_t coordinate_id, sample_id, pointer_id; struct vkd3d_shader_register_info reg_info; - SpvMemorySemanticsMask memory_semantic; struct vkd3d_shader_image image; + enum vsir_data_type data_type; unsigned int structure_stride; uint32_t coordinate_mask; uint32_t operands[6]; @@ -9820,7 +9651,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil } }
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); if (structure_stride || raw) { VKD3D_ASSERT(!raw != !structure_stride); @@ -9836,7 +9667,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil
if (resource->reg.type == VKD3DSPR_GROUPSHAREDMEM) { - component_type = VKD3D_SHADER_COMPONENT_UINT; + data_type = VSIR_DATA_U32; ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); pointer_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); } @@ -9844,8 +9675,8 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil { if (spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) { - component_type = resource_symbol->info.resource.sampled_type; - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + data_type = resource_symbol->info.resource.sampled_type; + type_id = spirv_get_type_id(compiler, data_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); operands[0] = spirv_compiler_get_constant_uint(compiler, 0); operands[1] = coordinate_id; @@ -9853,8 +9684,8 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil } else { - component_type = image.sampled_type; - type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, 1); + data_type = image.sampled_type; + type_id = spirv_get_type_id(compiler, data_type, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassImage, type_id); sample_id = spirv_compiler_get_constant_uint(compiler, 0); pointer_id = vkd3d_spirv_build_op_image_texel_pointer(builder, @@ -9862,7 +9693,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil } }
- val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); + val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, data_type);
if (instruction->flags & VKD3DARF_VOLATILE) { @@ -9871,17 +9702,13 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); }
- memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) - ? SpvMemorySemanticsSequentiallyConsistentMask - : SpvMemorySemanticsMaskNone; - operands[i++] = pointer_id; operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); - operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); + operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); if (instruction->src_count >= 3) { - operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); - operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); + operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); + operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, data_type); } operands[i++] = val_id; result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, @@ -9907,7 +9734,7 @@ static void spirv_compiler_emit_bufinfo(struct spirv_compiler *compiler, { resource_symbol = spirv_compiler_find_resource(compiler, &src->reg);
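Note the ordering change above: the deleted lines promoted atomics flagged VKD3DARF_SEQ_CST to sequentially consistent memory semantics, while the new code always emits SpvMemorySemanticsMaskNone, i.e. relaxed ordering. A rough C11 analogy, illustration only:

    #include <stdatomic.h>

    atomic_uint counter;
    /* What VKD3DARF_SEQ_CST used to map to: */
    atomic_fetch_add_explicit(&counter, 1, memory_order_seq_cst);
    /* What every atomic gets now: */
    atomic_fetch_add_explicit(&counter, 1, memory_order_relaxed);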
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); val_id = vkd3d_spirv_build_op_array_length(builder, type_id, resource_symbol->id, 0); write_mask = VKD3DSP_WRITEMASK_0; } @@ -9917,7 +9744,7 @@ static void spirv_compiler_emit_bufinfo(struct spirv_compiler *compiler,
spirv_compiler_prepare_image(compiler, &image, &src->reg, NULL, VKD3D_IMAGE_FLAG_NONE);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); val_id = vkd3d_spirv_build_op_image_query_size(builder, type_id, image.image_id); write_mask = VKD3DSP_WRITEMASK_0; } @@ -9927,7 +9754,7 @@ static void spirv_compiler_emit_bufinfo(struct spirv_compiler *compiler, stride_id = spirv_compiler_get_constant_uint(compiler, image.structure_stride); constituents[0] = vkd3d_spirv_build_op_udiv(builder, type_id, val_id, stride_id); constituents[1] = stride_id; - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, ARRAY_SIZE(constituents)); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, ARRAY_SIZE(constituents)); val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, ARRAY_SIZE(constituents)); write_mask |= VKD3DSP_WRITEMASK_1; @@ -9939,18 +9766,18 @@ static void spirv_compiler_emit_bufinfo(struct spirv_compiler *compiler, }
val_id = spirv_compiler_emit_swizzle(compiler, val_id, write_mask, - VKD3D_SHADER_COMPONENT_UINT, src->swizzle, dst->write_mask); + VSIR_DATA_U32, src->swizzle, dst->write_mask); spirv_compiler_emit_store_dst(compiler, dst, val_id); }
static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_UINT; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, lod_id, val_id, miplevel_count_id; + enum vsir_data_type data_type = VSIR_DATA_U32; uint32_t constituents[VKD3D_VEC4_SIZE]; unsigned int i, size_component_count; struct vkd3d_shader_image image; @@ -9966,14 +9793,14 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, size_component_count = image.resource_type_info->coordinate_component_count; if (image.resource_type_info->dim == SpvDimCube) --size_component_count; - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, size_component_count); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, size_component_count);
supports_mipmaps = src[1].reg.type != VKD3DSPR_UAV && !image.resource_type_info->ms; if (supports_mipmaps) { lod_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_0); val_id = vkd3d_spirv_build_op_image_query_size_lod(builder, type_id, image.image_id, lod_id); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); miplevel_count_id = vkd3d_spirv_build_op_image_query_levels(builder, type_id, image.image_id); } else @@ -9987,20 +9814,19 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, for (i = 0; i < 3 - size_component_count; ++i) constituents[i + 1] = spirv_compiler_get_constant_uint(compiler, 0); constituents[i + 1] = miplevel_count_id; - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); - val_id = vkd3d_spirv_build_op_composite_construct(builder, - type_id, constituents, i + 2); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, VKD3D_VEC4_SIZE); + val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, i + 2);
if (!(instruction->flags & VKD3DSI_RESINFO_UINT)) { - component_type = VKD3D_SHADER_COMPONENT_FLOAT; - type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE); + data_type = VSIR_DATA_F32; + type_id = spirv_get_type_id(compiler, data_type, VKD3D_VEC4_SIZE); val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); if (instruction->flags & VKD3DSI_PRECISE_XYZW) vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); } - val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, - component_type, src[1].swizzle, dst->write_mask); + val_id = spirv_compiler_emit_swizzle(compiler, val_id, + VKD3DSP_WRITEMASK_ALL, data_type, src[1].swizzle, dst->write_mask);
spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -10022,7 +9848,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co vkd3d_spirv_enable_capability(builder, SpvCapabilityImageQuery);
spirv_compiler_prepare_image(compiler, &image, &src->reg, NULL, VKD3D_IMAGE_FLAG_NONE); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); val_id = vkd3d_spirv_build_op_image_query_samples(builder, type_id, image.image_id); }
@@ -10032,10 +9858,10 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co static void spirv_compiler_emit_sample_info(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_UINT; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; + enum vsir_data_type data_type = VSIR_DATA_U32; uint32_t constituents[VKD3D_VEC4_SIZE]; uint32_t type_id, val_id; unsigned int i; @@ -10048,20 +9874,22 @@ static void spirv_compiler_emit_sample_info(struct spirv_compiler *compiler,
constituents[0] = val_id; for (i = 1; i < VKD3D_VEC4_SIZE; ++i) + { constituents[i] = spirv_compiler_get_constant_uint(compiler, 0); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); + } + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, VKD3D_VEC4_SIZE); val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, VKD3D_VEC4_SIZE);
if (!(instruction->flags & VKD3DSI_SAMPLE_INFO_UINT)) { - component_type = VKD3D_SHADER_COMPONENT_FLOAT; - type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE); + data_type = VSIR_DATA_F32; + type_id = spirv_get_type_id(compiler, data_type, VKD3D_VEC4_SIZE); val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); if (instruction->flags & VKD3DSI_PRECISE_XYZW) vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); } - val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, - component_type, src->swizzle, dst->write_mask); + val_id = spirv_compiler_emit_swizzle(compiler, val_id, + VKD3DSP_WRITEMASK_ALL, data_type, src->swizzle, dst->write_mask);
spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -10121,13 +9949,12 @@ static void spirv_compiler_emit_sample_position(struct spirv_compiler *compiler, sample_count_id = spirv_compiler_emit_query_sample_count(compiler, &instruction->src[0]); sample_index_id = spirv_compiler_emit_load_src(compiler, &instruction->src[1], VKD3DSP_WRITEMASK_0);
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); index_id = vkd3d_spirv_build_op_iadd(builder, type_id, sample_count_id, sample_index_id); - index_id = vkd3d_spirv_build_op_isub(builder, - type_id, index_id, spirv_compiler_get_constant_uint(compiler, 1)); + index_id = vkd3d_spirv_build_op_isub(builder, type_id, index_id, spirv_compiler_get_constant_uint(compiler, 1));
/* Validate sample index. */ - bool_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, 1); + bool_id = spirv_get_type_id(compiler, VSIR_DATA_BOOL, 1); id = vkd3d_spirv_build_op_logical_and(builder, bool_id, vkd3d_spirv_build_op_uless_than(builder, bool_id, sample_index_id, sample_count_id), vkd3d_spirv_build_op_uless_than_equal(builder, @@ -10135,16 +9962,16 @@ static void spirv_compiler_emit_sample_position(struct spirv_compiler *compiler, index_id = vkd3d_spirv_build_op_select(builder, type_id, id, index_id, spirv_compiler_get_constant_uint(compiler, 0));
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, 2); if (!(id = compiler->sample_positions_id)) { length_id = spirv_compiler_get_constant_uint(compiler, ARRAY_SIZE(standard_sample_positions)); array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
- for (i = 0; i < ARRAY_SIZE(standard_sample_positions); ++ i) + for (i = 0; i < ARRAY_SIZE(standard_sample_positions); ++i) { - constituents[i] = spirv_compiler_get_constant(compiler, - VKD3D_SHADER_COMPONENT_FLOAT, 2, (const uint32_t *)standard_sample_positions[i]); + constituents[i] = spirv_compiler_get_constant(compiler, VSIR_DATA_F32, + 2, (const uint32_t *)standard_sample_positions[i]); }
id = vkd3d_spirv_build_op_constant_composite(builder, array_type_id, constituents, ARRAY_SIZE(constituents)); @@ -10159,7 +9986,7 @@ static void spirv_compiler_emit_sample_position(struct spirv_compiler *compiler, id = vkd3d_spirv_build_op_load(builder, type_id, id, SpvMemoryAccessMaskNone);
id = spirv_compiler_emit_swizzle(compiler, id, VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1, - VKD3D_SHADER_COMPONENT_FLOAT, instruction->src[0].swizzle, dst->write_mask); + VSIR_DATA_F32, instruction->src[0].swizzle, dst->write_mask); spirv_compiler_emit_store_dst(compiler, dst, id); }
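The hunks in this file replace the component-type based vkd3d_spirv_get_type_id(builder, ...) and vkd3d_spirv_get_type_id_for_data_type(builder, ...) entry points with a single compiler-level spirv_get_type_id(compiler, VSIR_DATA_*, count). A minimal sketch of the scalar-type dispatch such a helper has to perform, assuming the vsir_data_type enum from vkd3d_shader_private.h (extended later in this patch); the mapping below is illustrative, not the actual vkd3d implementation:

    /* Illustrative only: derive SPIR-V scalar type parameters from a vsir
     * data type, roughly what a spirv_get_type_id() style helper needs. */
    struct spirv_scalar_info
    {
        unsigned int width;
        bool is_float, is_signed;
    };

    static struct spirv_scalar_info scalar_info_from_vsir(enum vsir_data_type t)
    {
        switch (t)
        {
            case VSIR_DATA_F16: return (struct spirv_scalar_info){16, true,  true};
            case VSIR_DATA_F32: return (struct spirv_scalar_info){32, true,  true};
            case VSIR_DATA_F64: return (struct spirv_scalar_info){64, true,  true};
            case VSIR_DATA_I32: return (struct spirv_scalar_info){32, false, true};
            case VSIR_DATA_U32: return (struct spirv_scalar_info){32, false, false};
            case VSIR_DATA_U64: return (struct spirv_scalar_info){64, false, false};
            default:            return (struct spirv_scalar_info){32, false, false};
        }
    }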
@@ -10199,14 +10026,14 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); }
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, + type_id = spirv_get_type_id(compiler, VSIR_DATA_F32, vsir_write_mask_component_count(register_info.write_mask));
instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, instr_set_id, op, src_ids, src_count);
val_id = spirv_compiler_emit_swizzle(compiler, val_id, register_info.write_mask, - VKD3D_SHADER_COMPONENT_FLOAT, src[0].swizzle, dst->write_mask); + VSIR_DATA_F32, src[0].swizzle, dst->write_mask);
spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -10334,9 +10161,8 @@ static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, direction_type_id, direction_id, val_id;
- type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); - direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VSIR_DATA_U32, 1); + type_id = spirv_get_type_id(compiler, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)); + direction_type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); direction_id = map_quad_read_across_direction(instruction->opcode); direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); @@ -10355,14 +10181,12 @@ static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compile
if (!register_is_constant_or_undef(&src[1].reg)) { - FIXME("Unsupported non-constant quad read lane index.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, "Non-constant quad read lane indices are not supported."); return; }
- type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); + type_id = spirv_get_type_id(compiler, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)); val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); val_id = vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); @@ -10411,7 +10235,7 @@ static uint32_t spirv_compiler_emit_group_nonuniform_ballot(struct spirv_compile struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, val_id;
- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, VKD3D_VEC4_SIZE); val_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); val_id = vkd3d_spirv_build_op_group_nonuniform_ballot(builder, type_id, val_id);
@@ -10470,8 +10294,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler,
op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type));
- type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); + type_id = spirv_get_type_id(compiler, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)); val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask);
vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformArithmetic); @@ -10495,7 +10318,7 @@ static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, : SpvGroupOperationReduce;
val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(compiler, VSIR_DATA_U32, 1); val_id = vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(builder, type_id, group_op, val_id);
spirv_compiler_emit_store_dst(compiler, dst, val_id); @@ -10520,8 +10343,7 @@ static void spirv_compiler_emit_wave_read_lane_at(struct spirv_compiler *compile const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, lane_id, val_id;
- type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); + type_id = spirv_get_type_id(compiler, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)); val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0);
@@ -10548,8 +10370,7 @@ static void spirv_compiler_emit_wave_read_lane_first(struct spirv_compiler *comp const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, val_id;
- type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); + type_id = spirv_get_type_id(compiler, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast_first(builder, type_id, val_id);
@@ -10593,9 +10414,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VSIR_OP_DCL_INDEXABLE_TEMP: spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); break; - case VSIR_OP_DCL_IMMEDIATE_CONSTANT_BUFFER: - spirv_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); - break; case VSIR_OP_DCL_TGSM_RAW: spirv_compiler_emit_dcl_tgsm_raw(compiler, instruction); break; @@ -10608,12 +10426,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VSIR_OP_DCL_VERTICES_OUT: spirv_compiler_emit_output_vertex_count(compiler, instruction); break; - case VSIR_OP_DCL_INPUT_PRIMITIVE: - spirv_compiler_emit_dcl_input_primitive(compiler, instruction); - break; - case VSIR_OP_DCL_OUTPUT_TOPOLOGY: - spirv_compiler_emit_dcl_output_topology(compiler, instruction); - break; case VSIR_OP_DCL_GS_INSTANCES: spirv_compiler_emit_dcl_gs_instances(compiler, instruction); break; @@ -10668,6 +10480,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VSIR_OP_ITOF: case VSIR_OP_ITOI: case VSIR_OP_MUL: + case VSIR_OP_NEG: case VSIR_OP_NOT: case VSIR_OP_OR: case VSIR_OP_UDIV_SIMPLE: @@ -10682,6 +10495,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VSIR_OP_ISFINITE: spirv_compiler_emit_isfinite(compiler, instruction); break; + case VSIR_OP_SATURATE: + spirv_compiler_emit_saturate(compiler, instruction); + break; case VSIR_OP_ABS: case VSIR_OP_ACOS: case VSIR_OP_ASIN: @@ -11044,6 +10860,36 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c } }
+static void spirv_compiler_emit_immediate_constant_buffers(struct spirv_compiler *compiler) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_immediate_constant_buffer *icb; + const struct vsir_program *program = compiler->program; + uint32_t type_id, const_id, ptr_type_id, icb_id; + struct vkd3d_shader_register reg; + struct vkd3d_symbol reg_symbol; + size_t i; + + for (i = 0; i < program->icb_count; ++i) + { + icb = program->icbs[i]; + + const_id = spirv_compiler_emit_constant_array(compiler, icb, &type_id); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); + icb_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + ptr_type_id, SpvStorageClassPrivate, const_id); + vkd3d_spirv_build_op_name(builder, icb_id, "icb%zu", icb->register_idx); + + /* Set an index count of 2 so vkd3d_symbol_make_register() uses idx[0] as a buffer id. */ + vsir_register_init(®, VKD3DSPR_IMMCONSTBUFFER, VSIR_DATA_F32, 2); + reg.idx[0].offset = icb->register_idx; + vkd3d_symbol_make_register(®_symbol, ®); + vkd3d_symbol_set_register_info(®_symbol, icb_id, SpvStorageClassPrivate, + icb->data_type, vkd3d_write_mask_from_component_count(icb->component_count)); + spirv_compiler_put_symbol(compiler, ®_symbol); + } +} + static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) { @@ -11066,10 +10912,18 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if (program->ssa_count) spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) + { spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); + } + else if (compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY) + { + spirv_compiler_emit_input_primitive(compiler); + spirv_compiler_emit_output_topology(compiler); + } spirv_compiler_emit_global_flags(compiler, program->global_flags);
spirv_compiler_emit_descriptor_declarations(compiler); + spirv_compiler_emit_immediate_constant_buffers(compiler);
compiler->spirv_parameter_info = vkd3d_calloc(program->parameter_count, sizeof(*compiler->spirv_parameter_info)); for (i = 0; i < program->parameter_count; ++i) @@ -11080,8 +10934,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, { uint32_t type_id, struct_id, ptr_type_id, var_id;
- type_id = vkd3d_spirv_get_type_id(builder, - vkd3d_component_type_from_data_type(parameter_data_type_map[parameter->data_type].type), + type_id = spirv_get_type_id(compiler, parameter_data_type_map[parameter->data_type].type, parameter_data_type_map[parameter->data_type].component_count);
struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); @@ -11188,7 +11041,11 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, }
if (compiler->failed) + { + vkd3d_shader_free_shader_code(spirv); + return VKD3D_ERROR_INVALID_SHADER; + }
if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) { @@ -11215,6 +11072,7 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags,
VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); VKD3D_ASSERT(program->has_descriptor_info); + VKD3D_ASSERT(program->has_no_modifiers);
if (!(spirv_compiler = spirv_compiler_create(program, compile_info, message_context, config_flags))) diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index ea15c1a9ad5..4798a75ce90 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -665,6 +665,8 @@ struct vkd3d_shader_sm4_parser { const uint32_t *start, *end, *ptr;
+ struct vsir_program *program; + enum vkd3d_shader_opcode phase; bool has_control_point_phase; unsigned int input_register_masks[MAX_REG_OUTPUT]; @@ -764,7 +766,7 @@ static const enum vsir_data_type data_type_table[] =
static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) { - const struct vkd3d_shader_version *version = &sm4->p.program->shader_version; + const struct vkd3d_shader_version *version = &sm4->program->shader_version;
return version->major >= 5 && version->minor >= 1; } @@ -849,7 +851,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui icb->element_count = icb_size / VKD3D_VEC4_SIZE; icb->is_null = false; memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); - shader_instruction_array_add_icb(&priv->p.program->instructions, icb); + vsir_program_add_icb(priv->program, icb); ins->declaration.icb = icb; }
@@ -971,7 +973,7 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) { struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; - struct vsir_program *program = priv->p.program; + struct vsir_program *program = priv->program; unsigned int i, register_idx, register_count; const struct shader_signature *signature; enum vkd3d_shader_register_type type; @@ -1094,14 +1096,14 @@ static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) FIXME("Unhandled output primitive type %#x.\n", primitive_type);
- priv->p.program->output_topology = ins->declaration.primitive_type.type; + priv->program->output_topology = ins->declaration.primitive_type.type; }
static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { enum vkd3d_sm4_input_primitive_type primitive_type; - struct vsir_program *program = sm4->p.program; + struct vsir_program *program = sm4->program;
primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) @@ -1129,7 +1131,7 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { - struct vsir_program *program = sm4->p.program; + struct vsir_program *program = sm4->program;
ins->declaration.count = *tokens; if (opcode == VKD3D_SM4_OP_DCL_TEMPS) @@ -1161,7 +1163,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VSIR_DATA_F32, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + &priv->program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
if (!e) { @@ -1187,7 +1189,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VSIR_DATA_F32, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + &priv->program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
if (!e) { @@ -1220,7 +1222,7 @@ static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *in uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { ins->declaration.global_flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; - sm4->p.program->global_flags = ins->declaration.global_flags; + sm4->program->global_flags = ins->declaration.global_flags; }
static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, @@ -1256,7 +1258,7 @@ static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { - struct vsir_program *program = sm4->p.program; + struct vsir_program *program = sm4->program;
ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; @@ -1272,7 +1274,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi { ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; - priv->p.program->tess_domain = ins->declaration.tessellator_domain; + priv->program->tess_domain = ins->declaration.tessellator_domain; }
static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1280,7 +1282,7 @@ static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins { ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; - priv->p.program->tess_partitioning = ins->declaration.tessellator_partitioning; + priv->program->tess_partitioning = ins->declaration.tessellator_partitioning; }
static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1288,7 +1290,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader { ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; - priv->p.program->tess_output_primitive = ins->declaration.tessellator_output_primitive; + priv->program->tess_output_primitive = ins->declaration.tessellator_output_primitive; }
static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1300,7 +1302,7 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { - struct vsir_program *program = sm4->p.program; + struct vsir_program *program = sm4->program;
ins->declaration.thread_group_size.x = *tokens++; ins->declaration.thread_group_size.y = *tokens++; @@ -2009,7 +2011,7 @@ static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const { if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) { - struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(priv->p.program, 1); + struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(priv->program, 1);
if (!(reg_idx->rel_addr = rel_addr)) { @@ -2284,7 +2286,7 @@ static bool register_is_control_point_input(const struct vkd3d_shader_register * { return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT || (reg->type == VKD3DSPR_INPUT && (priv->phase == VSIR_OP_HS_CONTROL_POINT_PHASE - || priv->p.program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); + || priv->program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); }
static uint32_t mask_from_swizzle(uint32_t swizzle) @@ -2608,8 +2610,8 @@ static void shader_sm4_read_instruction_modifier(uint32_t modifier, struct vkd3d static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) { const struct vkd3d_sm4_opcode_info *opcode_info; - struct vsir_program *program = sm4->p.program; uint32_t opcode_token, opcode, previous_token; + struct vsir_program *program = sm4->program; struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; const uint32_t **ptr = &sm4->ptr; @@ -2814,8 +2816,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) return false; - vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); + vkd3d_shader_parser_init(&sm4->p, message_context, compile_info->source_name); sm4->ptr = sm4->start; + sm4->program = program;
init_sm4_lookup_tables(&sm4->lookup);
@@ -2973,13 +2976,13 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con } } if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL - && !sm4.has_control_point_phase && !sm4.p.failed) + && !sm4.has_control_point_phase && sm4.p.status >= 0) shader_sm4_validate_default_phase_index_ranges(&sm4);
- if (sm4.p.failed) + if (sm4.p.status < 0) { vsir_program_cleanup(program); - return VKD3D_ERROR_INVALID_SHADER; + return sm4.p.status; }
return VKD3D_OK; @@ -3480,7 +3483,9 @@ static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct switch (sm4_swizzle_type) { case VKD3D_SM4_SWIZZLE_NONE: - VKD3D_ASSERT(sm4_swizzle || register_is_constant(reg)); + if (register_is_constant(reg)) + break; + VKD3D_ASSERT(sm4_swizzle); token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; break;
@@ -3692,7 +3697,7 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_constant_buffer *cb = &ins->declaration.cb; - size_t size = (cb->size + 3) / 4; + size_t size = cb->size / VKD3D_VEC4_SIZE / sizeof(float);
struct sm4_instruction instr = { @@ -3869,8 +3874,11 @@ static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_s
if (ins->opcode == VSIR_OP_DCL || ins->opcode == VSIR_OP_DCL_UAV_TYPED) { + enum vkd3d_sm4_resource_type resource_type = sm4_resource_dimension(ins->declaration.semantic.resource_type); + instr.idx[0] = pack_resource_data_type(ins->declaration.semantic.resource_data_type); instr.idx_count = 1; + instr.extra_bits |= resource_type << VKD3D_SM4_RESOURCE_TYPE_SHIFT; instr.extra_bits |= ins->declaration.semantic.sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; resource = &ins->declaration.semantic.resource; } @@ -3905,8 +3913,6 @@ static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_s if (uav) instr.extra_bits |= ins->flags << VKD3D_SM5_UAV_FLAGS_SHIFT;
- instr.extra_bits |= (sm4_resource_dimension(ins->resource_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); - write_sm4_instruction(tpf, &instr); }
@@ -4164,6 +4170,9 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ { switch (ins->opcode) { + case VSIR_OP_NOP: + break; + case VSIR_OP_DCL_CONSTANT_BUFFER: tpf_dcl_constant_buffer(tpf, ins); break; @@ -4222,6 +4231,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VSIR_OP_DCL: case VSIR_OP_DCL_RESOURCE_RAW: + case VSIR_OP_DCL_RESOURCE_STRUCTURED: case VSIR_OP_DCL_UAV_RAW: case VSIR_OP_DCL_UAV_STRUCTURED: case VSIR_OP_DCL_UAV_TYPED: @@ -4242,6 +4252,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VSIR_OP_BREAK: case VSIR_OP_CASE: case VSIR_OP_CONTINUE: + case VSIR_OP_COUNTBITS: case VSIR_OP_CUT: case VSIR_OP_CUT_STREAM: case VSIR_OP_DCL_STREAM: @@ -4267,6 +4278,9 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VSIR_OP_EXP: case VSIR_OP_F16TOF32: case VSIR_OP_F32TOF16: + case VSIR_OP_FIRSTBIT_HI: + case VSIR_OP_FIRSTBIT_LO: + case VSIR_OP_FIRSTBIT_SHI: case VSIR_OP_FRC: case VSIR_OP_FTOI: case VSIR_OP_FTOU: @@ -4305,6 +4319,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VSIR_OP_LD: case VSIR_OP_LD2DMS: case VSIR_OP_LD_RAW: + case VSIR_OP_LD_STRUCTURED: case VSIR_OP_LD_UAV_TYPED: case VSIR_OP_LOG: case VSIR_OP_LOOP: @@ -4508,7 +4523,8 @@ static void tpf_write_section(struct tpf_compiler *tpf, uint32_t tag, const stru add_section(tpf, tag, &buffer); }
-int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, +int tpf_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *rdef, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { enum vkd3d_shader_type shader_type = program->shader_version.type; @@ -4517,6 +4533,9 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struc size_t i; int ret;
+ if ((ret = vsir_program_optimize(program, config_flags, compile_info, message_context))) + return ret; + if ((ret = vsir_allocate_temp_registers(program, message_context))) return ret;
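With this change the optimisation pass runs inside tpf_compile() rather than in the caller. A minimal sketch of the resulting pass-chaining pattern, using the two pass entry points declared in vkd3d_shader_private.h; the wrapper function itself is hypothetical:

    /* Hypothetical wrapper: each pass returns VKD3D_OK (0) on success or a
     * negative vkd3d_result, so failures short-circuit the chain. */
    static int run_tpf_prepasses(struct vsir_program *program, uint64_t config_flags,
            const struct vkd3d_shader_compile_info *compile_info,
            struct vkd3d_shader_message_context *message_context)
    {
        int ret;

        if ((ret = vsir_program_optimize(program, config_flags, compile_info, message_context)))
            return ret;
        if ((ret = vsir_allocate_temp_registers(program, message_context)))
            return ret;

        return VKD3D_OK;
    }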
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 5fcc836aae1..ee113f57736 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -266,7 +266,7 @@ void vkd3d_shader_trace_text_(const char *text, size_t size, const char *functio q = end; else ++q; - vkd3d_dbg_printf(VKD3D_DBG_LEVEL_TRACE, function, "%.*s", (int)(q - p), p); + vkd3d_dbg_printf(VKD3D_DEBUG_ENV_NAME, VKD3D_DBG_LEVEL_TRACE, function, "%.*s", (int)(q - p), p); } }
@@ -722,14 +722,13 @@ uint64_t vkd3d_shader_init_config_flags(void) return config_flags; }
-void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, +void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vkd3d_shader_message_context *message_context, const char *source_name) { parser->message_context = message_context; parser->location.source_name = source_name; parser->location.line = 1; parser->location.column = 0; - parser->program = program; }
void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, @@ -741,7 +740,32 @@ void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parse vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); va_end(args);
- parser->failed = true; + if (parser->status >= 0) + { + switch (error) + { + case VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED: + case VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED: + case VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED: + case VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED: + case VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED: + case VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED: + parser->status = VKD3D_ERROR_NOT_IMPLEMENTED; + break; + case VKD3D_SHADER_ERROR_DXBC_OUT_OF_MEMORY: + case VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY: + case VKD3D_SHADER_ERROR_SPV_OUT_OF_MEMORY: + case VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY: + case VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY: + case VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY: + case VKD3D_SHADER_ERROR_FX_OUT_OF_MEMORY: + parser->status = VKD3D_ERROR_OUT_OF_MEMORY; + break; + default: + parser->status = VKD3D_ERROR_INVALID_SHADER; + break; + } + } }
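Since the parser now records the first classified failure in `status' instead of a boolean, front ends can propagate a meaningful vkd3d_result directly, as tpf_parse() does above. A sketch of that consumer pattern; parse_example() is hypothetical:

    /* Hypothetical front end: errors reported through
     * vkd3d_shader_parser_error() latch parser->status, which callers
     * return instead of a blanket VKD3D_ERROR_INVALID_SHADER. */
    static int parse_example(struct vkd3d_shader_parser *parser, struct vsir_program *program)
    {
        /* ... parse instructions, reporting problems as they are found ... */

        if (parser->status < 0)
        {
            vsir_program_cleanup(program);
            return parser->status;
        }

        return VKD3D_OK;
    }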
void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, @@ -924,6 +948,7 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig struct vkd3d_shader_scan_context { const struct vkd3d_shader_version *version; + const struct vkd3d_shader_d3dbc_source_info *d3dbc_source_info;
struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; size_t descriptors_size; @@ -991,6 +1016,7 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con context->location.line = 2; /* Line 1 is the version token. */ context->api_version = VKD3D_SHADER_API_VERSION_1_2; context->combined_sampler_info = combined_sampler_info; + context->d3dbc_source_info = vkd3d_find_struct(compile_info->next, D3DBC_SOURCE_INFO);
for (i = 0; i < compile_info->option_count; ++i) { @@ -1192,15 +1218,6 @@ static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_conte d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; }
-static void vkd3d_shader_scan_combined_sampler_declaration( - struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_semantic *semantic) -{ - vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &semantic->resource.reg.reg, - &semantic->resource.range, VKD3D_SHADER_RESOURCE_NONE, VSIR_DATA_UNUSED); - vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, &semantic->resource.reg.reg, - &semantic->resource.range, semantic->resource_type, VSIR_DATA_F32); -} - const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( const struct vkd3d_shader_scan_descriptor_info1 *info, enum vkd3d_shader_descriptor_type type, unsigned int register_id) @@ -1286,40 +1303,6 @@ static void vkd3d_shader_scan_combined_sampler_usage(struct vkd3d_shader_scan_co s->sampler_index = sampler_idx; }
-static void vkd3d_shader_scan_sample_instruction(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *resource, const struct vkd3d_shader_register *sampler) -{ - unsigned int resource_idx = resource->idx[0].offset; - unsigned int sampler_idx = sampler->idx[0].offset; - - vkd3d_shader_scan_combined_sampler_usage(context, resource, sampler); - - if (!context->scan_descriptor_info) - return; - - /* Sample instructions lowered from 1.x texture instructions have no - * DCL, so we need to add the resource if it didn't already exist. - * Such descriptors have a fixed count, type, etc. */ - - if (!vkd3d_shader_find_descriptor(context->scan_descriptor_info, - VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_idx)) - { - struct vkd3d_shader_register_range range = {.first = resource_idx, .last = resource_idx}; - - vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource, - &range, VKD3D_SHADER_RESOURCE_TEXTURE_2D, VSIR_DATA_F32); - } - - if (!vkd3d_shader_find_descriptor(context->scan_descriptor_info, - VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_idx)) - { - struct vkd3d_shader_register_range range = {.first = sampler_idx, .last = sampler_idx}; - - vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, resource, - &range, VKD3D_SHADER_RESOURCE_NONE, VSIR_DATA_UNUSED); - } -} - static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, enum vsir_data_type resource_data_type, unsigned int sample_count, @@ -1377,14 +1360,9 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte vkd3d_shader_scan_sampler_declaration(context, instruction); break; case VSIR_OP_DCL: - if (instruction->declaration.semantic.resource_type == VKD3D_SHADER_RESOURCE_NONE) + if (instruction->declaration.semantic.resource_type == VKD3D_SHADER_RESOURCE_NONE + || instruction->declaration.semantic.resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) break; - - if (instruction->declaration.semantic.resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) - { - vkd3d_shader_scan_combined_sampler_declaration(context, &instruction->declaration.semantic); - break; - } /* fall through */ case VSIR_OP_DCL_UAV_TYPED: vkd3d_shader_scan_typed_resource_declaration(context, instruction); @@ -1523,9 +1501,6 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte if (context->cf_info_count) context->cf_info[context->cf_info_count - 1].inside_block = false; break; - case VSIR_OP_TEX: - case VSIR_OP_TEXBEM: - case VSIR_OP_TEXBEML: case VSIR_OP_TEXDP3TEX: case VSIR_OP_TEXM3x2TEX: case VSIR_OP_TEXM3x3SPEC: @@ -1539,6 +1514,7 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte break; case VSIR_OP_GATHER4: case VSIR_OP_GATHER4_C: + case VSIR_OP_SAMPLE: case VSIR_OP_SAMPLE_B: case VSIR_OP_SAMPLE_C: case VSIR_OP_SAMPLE_C_LZ: @@ -1546,9 +1522,6 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte case VSIR_OP_SAMPLE_LOD: vkd3d_shader_scan_combined_sampler_usage(context, &instruction->src[1].reg, &instruction->src[2].reg); break; - case VSIR_OP_SAMPLE: - vkd3d_shader_scan_sample_instruction(context, &instruction->src[1].reg, &instruction->src[2].reg); - break; case VSIR_OP_GATHER4_PO: case VSIR_OP_GATHER4_PO_C: vkd3d_shader_scan_combined_sampler_usage(context, &instruction->src[2].reg, &instruction->src[3].reg); @@ -1683,12 +1656,12 @@ static int 
vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; + struct vkd3d_shader_scan_thread_group_size_info *thread_group_size_info; struct vkd3d_shader_scan_descriptor_info *descriptor_info; struct vkd3d_shader_scan_signature_info *signature_info; struct vkd3d_shader_scan_context context; struct vkd3d_shader_instruction *ins; int ret = VKD3D_OK; - unsigned int i;
descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); if (descriptor_info) @@ -1703,7 +1676,11 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh add_descriptor_info = true; }
+ if (program->has_descriptor_info) + add_descriptor_info = false; + tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); + thread_group_size_info = vkd3d_find_struct(compile_info->next, SCAN_THREAD_GROUP_SIZE_INFO);
vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, add_descriptor_info ? &program->descriptors : NULL, combined_sampler_info, message_context); @@ -1720,21 +1697,6 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh break; }
- for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) - { - struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; - struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; - unsigned int size = program->flat_constant_count[i]; - struct vkd3d_shader_descriptor_info1 *d; - - if (size) - { - if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, - ®, &range, VKD3D_SHADER_RESOURCE_BUFFER, VSIR_DATA_U32))) - d->buffer_size = size * 16; - } - } - if (!ret && signature_info) { if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &program->input_signature) @@ -1756,6 +1718,13 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh tessellation_info->partitioning = context.partitioning; }
+ if (!ret && thread_group_size_info) + { + thread_group_size_info->x = program->thread_group_size.x; + thread_group_size_info->y = program->thread_group_size.y; + thread_group_size_info->z = program->thread_group_size.z; + } + if (ret < 0) { if (combined_sampler_info) @@ -1840,7 +1809,7 @@ static int vsir_program_compile(struct vsir_program *program, const struct vkd3d case VKD3D_SHADER_TARGET_DXBC_TPF: if ((ret = vsir_program_scan(program, &scan_info, message_context, true)) < 0) return ret; - ret = tpf_compile(program, config_flags, reflection_data, out, message_context); + ret = tpf_compile(program, config_flags, compile_info, reflection_data, out, message_context); break;
case VKD3D_SHADER_TARGET_GLSL: @@ -2182,6 +2151,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_BYTECODE, VKD3D_SHADER_TARGET_DXBC_TPF, VKD3D_SHADER_TARGET_FX, +#ifdef VKD3D_SHADER_UNSUPPORTED_MSL + VKD3D_SHADER_TARGET_MSL, +#endif };
static const enum vkd3d_shader_target_type d3dbc_types[] = @@ -2253,6 +2225,7 @@ int vkd3d_shader_preprocess(const struct vkd3d_shader_compile_info *compile_info struct vkd3d_shader_code *out, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_dump_data dump_data; int ret;
TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); @@ -2265,7 +2238,11 @@ int vkd3d_shader_preprocess(const struct vkd3d_shader_compile_info *compile_info
vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
- ret = preproc_lexer_parse(compile_info, out, &message_context); + fill_shader_dump_data(compile_info, &dump_data); + vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, SHADER_DUMP_TYPE_SOURCE); + + if ((ret = preproc_lexer_parse(compile_info, out, &message_context)) >= 0) + vkd3d_shader_dump_shader(&dump_data, out->code, out->size, SHADER_DUMP_TYPE_PREPROC);
vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) @@ -2279,204 +2256,6 @@ void vkd3d_shader_set_log_callback(PFN_vkd3d_log callback) vkd3d_dbg_set_log_callback(callback); }
-static struct vkd3d_shader_param_node *shader_param_allocator_node_create( - struct vkd3d_shader_param_allocator *allocator) -{ - struct vkd3d_shader_param_node *node; - - if (!(node = vkd3d_malloc(offsetof(struct vkd3d_shader_param_node, param[allocator->count * allocator->stride])))) - return NULL; - node->next = NULL; - return node; -} - -static void shader_param_allocator_init(struct vkd3d_shader_param_allocator *allocator, - size_t count, size_t stride) -{ - allocator->count = max(count, MAX_REG_OUTPUT); - allocator->stride = stride; - allocator->head = NULL; - allocator->current = NULL; - allocator->index = allocator->count; -} - -static void shader_param_allocator_destroy(struct vkd3d_shader_param_allocator *allocator) -{ - struct vkd3d_shader_param_node *current = allocator->head; - - while (current) - { - struct vkd3d_shader_param_node *next = current->next; - vkd3d_free(current); - current = next; - } -} - -void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, size_t count) -{ - void *params; - - if (!allocator->current || count > allocator->count - allocator->index) - { - struct vkd3d_shader_param_node *next; - - /* Monolithic switch has no definite parameter count limit. */ - allocator->count = max(allocator->count, count); - - if (!(next = shader_param_allocator_node_create(allocator))) - return NULL; - if (allocator->current) - allocator->current->next = next; - else - allocator->head = next; - allocator->current = next; - allocator->index = 0; - } - - params = &allocator->current->param[allocator->index * allocator->stride]; - allocator->index += count; - return params; -} - -bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instructions, size_t reserve) -{ - memset(instructions, 0, sizeof(*instructions)); - /* Size the parameter initial allocations so they are large enough for most shaders. The - * code path for chained allocations will be tested if a few shaders need to use it. 
*/ - shader_param_allocator_init(&instructions->dst_params, reserve - reserve / 8u, - sizeof(struct vkd3d_shader_dst_param)); - shader_param_allocator_init(&instructions->src_params, reserve * 2u, sizeof(struct vkd3d_shader_src_param)); - return shader_instruction_array_reserve(instructions, reserve); -} - -bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, size_t reserve) -{ - if (!vkd3d_array_reserve((void **)&instructions->elements, &instructions->capacity, reserve, - sizeof(*instructions->elements))) - { - ERR("Failed to allocate instructions.\n"); - return false; - } - return true; -} - -bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *instructions, - size_t idx, size_t count) -{ - VKD3D_ASSERT(idx <= instructions->count); - - if (!shader_instruction_array_reserve(instructions, instructions->count + count)) - return false; - - memmove(&instructions->elements[idx + count], &instructions->elements[idx], - (instructions->count - idx) * sizeof(*instructions->elements)); - memset(&instructions->elements[idx], 0, count * sizeof(*instructions->elements)); - - instructions->count += count; - - return true; -} - -bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, - struct vkd3d_shader_immediate_constant_buffer *icb) -{ - if (!vkd3d_array_reserve((void **)&instructions->icbs, &instructions->icb_capacity, instructions->icb_count + 1, - sizeof(*instructions->icbs))) - return false; - instructions->icbs[instructions->icb_count++] = icb; - return true; -} - -static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( - struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, - size_t count); - -static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, - struct vkd3d_shader_instruction_array *instructions) -{ - unsigned int i; - - for (i = 0; i < reg->idx_count; ++i) - { - if (!reg->idx[i].rel_addr) - continue; - - if (!(reg->idx[i].rel_addr = shader_instruction_array_clone_src_params(instructions, reg->idx[i].rel_addr, 1))) - return false; - } - - return true; -} - -static struct vkd3d_shader_dst_param *shader_instruction_array_clone_dst_params( - struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_dst_param *params, - size_t count) -{ - struct vkd3d_shader_dst_param *dst_params; - size_t i; - - if (!(dst_params = shader_dst_param_allocator_get(&instructions->dst_params, count))) - return NULL; - - memcpy(dst_params, params, count * sizeof(*params)); - for (i = 0; i < count; ++i) - { - if (!shader_register_clone_relative_addresses(&dst_params[i].reg, instructions)) - return NULL; - } - - return dst_params; -} - -static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( - struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, - size_t count) -{ - struct vkd3d_shader_src_param *src_params; - size_t i; - - if (!(src_params = shader_src_param_allocator_get(&instructions->src_params, count))) - return NULL; - - memcpy(src_params, params, count * sizeof(*params)); - for (i = 0; i < count; ++i) - { - if (!shader_register_clone_relative_addresses(&src_params[i].reg, instructions)) - return NULL; - } - - return src_params; -} - -/* NOTE: Immediate constant buffers are not cloned, so the source must not be destroyed while the - * destination is in use. 
This seems like a reasonable requirement given how this is currently used. */ -bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, - size_t dst, size_t src) -{ - struct vkd3d_shader_instruction *ins = &instructions->elements[dst]; - - *ins = instructions->elements[src]; - - if (ins->dst_count && ins->dst && !(ins->dst = shader_instruction_array_clone_dst_params(instructions, - ins->dst, ins->dst_count))) - return false; - - return !ins->src_count || !!(ins->src = shader_instruction_array_clone_src_params(instructions, - ins->src, ins->src_count)); -} - -void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions) -{ - unsigned int i; - - vkd3d_free(instructions->elements); - shader_param_allocator_destroy(&instructions->dst_params); - shader_param_allocator_destroy(&instructions->src_params); - for (i = 0; i < instructions->icb_count; ++i) - vkd3d_free(instructions->icbs[i]); - vkd3d_free(instructions->icbs); -} - void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, const struct vkd3d_shader_signature *input_signature, unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index e758c16b3d4..97fe5238046 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -110,6 +110,7 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, VKD3D_SHADER_WARNING_SPV_INVALID_UAV_FLAGS = 2301, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG = 2302, + VKD3D_SHADER_WARNING_SPV_INVALID_SIZE = 2303,
VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, @@ -175,6 +176,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE = 5043, VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT = 5044, VKD3D_SHADER_ERROR_HLSL_MISSING_INPUT_PATCH = 5045, + VKD3D_SHADER_ERROR_HLSL_CANNOT_FLATTEN = 5046,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -213,7 +215,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, - VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, + VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED = 8008, VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, @@ -226,6 +228,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES = 8018, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE = 8019, VKD3D_SHADER_ERROR_DXIL_INVALID_CONSTANT = 8020, + VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED = 8021,
VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, @@ -502,6 +505,7 @@ enum vkd3d_shader_opcode VSIR_OP_MOVC, VSIR_OP_MSAD, VSIR_OP_MUL, + VSIR_OP_NEG, VSIR_OP_NEO, VSIR_OP_NEU, VSIR_OP_NOP, @@ -540,6 +544,7 @@ enum vkd3d_shader_opcode VSIR_OP_SAMPLE_LOD, VSIR_OP_SAMPLE_LOD_S, VSIR_OP_SAMPLE_POS, + VSIR_OP_SATURATE, VSIR_OP_SETP, VSIR_OP_SGE, VSIR_OP_SGN, @@ -716,7 +721,10 @@ enum vsir_data_type VSIR_DATA_F32, VSIR_DATA_F64,
+ VSIR_DATA_I8, + VSIR_DATA_I16, VSIR_DATA_I32, + VSIR_DATA_I64,
VSIR_DATA_U8, VSIR_DATA_U16, @@ -734,10 +742,18 @@ enum vsir_data_type VSIR_DATA_TYPE_COUNT, };
+const char *vsir_data_type_get_name(enum vsir_data_type t, const char *error); + static inline bool data_type_is_integer(enum vsir_data_type data_type) { - return data_type == VSIR_DATA_I32 || data_type == VSIR_DATA_U8 || data_type == VSIR_DATA_U16 - || data_type == VSIR_DATA_U32 || data_type == VSIR_DATA_U64; + return data_type == VSIR_DATA_I8 + || data_type == VSIR_DATA_I16 + || data_type == VSIR_DATA_I32 + || data_type == VSIR_DATA_I64 + || data_type == VSIR_DATA_U8 + || data_type == VSIR_DATA_U16 + || data_type == VSIR_DATA_U32 + || data_type == VSIR_DATA_U64; }
static inline bool data_type_is_bool(enum vsir_data_type data_type) @@ -752,7 +768,7 @@ static inline bool data_type_is_floating_point(enum vsir_data_type data_type)
static inline bool data_type_is_64_bit(enum vsir_data_type data_type) { - return data_type == VSIR_DATA_F64 || data_type == VSIR_DATA_U64; + return data_type == VSIR_DATA_F64 || data_type == VSIR_DATA_I64 || data_type == VSIR_DATA_U64; }
enum vsir_dimension @@ -866,8 +882,7 @@ enum vkd3d_shader_uav_flags
enum vkd3d_shader_atomic_rmw_flags { - VKD3DARF_SEQ_CST = 0x1, - VKD3DARF_VOLATILE = 0x2, + VKD3DARF_VOLATILE = 0x1, };
enum vkd3d_tessellator_domain @@ -1357,6 +1372,13 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode);
+static inline void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_location location = ins->location; + + vsir_instruction_init(ins, &location, VSIR_OP_NOP); +} + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) { return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; @@ -1410,44 +1432,15 @@ struct vkd3d_shader_param_allocator
void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, size_t count);
-static inline struct vkd3d_shader_src_param *shader_src_param_allocator_get( - struct vkd3d_shader_param_allocator *allocator, size_t count) -{ - VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_src_param)); - return shader_param_allocator_get(allocator, count); -} - -static inline struct vkd3d_shader_dst_param *shader_dst_param_allocator_get( - struct vkd3d_shader_param_allocator *allocator, size_t count) -{ - VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); - return shader_param_allocator_get(allocator, count); -} - struct vkd3d_shader_instruction_array { struct vkd3d_shader_instruction *elements; size_t capacity; size_t count; - - struct vkd3d_shader_param_allocator src_params; - struct vkd3d_shader_param_allocator dst_params; - struct vkd3d_shader_immediate_constant_buffer **icbs; - size_t icb_capacity; - size_t icb_count; - - struct vkd3d_shader_src_param *outpointid_param; };
-bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instructions, size_t reserve); -bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, size_t reserve); -bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *instructions, - size_t idx, size_t count); -bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, - struct vkd3d_shader_immediate_constant_buffer *icb); -bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, - size_t dst, size_t src); -void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); +struct vkd3d_shader_instruction *shader_instruction_array_append(struct vkd3d_shader_instruction_array *array); +bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *instructions, size_t idx, size_t count);
struct vsir_program_iterator { @@ -1503,13 +1496,46 @@ static inline struct vkd3d_shader_instruction *vsir_program_iterator_prev( }
/* When insertion takes place, argument `it' is updated to point to the same - * instruction as before the insertion, but all other iterators and pointers - * to the same container are invalidated and cannot be used any more. */ + * instruction as before the insertion, but all existing pointers to the same + * container, as well as any iterators pointing to instructions after the + * insertion point, should be considered invalid. */ static inline bool vsir_program_iterator_insert_after(struct vsir_program_iterator *it, size_t count) { return shader_instruction_array_insert_at(it->array, it->idx + 1, count); }
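The insert_before variants added just below keep `it' on the original instruction and hand back the first inserted slot through `ins_it'. A sketch of the intended call pattern; prepend_nop() is hypothetical:

    /* Hypothetical helper: insert a single NOP in front of the instruction
     * `it' currently addresses, leaving `it' valid afterwards. */
    static bool prepend_nop(struct vsir_program_iterator *it)
    {
        struct vsir_program_iterator ins_it;
        struct vkd3d_shader_instruction *ins;

        if (!(ins = vsir_program_iterator_insert_before(it, &ins_it, 1)))
            return false;

        vsir_instruction_init(ins, &vsir_program_iterator_current(it)->location, VSIR_OP_NOP);
        return true;
    }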
+/* When insertion takes place, argument `it' is updated to point to the same + * instruction as before the insertion, and argument `ins_it' is initialized + * to point to the first inserted instruction. A pointer to the first inserted + * instruction is returned. */ +static inline struct vkd3d_shader_instruction *vsir_program_iterator_insert_before( + struct vsir_program_iterator *it, struct vsir_program_iterator *ins_it, size_t count) +{ + VKD3D_ASSERT(it != ins_it); + VKD3D_ASSERT(it->idx != SIZE_MAX); + + if (!shader_instruction_array_insert_at(it->array, it->idx, count)) + return NULL; + + *ins_it = *it; + it->idx += count; + + return vsir_program_iterator_current(ins_it); +} + +/* When insertion takes place, argument `it' is updated to point to the first + * inserted instruction. A pointer to this first inserted instruction is + * returned. */ +static inline struct vkd3d_shader_instruction *vsir_program_iterator_insert_before_and_move( + struct vsir_program_iterator *it, size_t count) +{ + VKD3D_ASSERT(it->idx != SIZE_MAX); + + if (!shader_instruction_array_insert_at(it->array, it->idx, count)) + return NULL; + return vsir_program_iterator_current(it); +} + enum vkd3d_shader_config_flags { VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001, @@ -1567,6 +1593,7 @@ struct vsir_program
struct vkd3d_shader_scan_descriptor_info1 descriptors; bool has_descriptor_info; + size_t descriptors_size;
unsigned int parameter_count; const struct vkd3d_shader_parameter1 *parameters; @@ -1574,8 +1601,7 @@ struct vsir_program
unsigned int input_control_point_count, output_control_point_count; struct vsir_thread_group_size thread_group_size; - unsigned int flat_constant_count[3]; - unsigned int block_count; + unsigned int block_count; /* maximum block count in any function */ unsigned int temp_count; unsigned int ssa_count; enum vsir_global_flags global_flags; @@ -1586,6 +1612,7 @@ struct vsir_program uint8_t diffuse_written_mask; enum vsir_control_flow_type cf_type; enum vsir_normalisation_level normalisation_level; + bool has_no_modifiers; enum vkd3d_tessellator_domain tess_domain; enum vkd3d_shader_tessellator_partitioning tess_partitioning; enum vkd3d_shader_tessellator_output_primitive tess_output_primitive; @@ -1599,12 +1626,25 @@ struct vsir_program struct vkd3d_shader_source_list source_files; const char **block_names; size_t block_name_count; + + struct vkd3d_shader_immediate_constant_buffer **icbs; + size_t icb_capacity; + size_t icb_count; + + struct vkd3d_shader_param_allocator src_params; + struct vkd3d_shader_param_allocator dst_params; };
enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_update_dcl_temps(struct vsir_program *program, struct vkd3d_shader_message_context *message_context); + +struct vkd3d_shader_descriptor_info1 *vsir_program_add_descriptor(struct vsir_program *program, + enum vkd3d_shader_descriptor_type type, unsigned int register_id, + const struct vkd3d_shader_register_range *range, + enum vkd3d_shader_resource_type resource_type, enum vsir_data_type resource_data_type); +bool vsir_program_add_icb(struct vsir_program *program, struct vkd3d_shader_immediate_constant_buffer *icb); void vsir_program_cleanup(struct vsir_program *program); const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( const struct vsir_program *program, enum vkd3d_shader_parameter_name name); @@ -1613,6 +1653,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c enum vsir_normalisation_level normalisation_level); enum vkd3d_result vsir_program_lower_d3dbc(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); +enum vkd3d_result vsir_program_optimize(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, @@ -1627,37 +1669,39 @@ bool vsir_instruction_init_with_params(struct vsir_program *program,
static inline struct vkd3d_shader_instruction *vsir_program_append(struct vsir_program *program) { - struct vkd3d_shader_instruction_array *array = &program->instructions; - - if (!shader_instruction_array_insert_at(array, array->count, 1)) - return NULL; - - return &array->elements[array->count - 1]; + return shader_instruction_array_append(&program->instructions); }
static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) { - return shader_dst_param_allocator_get(&program->instructions.dst_params, count); + struct vkd3d_shader_param_allocator *allocator = &program->dst_params; + + VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); + + return shader_param_allocator_get(allocator, count); }
static inline struct vkd3d_shader_src_param *vsir_program_get_src_params( struct vsir_program *program, unsigned int count) { - return shader_src_param_allocator_get(&program->instructions.src_params, count); + struct vkd3d_shader_param_allocator *allocator = &program->src_params; + + VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_src_param)); + + return shader_param_allocator_get(allocator, count); }
struct vkd3d_shader_parser { struct vkd3d_shader_message_context *message_context; struct vkd3d_shader_location location; - struct vsir_program *program; - bool failed; + enum vkd3d_result status; };
void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); -void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, +void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vkd3d_shader_message_context *message_context, const char *source_name); void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); @@ -1826,7 +1870,8 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
-int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, +int tpf_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *rdef, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
enum vkd3d_md5_variant @@ -1846,55 +1891,39 @@ int hlsl_parse(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vsir_program *program, struct vkd3d_shader_code *reflection_data);
-static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( enum vsir_data_type data_type) -{ - switch (data_type) - { - case VSIR_DATA_BOOL: - return VKD3D_SHADER_COMPONENT_BOOL; - case VSIR_DATA_F16: /* Minimum precision. TODO: native 16-bit */ - case VSIR_DATA_F32: - case VSIR_DATA_SNORM: - case VSIR_DATA_UNORM: - return VKD3D_SHADER_COMPONENT_FLOAT; - case VSIR_DATA_F64: - return VKD3D_SHADER_COMPONENT_DOUBLE; - case VSIR_DATA_I32: - return VKD3D_SHADER_COMPONENT_INT; - case VSIR_DATA_U16: /* Minimum precision. TODO: native 16-bit */ - case VSIR_DATA_U32: - return VKD3D_SHADER_COMPONENT_UINT; - case VSIR_DATA_U64: - return VKD3D_SHADER_COMPONENT_UINT64; - default: - FIXME("Unhandled data type %#x.\n", data_type); - /* fall-through */ - case VSIR_DATA_MIXED: - return VKD3D_SHADER_COMPONENT_UINT; - } -} - static inline enum vsir_data_type vsir_data_type_from_component_type(enum vkd3d_shader_component_type component_type) { switch (component_type) { - case VKD3D_SHADER_COMPONENT_FLOAT: - return VSIR_DATA_F32; + case VKD3D_SHADER_COMPONENT_VOID: + return VSIR_DATA_UNUSED; case VKD3D_SHADER_COMPONENT_UINT: return VSIR_DATA_U32; case VKD3D_SHADER_COMPONENT_INT: return VSIR_DATA_I32; + case VKD3D_SHADER_COMPONENT_FLOAT: + return VSIR_DATA_F32; + case VKD3D_SHADER_COMPONENT_BOOL: + return VSIR_DATA_BOOL; case VKD3D_SHADER_COMPONENT_DOUBLE: return VSIR_DATA_F64; - default: - FIXME("Unhandled component type %#x.\n", component_type); - return VSIR_DATA_F32; + case VKD3D_SHADER_COMPONENT_UINT64: + return VSIR_DATA_U64; + case VKD3D_SHADER_COMPONENT_INT64: + return VSIR_DATA_I64; + case VKD3D_SHADER_COMPONENT_FLOAT16: + return VSIR_DATA_F16; + case VKD3D_SHADER_COMPONENT_UINT16: + return VSIR_DATA_U16; + case VKD3D_SHADER_COMPONENT_INT16: + return VSIR_DATA_I16; + case VKD3D_SHADER_COMPONENT_TYPE_FORCE_32BIT: + break; } -}
-static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) -{ - return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; + FIXME("Unhandled component type %#x.\n", component_type); + + return VSIR_DATA_UNUSED; }
static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask) diff --git a/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c b/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c index f2967835b62..6e90b48e877 100644 --- a/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c +++ b/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c @@ -269,7 +269,7 @@ HRESULT WINAPI D3DCompile2VKD3D(const void *data, SIZE_T data_size, const char *
option = &options[0]; option->name = VKD3D_SHADER_COMPILE_OPTION_API_VERSION; - option->value = VKD3D_SHADER_API_VERSION_1_17; + option->value = VKD3D_SHADER_API_VERSION_CURRENT;
compile_info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; compile_info.next = &preprocess_info; @@ -433,7 +433,7 @@ HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename
static const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_17}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_CURRENT}, };
TRACE("data %p, size %"PRIuPTR", filename %s, macros %p, include %p, preprocessed_blob %p, messages_blob %p.\n", @@ -482,7 +482,10 @@ HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename
if (!ret) { - if (FAILED(hr = vkd3d_blob_create((void *)preprocessed_code.code, preprocessed_code.size, preprocessed_blob))) + /* vkd3d-shader output is null-terminated, but the null terminator isn't + * included in the size. Increase the size to account for that. */ + if (FAILED(hr = vkd3d_blob_create((void *)preprocessed_code.code, + preprocessed_code.size + 1, preprocessed_blob))) { vkd3d_shader_free_shader_code(&preprocessed_code); return hr; @@ -979,7 +982,7 @@ HRESULT WINAPI D3DDisassemble(const void *data, SIZE_T data_size,
static const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_17}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_CURRENT}, };
TRACE("data %p, data_size %"PRIuPTR", flags %#x, comments %p, blob %p.\n", @@ -1032,7 +1035,9 @@ HRESULT WINAPI D3DDisassemble(const void *data, SIZE_T data_size, return hresult_from_vkd3d_result(ret); }
- if (FAILED(hr = vkd3d_blob_create((void *)output.code, output.size, blob))) + /* vkd3d-shader output is null-terminated, but the null terminator isn't + * included in the size. Increase the size to account for that. */ + if (FAILED(hr = vkd3d_blob_create((void *)output.code, output.size + 1, blob))) vkd3d_shader_free_shader_code(&output);
return hr; diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 074d8430585..69f42280e8a 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -2688,32 +2688,12 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList6 *iface, ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) { - struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - HRESULT hr;
TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", iface, allocator, initial_pipeline_state);
- if (!allocator_impl) - { - WARN("Command allocator is NULL.\n"); - return E_INVALIDARG; - } - - if (list->is_recording) - { - WARN("Command list is in the recording state.\n"); - return E_FAIL; - } - - if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator_impl, list))) - { - list->allocator = allocator_impl; - d3d12_command_list_reset_state(list, initial_pipeline_state); - } - - return hr; + return d3d12_command_list_reset(list, allocator, initial_pipeline_state); }
static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList6 *iface, @@ -3049,7 +3029,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list
if (descriptor_count > range->vk_binding_count) { - ERR("Heap descriptor count %u exceeds maximum Vulkan count %u. Reducing to the Vulkan maximum.\n", + MESSAGE("Heap descriptor count %u exceeds maximum Vulkan count %u. Reducing to the Vulkan maximum.\n", descriptor_count, range->vk_binding_count); descriptor_count = range->vk_binding_count; } @@ -6405,9 +6385,8 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); }
-static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, - D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_allocator *allocator, - ID3D12PipelineState *initial_pipeline_state) +static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, + struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type) { HRESULT hr;
@@ -6421,31 +6400,37 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
d3d12_device_add_ref(list->device = device);
- list->allocator = allocator; + return hr; +}
- list->descriptor_heap_count = 0; +HRESULT d3d12_command_list_create(struct d3d12_device *device, UINT node_mask, + D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_list **list) +{ + struct d3d12_command_list *object; + HRESULT hr;
- if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) - { - list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS].vk_uav_counter_views = NULL; - list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE].vk_uav_counter_views = NULL; - d3d12_command_list_reset_state(list, initial_pipeline_state); - } - else + debug_ignored_node_mask(node_mask); + + if (!(object = vkd3d_calloc(1, sizeof(*object)))) + return E_OUTOFMEMORY; + + if (FAILED(hr = d3d12_command_list_init(object, device, type))) { - vkd3d_private_store_destroy(&list->private_store); - d3d12_device_release(device); + vkd3d_free(object); + return hr; }
- return hr; + TRACE("Created command list %p.\n", object); + + *list = object; + + return S_OK; }
-HRESULT d3d12_command_list_create(struct d3d12_device *device, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *allocator_iface, - ID3D12PipelineState *initial_pipeline_state, struct d3d12_command_list **list) +HRESULT d3d12_command_list_reset(struct d3d12_command_list *list, ID3D12CommandAllocator *allocator_iface, + ID3D12PipelineState *initial_pipeline_state) { struct d3d12_command_allocator *allocator; - struct d3d12_command_list *object; HRESULT hr;
if (!(allocator = unsafe_impl_from_ID3D12CommandAllocator(allocator_iface))) @@ -6454,29 +6439,26 @@ HRESULT d3d12_command_list_create(struct d3d12_device *device, return E_INVALIDARG; }
- if (allocator->type != type) + if (allocator->type != list->type) { WARN("Command list types do not match (allocator %#x, list %#x).\n", - allocator->type, type); + allocator->type, list->type); return E_INVALIDARG; }
- debug_ignored_node_mask(node_mask); - - if (!(object = vkd3d_malloc(sizeof(*object)))) - return E_OUTOFMEMORY; - - if (FAILED(hr = d3d12_command_list_init(object, device, type, allocator, initial_pipeline_state))) + if (list->is_recording) { - vkd3d_free(object); - return hr; + WARN("Command list is in the recording state.\n"); + return E_FAIL; }
- TRACE("Created command list %p.\n", object); - - *list = object; + if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) + { + list->allocator = allocator; + d3d12_command_list_reset_state(list, initial_pipeline_state); + }
- return S_OK; + return hr; }
/* ID3D12CommandQueue */ diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 67f84aafa29..6af5e2a5c7d 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -3271,10 +3271,15 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *i iface, node_mask, type, command_allocator, initial_pipeline_state, debugstr_guid(riid), command_list);
- if (FAILED(hr = d3d12_command_list_create(device, node_mask, type, command_allocator, - initial_pipeline_state, &object))) + if (FAILED(hr = d3d12_command_list_create(device, node_mask, type, &object))) return hr;
+ if (FAILED(hr = d3d12_command_list_reset(object, command_allocator, initial_pipeline_state))) + { + ID3D12GraphicsCommandList6_Release(&object->ID3D12GraphicsCommandList6_iface); + return hr; + } + return return_interface(&object->ID3D12GraphicsCommandList6_iface, &IID_ID3D12GraphicsCommandList6, riid, command_list); } @@ -5082,10 +5087,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device9 * UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, REFIID iid, void **command_list) { - FIXME("iface %p, node_mask 0x%08x, type %#x, flags %#x, iid %s, command_list %p stub!\n", + struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_command_list *object; + HRESULT hr; + + TRACE("iface %p, node_mask 0x%08x, type %#x, flags %#x, iid %s, command_list %p.\n", iface, node_mask, type, flags, debugstr_guid(iid), command_list);
- return E_NOTIMPL; + if (flags) + FIXME("Ignoring flags %#x.\n", flags); + + if (FAILED(hr = d3d12_command_list_create(device, node_mask, type, &object))) + return hr; + + return return_interface(&object->ID3D12GraphicsCommandList6_iface, + &IID_ID3D12GraphicsCommandList6, iid, command_list); }
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device9 *iface, diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 6bbd6533b74..4bd97fd599f 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -843,7 +843,7 @@ static struct vk_binding_array *d3d12_root_signature_vk_binding_array_for_type( { if (!context->static_samplers_descriptor_set) { - if (!context->push_descriptor && context->root_descriptor_set) + if (!root_signature->device->vk_info.KHR_push_descriptor && context->root_descriptor_set) context->static_samplers_descriptor_set = context->root_descriptor_set; else /* The descriptor type is irrelevant here, it will never be used. */ @@ -2391,7 +2391,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device,
const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_17}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_CURRENT}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, {VKD3D_SHADER_COMPILE_OPTION_FEATURE, feature_flags_compile_option(device)}, @@ -2456,7 +2456,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER
const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_17}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_CURRENT}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, };
@@ -4135,7 +4135,7 @@ static int compile_hlsl_cs(const struct vkd3d_shader_code *hlsl, struct vkd3d_sh
static const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_17}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_CURRENT}, };
info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c index c2832a61f67..2d0510e5456 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -703,7 +703,7 @@ const char *debug_vk_extent_3d(VkExtent3D extent)
const char *debug_vk_queue_flags(VkQueueFlags flags) { - char buffer[191]; + char buffer[222];
buffer[0] = '\0'; #define FLAG_TO_STR(f) if (flags & f) { strcat(buffer, " | "#f); flags &= ~f; } @@ -716,6 +716,7 @@ const char *debug_vk_queue_flags(VkQueueFlags flags) #define FLAG_TO_STR(f, n) if (flags & f) { strcat(buffer, " | "#n); flags &= ~f; } FLAG_TO_STR(0x20, VK_QUEUE_VIDEO_DECODE_BIT_KHR) FLAG_TO_STR(0x40, VK_QUEUE_VIDEO_ENCODE_BIT_KHR) + FLAG_TO_STR(0x100, VK_QUEUE_OPTICAL_FLOW_BIT_NV) #undef FLAG_TO_STR if (flags) FIXME("Unrecognized flag(s) %#x.\n", flags); diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index 5215cf8ef86..4c58f0a1787 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -18,8 +18,6 @@
#include "vkd3d_private.h"
-VKD3D_DEBUG_ENV_NAME("VKD3D_DEBUG"); - HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, REFIID iid, void **device) { diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index 9fb6834158f..0a8c5aef674 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -1327,8 +1327,9 @@ struct d3d12_command_list };
HRESULT d3d12_command_list_create(struct d3d12_device *device, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *allocator_iface, - ID3D12PipelineState *initial_pipeline_state, struct d3d12_command_list **list); + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_list **list); +HRESULT d3d12_command_list_reset(struct d3d12_command_list *list, + ID3D12CommandAllocator *allocator_iface, ID3D12PipelineState *initial_pipeline_state);
struct vkd3d_queue {
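A note on the command list changes above: list creation and reset are now separate steps, which is what allows CreateCommandList1() to stop being a stub and create lists in the closed state, with no allocator bound. A minimal sketch of the two call paths, mirroring the device.c hunks above (identifiers as in the patch; error handling abbreviated):

    struct d3d12_command_list *object;
    HRESULT hr;

    /* CreateCommandList1(): the list starts out closed, with no allocator bound. */
    if (FAILED(hr = d3d12_command_list_create(device, node_mask, type, &object)))
        return hr;

    /* CreateCommandList() additionally resets the list, which validates the
     * allocator type, allocates a command buffer from it, and puts the list
     * into the recording state. */
    if (FAILED(hr = d3d12_command_list_reset(object, command_allocator, initial_pipeline_state)))
    {
        ID3D12GraphicsCommandList6_Release(&object->ID3D12GraphicsCommandList6_iface);
        return hr;
    }
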
Is this missing an update to PACKAGE_STRING/PACKAGE_VERSION?
Yes, I always forget that one, thanks!
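For context, the imported tree is built with the usual autoconf-style package macros defined, so the follow-up would amount to something like the sketch below. Both the location of the defines and the version string here are assumptions for illustration, not part of this patch:

    /* Hypothetical values; the actual version depends on the upstream
     * release being imported. */
    #define PACKAGE_NAME    "vkd3d"
    #define PACKAGE_STRING  "vkd3d 1.18"
    #define PACKAGE_VERSION "1.18"
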
This merge request was approved by Matteo Bruni.
This merge request was approved by Elizabeth Figura.
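Regarding the new vsir_program_iterator insertion helpers in vkd3d_shader_private.h: the two variants differ only in where the iterators end up after the insertion. A usage sketch based solely on the comments and signatures in the hunk above (`it' is assumed to point at a valid instruction; not part of the patch):

    struct vsir_program_iterator ins_it;
    struct vkd3d_shader_instruction *ins;

    /* Insert two slots before *it; `it' still denotes the same instruction
     * afterwards, while `ins_it' denotes the first inserted slot. */
    if (!(ins = vsir_program_iterator_insert_before(it, &ins_it, 2)))
        return VKD3D_ERROR_OUT_OF_MEMORY;

    /* Variant that leaves the iterator itself on the first inserted slot. */
    if (!(ins = vsir_program_iterator_insert_before_and_move(it, 1)))
        return VKD3D_ERROR_OUT_OF_MEMORY;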