From: Alexandre Julliard <julliard@winehq.org> --- libs/vkd3d/AUTHORS | 1 + libs/vkd3d/config.h | 5 +- libs/vkd3d/include/private/vkd3d_common.h | 161 +- .../include/private/vkd3d_shader_utils.h | 56 + libs/vkd3d/include/vkd3d.h | 1 + libs/vkd3d/include/vkd3d_shader.h | 85 +- libs/vkd3d/libs/vkd3d-common/debug.c | 161 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 225 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 201 +- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 15 - libs/vkd3d/libs/vkd3d-shader/dxil.c | 989 +++++---- libs/vkd3d/libs/vkd3d-shader/fx.c | 297 +-- libs/vkd3d/libs/vkd3d-shader/glsl.c | 34 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 184 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 72 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 13 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 642 +++--- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1216 ++++++++--- .../libs/vkd3d-shader/hlsl_constant_ops.c | 20 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 1814 ++++++++++++++--- libs/vkd3d/libs/vkd3d-shader/msl.c | 59 +- libs/vkd3d/libs/vkd3d-shader/preproc.h | 10 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 67 +- libs/vkd3d/libs/vkd3d-shader/preproc.y | 84 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 377 ++-- libs/vkd3d/libs/vkd3d-shader/tpf.c | 123 +- .../libs/vkd3d-shader/vkd3d_shader_main.c | 175 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 173 +- .../vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c | 152 +- libs/vkd3d/libs/vkd3d/command.c | 11 +- libs/vkd3d/libs/vkd3d/state.c | 3 + libs/vkd3d/libs/vkd3d/utils.c | 8 + 32 files changed, 5099 insertions(+), 2335 deletions(-) diff --git a/libs/vkd3d/AUTHORS b/libs/vkd3d/AUTHORS index a42d303410e..d78543efc82 100644 --- a/libs/vkd3d/AUTHORS +++ b/libs/vkd3d/AUTHORS @@ -24,6 +24,7 @@ Giovanni Mascellani Hans-Kristian Arntzen Henri Verbeet Isabella Bosia +Ivan Lyugaev Jacek Caban Jactry Zeng Jan Sikorski diff --git a/libs/vkd3d/config.h b/libs/vkd3d/config.h index 0d6b8b14536..ba80a26bb00 100644 --- a/libs/vkd3d/config.h +++ b/libs/vkd3d/config.h @@ -1,5 +1,6 @@ 
#define PACKAGE_NAME "vkd3d" -#define PACKAGE_STRING "vkd3d 1.19" -#define PACKAGE_VERSION "1.19" +#define PACKAGE_STRING "vkd3d 1.20" +#define PACKAGE_VERSION "1.20" #define PATH_MAX 1024 #define SONAME_LIBVULKAN "vulkan-1.dll" +#define HAVE__STRTOF_L 1 diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 196ab307d0c..4fdc3fcb802 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -43,7 +43,7 @@ #include <intrin.h> #endif -#define VKD3D_SHADER_API_VERSION_CURRENT VKD3D_SHADER_API_VERSION_1_19 +#define VKD3D_SHADER_API_VERSION_CURRENT VKD3D_SHADER_API_VERSION_2_0 #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) @@ -129,35 +129,54 @@ static inline uint64_t align(uint64_t addr, size_t alignment) #ifdef VKD3D_NO_TRACE_MESSAGES #define TRACE(args...) do { } while (0) +#define TRACE_(ch, args...) do { } while (0) #define TRACE_ON() (false) +#define TRACE_ON_(ch) (false) +#define TRACE_TEXT(text, size) do { } while (0) +#define TRACE_TEXT_(ch, text, size) do { } while (0) #endif #ifdef VKD3D_NO_DEBUG_MESSAGES #define WARN(args...) do { } while (0) +#define WARN_(ch, args...) do { } while (0) #define FIXME(args...) do { } while (0) +#define FIXME_(ch, args...) do { } while (0) #define WARN_ON() (false) +#define WARN_ON_(ch) (false) #define FIXME_ONCE(args...) do { } while (0) +#define FIXME_ONCE_(ch, args...) do { } while (0) #endif #ifdef VKD3D_NO_ERROR_MESSAGES #define ERR(args...) do { } while (0) +#define ERR_(ch, args...) do { } while (0) #define MESSAGE(args...) do { } while (0) +#define MESSAGE_(ch, args...) 
do { } while (0) #endif -enum vkd3d_dbg_level +enum vkd3d_debug_class { - VKD3D_DBG_LEVEL_NONE, - VKD3D_DBG_LEVEL_MESSAGE, - VKD3D_DBG_LEVEL_ERR, - VKD3D_DBG_LEVEL_FIXME, - VKD3D_DBG_LEVEL_WARN, - VKD3D_DBG_LEVEL_TRACE, + VKD3D_DEBUG_CLASS_MESSAGE, + VKD3D_DEBUG_CLASS_ERR, + VKD3D_DEBUG_CLASS_FIXME, + VKD3D_DEBUG_CLASS_WARN, + VKD3D_DEBUG_CLASS_TRACE, + + VKD3D_DEBUG_CLASS_INIT = 7 /* lazy init flag */ +}; + +struct vkd3d_debug_channel +{ + uint8_t flags; + char name[15]; }; -enum vkd3d_dbg_level vkd3d_dbg_get_level(const char *vkd3d_dbg_env_name); +uint8_t vkd3d_debug_channel_get_flags(struct vkd3d_debug_channel *channel, const char *vkd3d_dbg_env_name); +void vkd3d_debug_channel_print_text(struct vkd3d_debug_channel *channel, const char *vkd3d_dbg_env_name, + enum vkd3d_debug_class cls, const char *function, const char *text, size_t size); +void vkd3d_debug_channel_printf(struct vkd3d_debug_channel *channel, const char *vkd3d_dbg_env_name, + enum vkd3d_debug_class cls, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(5, 6); -void vkd3d_dbg_printf(const char *vkd3d_dbg_env_name, enum vkd3d_dbg_level level, - const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback); const char *vkd3d_dbg_sprintf(const char *fmt, ...) 
VKD3D_PRINTF_FUNC(1, 2); @@ -166,21 +185,25 @@ const char *debugstr_a(const char *str); const char *debugstr_an(const char *str, size_t n); const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); -#define VKD3D_DBG_LOG(level) \ +#define VKD3D_DBG_LOG(cls, ch) \ do { \ - const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ - VKD3D_DBG_PRINTF_##level + const enum vkd3d_debug_class vkd3d_dbg_class = VKD3D_DEBUG_CLASS_##cls; \ + struct vkd3d_debug_channel *vkd3d_dbg_channel = (ch); \ + VKD3D_DBG_PRINTF_##cls -#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ +#define VKD3D_DBG_LOG_ONCE(first_time_cls, cls, ch) \ do { \ - static bool vkd3d_dbg_next_time; \ - const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ - ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ - vkd3d_dbg_next_time = true; \ - VKD3D_DBG_PRINTF_##level + static bool vkd3d_dbg_next_time; \ + const enum vkd3d_debug_class vkd3d_dbg_class = vkd3d_dbg_next_time \ + ? VKD3D_DEBUG_CLASS_##cls : VKD3D_DEBUG_CLASS_##first_time_cls; \ + struct vkd3d_debug_channel *vkd3d_dbg_channel = (ch); \ + vkd3d_dbg_next_time = true; \ + VKD3D_DBG_PRINTF_##cls #define VKD3D_DBG_PRINTF(...) \ - vkd3d_dbg_printf(VKD3D_DEBUG_ENV_NAME, vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) + vkd3d_debug_channel_printf(vkd3d_dbg_channel, VKD3D_DEBUG_ENV_NAME, \ + vkd3d_dbg_class, __FUNCTION__, __VA_ARGS__); \ + } while (0) #define VKD3D_DBG_PRINTF_TRACE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) #define VKD3D_DBG_PRINTF_WARN(...) VKD3D_DBG_PRINTF(__VA_ARGS__) @@ -189,8 +212,9 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); #ifdef VKD3D_ABORT_ON_ERR #define VKD3D_DBG_PRINTF_ERR(...) 
\ - vkd3d_dbg_printf(VKD3D_DEBUG_ENV_NAME, vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); \ - abort(); \ + vkd3d_debug_channel_printf(vkd3d_dbg_channel, VKD3D_DEBUG_ENV_NAME, \ + vkd3d_dbg_class, __FUNCTION__, __VA_ARGS__); \ + abort(); \ } while (0) #else #define VKD3D_DBG_PRINTF_ERR(...) VKD3D_DBG_PRINTF(__VA_ARGS__) @@ -203,37 +227,65 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); #endif #ifndef TRACE -#define TRACE VKD3D_DBG_LOG(TRACE) +#define TRACE VKD3D_DBG_LOG(TRACE, vkd3d_debug_channel_default) +#define TRACE_(ch) VKD3D_DBG_LOG(TRACE, &vkd3d_debug_channel__##ch) +#define TRACE_TEXT(text, size) \ + vkd3d_debug_channel_print_text(vkd3d_debug_channel_default, \ + VKD3D_DEBUG_ENV_NAME, VKD3D_DEBUG_CLASS_TRACE, __FUNCTION__, text, size) +#define TRACE_TEXT_(ch, text, size) \ + vkd3d_debug_channel_print_text(&vkd3d_debug_channel__##ch, \ + VKD3D_DEBUG_ENV_NAME, VKD3D_DEBUG_CLASS_TRACE, __FUNCTION__, text, size) #endif #ifndef WARN -#define WARN VKD3D_DBG_LOG(WARN) +#define WARN VKD3D_DBG_LOG(WARN, vkd3d_debug_channel_default) +#define WARN_(ch) VKD3D_DBG_LOG(WARN, &vkd3d_debug_channel__##ch) #endif #ifndef FIXME -#define FIXME VKD3D_DBG_LOG(FIXME) +#define FIXME VKD3D_DBG_LOG(FIXME, vkd3d_debug_channel_default) +#define FIXME_(ch) VKD3D_DBG_LOG(FIXME, &vkd3d_debug_channel__##ch) #endif #ifndef ERR -#define ERR VKD3D_DBG_LOG(ERR) +#define ERR VKD3D_DBG_LOG(ERR, vkd3d_debug_channel_default) +#define ERR_(ch) VKD3D_DBG_LOG(ERR, &vkd3d_debug_channel__##ch) #endif #ifndef MESSAGE -#define MESSAGE VKD3D_DBG_LOG(MESSAGE) +#define MESSAGE VKD3D_DBG_LOG(MESSAGE, vkd3d_debug_channel_default) +#define MESSAGE_(ch) VKD3D_DBG_LOG(MESSAGE, &vkd3d_debug_channel__##ch) #endif #ifndef TRACE_ON -#define TRACE_ON() (vkd3d_dbg_get_level(VKD3D_DEBUG_ENV_NAME) == VKD3D_DBG_LEVEL_TRACE) +#define TRACE_ON() (vkd3d_debug_channel_get_flags(vkd3d_debug_channel_default, VKD3D_DEBUG_ENV_NAME) \ + & (1u << VKD3D_DEBUG_CLASS_TRACE)) +#define TRACE_ON_(ch) 
(vkd3d_debug_channel_get_flags(&vkd3d_debug_channel__##ch, VKD3D_DEBUG_ENV_NAME) \ + & (1u << VKD3D_DEBUG_CLASS_TRACE)) #endif #ifndef WARN_ON -#define WARN_ON() (vkd3d_dbg_get_level(VKD3D_DEBUG_ENV_NAME) >= VKD3D_DBG_LEVEL_WARN) +#define WARN_ON() (vkd3d_debug_channel_get_flags(vkd3d_debug_channel_default, VKD3D_DEBUG_ENV_NAME) \ + & (1u << VKD3D_DEBUG_CLASS_WARN)) +#define WARN_ON_(ch) (vkd3d_debug_channel_get_flags(&vkd3d_debug_channel__##ch, VKD3D_DEBUG_ENV_NAME) \ + & (1u << VKD3D_DEBUG_CLASS_WARN)) #endif #ifndef FIXME_ONCE -#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) +#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN, vkd3d_debug_channel_default) +#define FIXME_ONCE_(ch) VKD3D_DBG_LOG_ONCE(FIXME, WARN, &vkd3d_debug_channel__##ch) #endif +#define VKD3D_DECLARE_DEBUG_CHANNEL(ch) \ + static struct vkd3d_debug_channel vkd3d_debug_channel__##ch = {0xffu, #ch}; \ + STATIC_ASSERT(sizeof(#ch) <= sizeof(vkd3d_debug_channel__##ch.name)); +#define VKD3D_DEFAULT_DEBUG_CHANNEL(ch) \ + static struct vkd3d_debug_channel vkd3d_debug_channel__##ch = {0xffu, #ch}; \ + STATIC_ASSERT(sizeof(#ch) <= sizeof(vkd3d_debug_channel__##ch.name)); \ + static struct vkd3d_debug_channel * const vkd3d_debug_channel_default = &vkd3d_debug_channel__##ch; + +VKD3D_DEFAULT_DEBUG_CHANNEL(vkd3d) + static inline const char *debugstr_guid(const GUID *guid) { if (!guid) @@ -400,6 +452,48 @@ static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_ return (~(size_t)0 - start) / size < count; } +/* Based on the implementation in the OpenGL Mathematics library. */ +static inline uint32_t vkd3d_f32_from_f16(uint16_t value) +{ + uint32_t s = (value & 0x8000u) << 16; + uint32_t e = (value >> 10) & 0x1fu; + uint32_t m = value & 0x3ffu; + + if (!e) + { + if (!m) + { + /* Plus or minus zero. */ + return s; + } + else + { + /* Denormalised number; renormalise it. 
*/ + while (!(m & 0x400u)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x400u; + } + } + else if (e == 31u) + { + /* Positive or negative infinity for zero 'm'. + * NaN for non-zero 'm'; preserve sign and significand bits. */ + return s | 0x7f800000u | (m << 13); + } + + /* Normalised number. */ + e += 127u - 15u; + m <<= 13; + + /* Assemble s, e and m. */ + return s | (e << 23) | m; +} + static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) { return low | ((uint16_t)high << 8); @@ -420,6 +514,11 @@ static inline int vkd3d_u64_compare(uint64_t x, uint64_t y) return (x > y) - (x < y); } +static inline int vkd3d_ptr_compare(const void *x, const void *y) +{ + return (x > y) - (x < y); +} + #define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) static inline bool bitmap_clear(uint32_t *map, unsigned int idx) diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h index 465734dfbff..7436f0ddebf 100644 --- a/libs/vkd3d/include/private/vkd3d_shader_utils.h +++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h @@ -20,6 +20,14 @@ #define __VKD3D_SHADER_UTILS_H #include "vkd3d_shader.h" +#include <sys/stat.h> + +/* S_ISREG may not be defined when building for Windows. MinGW provides a more + * POSIX-like environment that does define S_ISREG, but the Wine/msvcrt + * headers do not. 
*/ +#ifndef S_ISREG +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, enum vkd3d_shader_source_type *type, char **messages) @@ -56,4 +64,52 @@ static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct return VKD3D_OK; } +static inline enum vkd3d_result vkd3d_shader_code_from_file(struct vkd3d_shader_code *shader, FILE *f) +{ + size_t size = 4096; + struct stat st; + size_t pos = 0; + uint8_t *data; + size_t ret; + + memset(shader, 0, sizeof(*shader)); + + if (fstat(fileno(f), &st) == -1) + return VKD3D_ERROR; + + if (S_ISREG(st.st_mode)) + size = st.st_size; + + if (!(data = malloc(size))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (;;) + { + if (pos >= size) + { + if (size > SIZE_MAX / 2 || !(data = realloc(data, size * 2))) + { + free(data); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + size *= 2; + } + + if (!(ret = fread(&data[pos], 1, size - pos, f))) + break; + pos += ret; + } + + if (!feof(f)) + { + free(data); + return VKD3D_ERROR; + } + + shader->code = data; + shader->size = pos; + + return VKD3D_OK; +} + #endif /* __VKD3D_SHADER_UTILS_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index da4362c1e6f..b14752165a2 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -104,6 +104,7 @@ enum vkd3d_api_version VKD3D_API_VERSION_1_17, VKD3D_API_VERSION_1_18, VKD3D_API_VERSION_1_19, + VKD3D_API_VERSION_2_0, VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), }; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index dd2a8a8afee..084bda762c0 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -62,6 +62,7 @@ enum vkd3d_shader_api_version VKD3D_SHADER_API_VERSION_1_17, VKD3D_SHADER_API_VERSION_1_18, VKD3D_SHADER_API_VERSION_1_19, + VKD3D_SHADER_API_VERSION_2_0, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), }; @@ -132,6 
+133,11 @@ enum vkd3d_shader_structure_type * \since 1.18 */ VKD3D_SHADER_STRUCTURE_TYPE_D3DBC_SOURCE_INFO, + /** + * The structure is a vkd3d_shader_scan_denormal_mode_info structure. + * \since 2.0 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DENORMAL_MODE_INFO, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -208,7 +214,9 @@ enum vkd3d_shader_compile_option_backward_compatibility * - POSITION to SV_Position for vertex shader outputs, pixel shader inputs, * and geometry shader inputs and outputs; * - COLORN to SV_TargetN for pixel shader outputs; - * - DEPTH to SV_Depth for pixel shader outputs. + * - DEPTH to SV_Depth for pixel shader outputs; + * - VFACE to SV_IsFrontFace for pixel shader inputs; + * - VPOS to SV_Position for pixel shader inputs. */ VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES = 0x00000001, /** @@ -222,6 +230,16 @@ enum vkd3d_shader_compile_option_backward_compatibility * \since 1.14 */ VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS = 0x00000002, + /** + * Causes all uniform variables in global scope to be const. + * This includes variables declared without either 'uniform' or 'static', + * but does not include uniforms declared as arguments to the entry point. + * + * This option is disabled by default. + * + * \since 2.0 + */ + VKD3D_SHADER_COMPILE_OPTION_CONST_GLOBAL_UNIFORMS = 0x00000004, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY), }; @@ -382,6 +400,27 @@ enum vkd3d_shader_compile_option_name * \since 1.12 */ VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS = 0x0000000d, + /** + * Override the denormal mode for f16 (half) numbers. \a value is a member + * of enum vkd3d_shader_denormal_mode. + * + * \since 2.0 + */ + VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F16 = 0x0000000e, + /** + * Override the denormal mode for f32 (float) numbers. \a value is a member + * of enum vkd3d_shader_denormal_mode. 
+ * + * \since 2.0 + */ + VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F32 = 0x0000000f, + /** + * Override the denormal mode for f64 (double) numbers. \a value is a member + * of enum vkd3d_shader_denormal_mode. + * + * \since 2.0 + */ + VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F64 = 0x00000010, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), }; @@ -1689,6 +1728,8 @@ enum vkd3d_shader_spirv_extension VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER, /** \since 1.12 */ VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK, + /** \since 2.0 */ + VKD3D_SHADER_SPIRV_EXTENSION_KHR_FLOAT_CONTROLS, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_EXTENSION), }; @@ -2415,6 +2456,46 @@ struct vkd3d_shader_scan_thread_group_size_info unsigned int x, y, z; }; +/** + * Specifies how denormal floating-point numbers should be treated. + * + * \since 2.0 + */ +enum vkd3d_shader_denormal_mode +{ + /** No particular denormal mode is requested. */ + VKD3D_SHADER_DENORMAL_MODE_ANY = 0, + /** Denormal values should be preserved. */ + VKD3D_SHADER_DENORMAL_MODE_PRESERVE = 1, + /** Denormal values should be flushed to zero. */ + VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO = 2, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DENORMAL_MODE), +}; + +/** + * A chained structure describing how a shader expects denormal floating-point + * values to be handled. + * + * This structure extends vkd3d_shader_compile_info. + * + * \since 2.0 + */ +struct vkd3d_shader_scan_denormal_mode_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DENORMAL_MODE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The denormal mode for f16 (half) numbers. */ + enum vkd3d_shader_denormal_mode f16_denormal_mode; + /** The denormal mode for f32 (float) numbers. */ + enum vkd3d_shader_denormal_mode f32_denormal_mode; + /** The denormal mode for f64 (double) numbers. 
*/ + enum vkd3d_shader_denormal_mode f64_denormal_mode; +}; + /** * A chained structure containing legacy Direct3D bytecode compilation parameters. * This structure specifies some information about the source environment that @@ -2970,6 +3051,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * - vkd3d_shader_parameter_info * - vkd3d_shader_preprocess_info * - vkd3d_shader_scan_combined_resource_sampler_info + * - vkd3d_shader_scan_denormal_mode_info * - vkd3d_shader_scan_descriptor_info * - vkd3d_shader_scan_hull_shader_tessellation_info * - vkd3d_shader_scan_signature_info @@ -3169,6 +3251,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * - vkd3d_shader_parameter_info * - vkd3d_shader_preprocess_info * - vkd3d_shader_scan_combined_resource_sampler_info + * - vkd3d_shader_scan_denormal_mode_info * - vkd3d_shader_scan_descriptor_info * - vkd3d_shader_scan_hull_shader_tessellation_info * - vkd3d_shader_scan_signature_info diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index b5d74ca3dc7..f83f70cd55f 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -40,40 +40,124 @@ #define VKD3D_DEBUG_BUFFER_COUNT 64 #define VKD3D_DEBUG_BUFFER_SIZE 512 -static const char *const debug_level_names[] = +static const uint8_t VKD3D_DEBUG_DEFAULT_FLAGS = (1u << VKD3D_DEBUG_CLASS_MESSAGE) + | (1u << VKD3D_DEBUG_CLASS_ERR) | (1u << VKD3D_DEBUG_CLASS_FIXME); + +static const char *const debug_class_names[] = { - [VKD3D_DBG_LEVEL_NONE ] = "none", - [VKD3D_DBG_LEVEL_MESSAGE] = "message", - [VKD3D_DBG_LEVEL_ERR ] = "err", - [VKD3D_DBG_LEVEL_FIXME] = "fixme", - [VKD3D_DBG_LEVEL_WARN ] = "warn", - [VKD3D_DBG_LEVEL_TRACE] = "trace", + [VKD3D_DEBUG_CLASS_MESSAGE] = "message", + [VKD3D_DEBUG_CLASS_ERR ] = "err", + [VKD3D_DEBUG_CLASS_FIXME ] = "fixme", + [VKD3D_DEBUG_CLASS_WARN ] = "warn", + [VKD3D_DEBUG_CLASS_TRACE ] = "trace", }; -enum 
vkd3d_dbg_level vkd3d_dbg_get_level(const char *vkd3d_dbg_env_name) +static uint8_t vkd3d_debug_channel_parse_flags(struct vkd3d_debug_channel *channel, const char *str) { - static unsigned int level = ~0u; - const char *vkd3d_debug; + uint8_t flags, default_flags = VKD3D_DEBUG_DEFAULT_FLAGS; + bool channel_specifics = false; + char *opt, *next, *options; + const char *p, *name; unsigned int i; + size_t len; - if (level != ~0u) - return level; - - if (!(vkd3d_debug = getenv(vkd3d_dbg_env_name))) - vkd3d_debug = ""; + if (!(options = vkd3d_strdup(str))) + return default_flags; - for (i = 0; i < ARRAY_SIZE(debug_level_names); ++i) + for (opt = options; opt; opt = next) { - if (!strcmp(debug_level_names[i], vkd3d_debug)) + uint8_t set = 0, clear = 0; + + if ((next = strchr(opt, ','))) + *next++ = 0; + + p = opt + strcspn(opt, "+-"); + if (!*p) + p = opt; /* Assume it's a debug channel name. */ + + if (p > opt) + { + for (i = 0; i < ARRAY_SIZE(debug_class_names); ++i) + { + if (!(name = debug_class_names[i])) + continue; + if ((len = strlen(name)) != (p - opt)) + continue; + if (memcmp(opt, name, len)) + continue; + + if (*p == '+') + set |= 1u << i; + else + clear |= 1u << i; + break; + } + if (i == ARRAY_SIZE(debug_class_names)) + continue; /* Bad class name, skip it. */ + } + else { - level = i; - return level; + if (*p == '-') + clear = ~0; + else + set = ~0; + } + if (*p == '+' || *p == '-') + ++p; + if (!*p) + continue; + + if (!strcmp(p, "all")) + { + default_flags = (default_flags & ~clear) | set; + } + else if(!strcmp(p, channel->name)) + { + if (!channel_specifics) + flags = default_flags; + flags = (flags & ~clear) | set; + channel_specifics = true; } } - /* Default debug level. 
*/ - level = VKD3D_DBG_LEVEL_FIXME; - return level; + vkd3d_free(options); + + if (!channel_specifics) + flags = default_flags; + + return flags; +} + +static void vkd3d_debug_channel_init_flags(struct vkd3d_debug_channel *channel, const char *vkd3d_dbg_env_name) +{ + const char *vkd3d_debug; + + if (!(vkd3d_debug = getenv(vkd3d_dbg_env_name))) + { + channel->flags = VKD3D_DEBUG_DEFAULT_FLAGS; + return; + } + + if (!strcmp(vkd3d_debug, "trace")) + vkd3d_debug = "+all"; + else if (!strcmp(vkd3d_debug, "warn")) + vkd3d_debug = "warn+all"; + else if (!strcmp(vkd3d_debug, "fixme")) + vkd3d_debug = ""; + else if (!strcmp(vkd3d_debug, "err")) + vkd3d_debug = "fixme-all"; + else if (!strcmp(vkd3d_debug, "none")) + vkd3d_debug = "-all"; + + channel->flags = vkd3d_debug_channel_parse_flags(channel, vkd3d_debug); +} + +uint8_t vkd3d_debug_channel_get_flags(struct vkd3d_debug_channel *channel, const char *vkd3d_dbg_env_name) +{ + if (channel->flags & (1u << VKD3D_DEBUG_CLASS_INIT)) + vkd3d_debug_channel_init_flags(channel, vkd3d_dbg_env_name); + + return channel->flags; } static PFN_vkd3d_log log_callback; @@ -106,28 +190,47 @@ static uint64_t get_pthread_threadid(void) } #endif -void vkd3d_dbg_printf(const char *vkd3d_dbg_env_name, - enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) +void vkd3d_debug_channel_printf(struct vkd3d_debug_channel *channel, const char *vkd3d_dbg_env_name, + enum vkd3d_debug_class cls, const char *function, const char *fmt, ...) 
{ + const char *cls_name = debug_class_names[cls]; va_list args; - if (vkd3d_dbg_get_level(vkd3d_dbg_env_name) < level) + if (!(vkd3d_debug_channel_get_flags(channel, vkd3d_dbg_env_name) & (1u << cls))) return; #ifdef _WIN32 - vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); + vkd3d_dbg_output("vkd3d:%04lx:%s:%s:%s ", GetCurrentThreadId(), cls_name, channel->name, function); #elif HAVE_GETTID - vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); + vkd3d_dbg_output("vkd3d:%u:%s:%s:%s ", gettid(), cls_name, channel->name, function); #elif HAVE_PTHREAD_THREADID_NP - vkd3d_dbg_output("vkd3d:%"PRIu64":%s:%s ", get_pthread_threadid(), debug_level_names[level], function); + vkd3d_dbg_output("vkd3d:%"PRIu64":%s:%s:%s ", get_pthread_threadid(), cls_name, channel->name, function); #else - vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); + vkd3d_dbg_output("vkd3d:%s:%s:%s ", cls_name, channel->name, function); #endif va_start(args, fmt); vkd3d_dbg_voutput(fmt, args); va_end(args); } +void vkd3d_debug_channel_print_text(struct vkd3d_debug_channel *channel, const char *vkd3d_dbg_env_name, + enum vkd3d_debug_class cls, const char *function, const char *text, size_t size) +{ + const char *p, *q, *end = text + size; + + if (!(vkd3d_debug_channel_get_flags(channel, vkd3d_dbg_env_name) & (1u << cls))) + return; + + for (p = text; p < end; p = q) + { + if (!(q = memchr(p, '\n', end - p))) + q = end; + else + ++q; + vkd3d_debug_channel_printf(channel, vkd3d_dbg_env_name, cls, function, "%.*s", (int)(q - p), p); + } +} + void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback) { log_callback = callback; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index f06870718ec..184bcd62948 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -27,76 +27,20 @@ #include <stdio.h> #include <math.h> -static 
const char * const shader_register_names[] = -{ - [VKD3DSPR_ADDR ] = "a", - [VKD3DSPR_ATTROUT ] = "oD", - [VKD3DSPR_COLOROUT ] = "oC", - [VKD3DSPR_COMBINED_SAMPLER ] = "s", - [VKD3DSPR_CONST ] = "c", - [VKD3DSPR_CONSTBOOL ] = "b", - [VKD3DSPR_CONSTBUFFER ] = "cb", - [VKD3DSPR_CONSTINT ] = "i", - [VKD3DSPR_COVERAGE ] = "vCoverage", - [VKD3DSPR_DEPTHOUT ] = "oDepth", - [VKD3DSPR_DEPTHOUTGE ] = "oDepthGE", - [VKD3DSPR_DEPTHOUTLE ] = "oDepthLE", - [VKD3DSPR_FORKINSTID ] = "vForkInstanceId", - [VKD3DSPR_FUNCTIONBODY ] = "fb", - [VKD3DSPR_FUNCTIONPOINTER ] = "fp", - [VKD3DSPR_GROUPSHAREDMEM ] = "g", - [VKD3DSPR_GSINSTID ] = "vGSInstanceID", - [VKD3DSPR_IDXTEMP ] = "x", - [VKD3DSPR_IMMCONST ] = "l", - [VKD3DSPR_IMMCONST64 ] = "d", - [VKD3DSPR_IMMCONSTBUFFER ] = "icb", - [VKD3DSPR_INCONTROLPOINT ] = "vicp", - [VKD3DSPR_INPUT ] = "v", - [VKD3DSPR_JOININSTID ] = "vJoinInstanceId", - [VKD3DSPR_LABEL ] = "l", - [VKD3DSPR_LOCALTHREADID ] = "vThreadIDInGroup", - [VKD3DSPR_LOCALTHREADINDEX ] = "vThreadIDInGroupFlattened", - [VKD3DSPR_LOOP ] = "aL", - [VKD3DSPR_NULL ] = "null", - [VKD3DSPR_OUTCONTROLPOINT ] = "vocp", - [VKD3DSPR_OUTPOINTID ] = "vOutputControlPointID", - [VKD3DSPR_OUTPUT ] = "o", - [VKD3DSPR_OUTSTENCILREF ] = "oStencilRef", - [VKD3DSPR_PARAMETER ] = "parameter", - [VKD3DSPR_PATCHCONST ] = "vpc", - [VKD3DSPR_POINT_COORD ] = "vPointCoord", - [VKD3DSPR_PREDICATE ] = "p", - [VKD3DSPR_PRIMID ] = "primID", - [VKD3DSPR_RASTERIZER ] = "rasterizer", - [VKD3DSPR_RESOURCE ] = "t", - [VKD3DSPR_SAMPLEMASK ] = "oMask", - [VKD3DSPR_SAMPLER ] = "s", - [VKD3DSPR_SSA ] = "sr", - [VKD3DSPR_STREAM ] = "m", - [VKD3DSPR_TEMP ] = "r", - [VKD3DSPR_TESSCOORD ] = "vDomainLocation", - [VKD3DSPR_TEXCRDOUT ] = "oT", - [VKD3DSPR_TEXTURE ] = "t", - [VKD3DSPR_THREADGROUPID ] = "vThreadGroupID", - [VKD3DSPR_THREADID ] = "vThreadID", - [VKD3DSPR_UAV ] = "u", - [VKD3DSPR_UNDEF ] = "undef", - [VKD3DSPR_WAVELANECOUNT ] = "vWaveLaneCount", - [VKD3DSPR_WAVELANEINDEX ] = "vWaveLaneIndex", -}; - struct 
vkd3d_d3d_asm_colours { const char *reset; + const char *enumerant; const char *error; + const char *label; const char *literal; const char *modifier; const char *opcode; const char *reg; const char *swizzle; + const char *type; const char *version; const char *write_mask; - const char *label; }; struct vkd3d_d3d_asm_compiler @@ -120,19 +64,31 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, en global_flag_info[] = { {VKD3DSGF_REFACTORING_ALLOWED, "refactoringAllowed"}, + {VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS, "enableDoublePrecisionFloatOps"}, {VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL, "forceEarlyDepthStencil"}, {VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS, "enableRawAndStructuredBuffers"}, - {VKD3DSGF_ENABLE_MINIMUM_PRECISION, "enableMinimumPrecision"}, {VKD3DSGF_SKIP_OPTIMIZATION, "skipOptimization"}, - {VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS, "enableDoublePrecisionFloatOps"}, + {VKD3DSGF_ENABLE_MINIMUM_PRECISION, "enableMinimumPrecision"}, {VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS, "enable11_1DoubleExtensions"}, + {VKD3DSGF_ENABLE_11_1_SHADER_EXTENSIONS, "enable11_1ShaderExtensions"}, + {VKD3DSGF_BIND_FOR_DURATION, "allResourcesBound"}, + {VKD3DSGF_ENABLE_VP_AND_RT_ARRAY_INDEX, "viewportAndRTArrayIndex"}, + {VKD3DSGF_ENABLE_STENCIL_REF, "stencilRef"}, + {VKD3DSGF_ENABLE_RELAXED_TYPED_UAV_FORMATS, "UAVLoadAdditionalFormats"}, + {VKD3DSGF_ENABLE_UP_TO_64_UAVS, "64UAVs"}, + {VKD3DSGF_ENABLE_UAVS_AT_EVERY_STAGE, "UAVsAtEveryStage"}, + {VKD3DSGF_ENABLE_RASTERIZER_ORDERED_VIEWS, "ROVs"}, + {VKD3DSGF_ENABLE_WAVE_INTRINSICS, "waveOps"}, + {VKD3DSGF_ENABLE_INT64, "int64Ops"}, + {VKD3DSGF_ENABLE_NATIVE_LOW_PRECISION, "nativeLowPrecision"}, }; for (i = 0; i < ARRAY_SIZE(global_flag_info); ++i) { if (global_flags & global_flag_info[i].flag) { - vkd3d_string_buffer_printf(&compiler->buffer, "%s", global_flag_info[i].name); + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", + compiler->colours.enumerant, global_flag_info[i].name, 
compiler->colours.reset); global_flags &= ~global_flag_info[i].flag; if (global_flags) vkd3d_string_buffer_printf(&compiler->buffer, " | "); @@ -140,7 +96,8 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, en } if (global_flags) - vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); + vkd3d_string_buffer_printf(&compiler->buffer, "%sunknown_flags(%#"PRIx64")%s", + compiler->colours.error, (uint64_t)global_flags, compiler->colours.reset); } static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) @@ -429,7 +386,8 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil vkd3d_string_buffer_printf(&compiler->buffer, "%s", i == 0 ? "" : ","); if (t < ARRAY_SIZE(names) && names[t]) - vkd3d_string_buffer_printf(&compiler->buffer, "%s", names[t]); + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", + compiler->colours.type, names[t], compiler->colours.reset); else vkd3d_string_buffer_printf(&compiler->buffer, "%s<unhandled data type %#zx>%s", compiler->colours.error, t, compiler->colours.reset); @@ -483,7 +441,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, } if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) shader_dump_uav_flags(compiler, flags); - vkd3d_string_buffer_printf(buffer, " "); + vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); shader_dump_resource_data_type(compiler, semantic->resource_data_type); vkd3d_string_buffer_printf(buffer, "%s", suffix); return; @@ -566,6 +524,37 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, static void shader_print_src_operand(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, const struct vsir_src_operand *src, const char *suffix); +static bool f16_isfinite(uint16_t f) +{ + return (f & 0x7c00) != 0x7c00; +} + +static bool f16_signbit(uint16_t f) +{ + return f & 0x8000; +} + 
+static uint16_t f16_negate(uint16_t f) +{ + return f ^ 0x8000; +} + +static void shader_print_f16_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, uint16_t f, const char *suffix) +{ + const char *sign = ""; + + if (f16_isfinite(f) && f16_signbit(f)) + { + sign = "-"; + f = f16_negate(f); + } + + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", prefix, sign, compiler->colours.literal); + vkd3d_string_buffer_print_f16(&compiler->buffer, f); + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", compiler->colours.reset, suffix); +} + static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, float f, const char *suffix) { @@ -598,6 +587,25 @@ static void shader_print_double_literal(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(&compiler->buffer, "l%s%s", compiler->colours.reset, suffix); } +static void shader_print_i16_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, int16_t i, const char *suffix) +{ + /* Note that we need to handle INT16_MIN here as well. 
*/ + if (i < 0) + vkd3d_string_buffer_printf(&compiler->buffer, "%s-%s%hu%s%s", + prefix, compiler->colours.literal, -(uint16_t)i, compiler->colours.reset, suffix); + else + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%hd%s%s", + prefix, compiler->colours.literal, i, compiler->colours.reset, suffix); +} + +static void shader_print_u16_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, uint16_t i, const char *suffix) +{ + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%hu%s%s", + prefix, compiler->colours.literal, i, compiler->colours.reset, suffix); +} + static void shader_print_int_literal(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, int i, const char *suffix) { @@ -689,12 +697,13 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler vkd3d_string_buffer_printf(&compiler->buffer, "*]"); } -static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, - const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix) +static void shader_print_operand(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vsir_operand *reg, bool is_declaration, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; unsigned int offset = reg->idx[0].offset; bool is_descriptor = false; + const char *name; static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; static const char * const misctype_reg_names[] = {"vPos", "vFace"}; @@ -724,8 +733,8 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const /* fall through */ default: - if (reg->type < ARRAY_SIZE(shader_register_names) && shader_register_names[reg->type]) - vkd3d_string_buffer_printf(buffer, "%s", shader_register_names[reg->type]); + if ((name = vsir_register_type_get_name(reg->type, NULL))) + vkd3d_string_buffer_printf(buffer, "%s", name); else vkd3d_string_buffer_printf(buffer, "%s<unhandled register type 
%#x>%s", compiler->colours.error, reg->type, compiler->colours.reset); @@ -753,15 +762,27 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const case VSIR_DIMENSION_SCALAR: switch (reg->data_type) { + case VSIR_DATA_BOOL: + shader_print_bool_literal(compiler, "", reg->u.immconst_u32[0], ""); + break; + case VSIR_DATA_F16: + shader_print_f16_literal(compiler, "", reg->u.immconst_u32[0], ""); + break; case VSIR_DATA_F32: if (untyped) shader_print_untyped_literal(compiler, "", reg->u.immconst_u32[0], ""); else shader_print_float_literal(compiler, "", reg->u.immconst_f32[0], ""); break; + case VSIR_DATA_I16: + shader_print_i16_literal(compiler, "", reg->u.immconst_u32[0], ""); + break; case VSIR_DATA_I32: shader_print_int_literal(compiler, "", reg->u.immconst_u32[0], ""); break; + case VSIR_DATA_U16: + shader_print_u16_literal(compiler, "", reg->u.immconst_u32[0], ""); + break; case VSIR_DATA_U32: shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); break; @@ -775,6 +796,18 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const case VSIR_DIMENSION_VEC4: switch (reg->data_type) { + case VSIR_DATA_BOOL: + shader_print_bool_literal(compiler, "", reg->u.immconst_u32[0], ""); + shader_print_bool_literal(compiler, ", ", reg->u.immconst_u32[1], ""); + shader_print_bool_literal(compiler, ", ", reg->u.immconst_u32[2], ""); + shader_print_bool_literal(compiler, ", ", reg->u.immconst_u32[3], ""); + break; + case VSIR_DATA_F16: + shader_print_f16_literal(compiler, "", reg->u.immconst_u32[0], ""); + shader_print_f16_literal(compiler, ", ", reg->u.immconst_u32[1], ""); + shader_print_f16_literal(compiler, ", ", reg->u.immconst_u32[2], ""); + shader_print_f16_literal(compiler, ", ", reg->u.immconst_u32[3], ""); + break; case VSIR_DATA_F32: if (untyped) { @@ -791,12 +824,24 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_float_literal(compiler, ", ", 
reg->u.immconst_f32[3], ""); } break; + case VSIR_DATA_I16: + shader_print_i16_literal(compiler, "", reg->u.immconst_u32[0], ""); + shader_print_i16_literal(compiler, ", ", reg->u.immconst_u32[1], ""); + shader_print_i16_literal(compiler, ", ", reg->u.immconst_u32[2], ""); + shader_print_i16_literal(compiler, ", ", reg->u.immconst_u32[3], ""); + break; case VSIR_DATA_I32: shader_print_int_literal(compiler, "", reg->u.immconst_u32[0], ""); shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[1], ""); shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[2], ""); shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[3], ""); break; + case VSIR_DATA_U16: + shader_print_u16_literal(compiler, "", reg->u.immconst_u32[0], ""); + shader_print_u16_literal(compiler, ", ", reg->u.immconst_u32[1], ""); + shader_print_u16_literal(compiler, ", ", reg->u.immconst_u32[2], ""); + shader_print_u16_literal(compiler, ", ", reg->u.immconst_u32[3], ""); + break; case VSIR_DATA_U32: shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[1], ""); @@ -926,7 +971,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const vkd3d_string_buffer_printf(buffer, "%s", suffix); } -static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) +static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vsir_operand *reg) { struct vkd3d_string_buffer *buffer = &compiler->buffer; const char *precision; @@ -960,7 +1005,7 @@ static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, cons vkd3d_string_buffer_printf(buffer, " {%s%s%s}", compiler->colours.modifier, precision, compiler->colours.reset); } -static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) +static void shader_print_non_uniform(struct 
vkd3d_d3d_asm_compiler *compiler, const struct vsir_operand *reg) { if (reg->non_uniform) vkd3d_string_buffer_printf(&compiler->buffer, " {%snonuniform%s}", @@ -968,7 +1013,7 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co } static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix) + const char *prefix, const struct vsir_operand *reg, const char *suffix) { static const char *dimensions[] = { @@ -994,9 +1039,9 @@ static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, else dimension = "??"; - vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension); + vkd3d_string_buffer_printf(buffer, " %s<%s%s", prefix, compiler->colours.type, dimension); shader_print_data_type(compiler, reg->data_type); - vkd3d_string_buffer_printf(buffer, ">%s", suffix); + vkd3d_string_buffer_printf(buffer, "%s>%s", compiler->colours.reset, suffix); } static void shader_print_indexable_temp_data_type(struct vkd3d_d3d_asm_compiler *compiler, @@ -1046,7 +1091,7 @@ static void shader_print_dst_operand(struct vkd3d_d3d_asm_compiler *compiler, { uint32_t write_mask = dst->write_mask; - shader_print_register(compiler, prefix, &dst->reg, is_declaration, ""); + shader_print_operand(compiler, prefix, &dst->reg, is_declaration, ""); if (write_mask && dst->reg.dimension == VSIR_DIMENSION_VEC4) { @@ -1085,7 +1130,7 @@ static void shader_print_src_operand(struct vkd3d_d3d_asm_compiler *compiler, if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) is_abs = true; - shader_print_register(compiler, is_abs ? "|" : "", &src->reg, false, ""); + shader_print_operand(compiler, is_abs ? 
"|" : "", &src->reg, false, ""); switch (src_modifier) { @@ -1524,12 +1569,12 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, ""); shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); - shader_print_register(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, ""); + shader_print_operand(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, ""); shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space); break; case VSIR_OP_DCL_CONSTANT_BUFFER: - shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, ""); + shader_print_operand(compiler, " ", &ins->declaration.cb.src.reg, true, ""); if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0)) shader_print_subscript(compiler, ins->declaration.cb.size, NULL); else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) @@ -1626,7 +1671,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VSIR_OP_DCL_SAMPLER: - shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true, + shader_print_operand(compiler, " ", &ins->declaration.sampler.src.reg, true, ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? 
", comparisonMode" : ""); shader_dump_register_space(compiler, ins->declaration.sampler.range.space); break; @@ -1937,16 +1982,16 @@ static void shader_print_descriptor_name(struct vkd3d_d3d_asm_compiler *compiler switch (t) { case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: - type = shader_register_names[VKD3DSPR_RESOURCE]; + type = vsir_register_type_get_name(VKD3DSPR_RESOURCE, NULL); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: - type = shader_register_names[VKD3DSPR_UAV]; + type = vsir_register_type_get_name(VKD3DSPR_UAV, NULL); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: - type = shader_register_names[VKD3DSPR_CONSTBUFFER]; + type = vsir_register_type_get_name(VKD3DSPR_CONSTBUFFER, NULL); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: - type = shader_register_names[VKD3DSPR_SAMPLER]; + type = vsir_register_type_get_name(VKD3DSPR_SAMPLER, NULL); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_FORCE_32BIT: break; @@ -2021,28 +2066,32 @@ enum vkd3d_result d3d_asm_compile(struct vsir_program *program, const struct vkd static const struct vkd3d_d3d_asm_colours no_colours = { .reset = "", + .enumerant = "", .error = "", + .label = "", .literal = "", .modifier = "", .opcode = "", .reg = "", .swizzle = "", + .type = "", .version = "", .write_mask = "", - .label = "", }; static const struct vkd3d_d3d_asm_colours colours = { .reset = "\x1b[m", + .enumerant = "\x1b[93m", .error = "\x1b[97;41m", + .label = "\x1b[91m", .literal = "\x1b[95m", .modifier = "\x1b[36m", .opcode = "\x1b[96;1m", .reg = "\x1b[96m", .swizzle = "\x1b[93m", + .type = "\x1b[92m", .version = "\x1b[36m", .write_mask = "\x1b[93m", - .label = "\x1b[91m", }; formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT @@ -2090,9 +2139,9 @@ enum vkd3d_result d3d_asm_compile(struct vsir_program *program, const struct vkd if (compiler.flags & VSIR_ASM_FLAG_DUMP_DENORM_MODES) vkd3d_string_buffer_printf(buffer, ".denorm %s, %s, %s\n", - vsir_denorm_mode_get_name(program->f16_denorm_mode, "??"), - 
vsir_denorm_mode_get_name(program->f32_denorm_mode, "??"), - vsir_denorm_mode_get_name(program->f64_denorm_mode, "??")); + vsir_denorm_mode_get_name(program->f16_denormal_mode, "??"), + vsir_denorm_mode_get_name(program->f32_denormal_mode, "??"), + vsir_denorm_mode_get_name(program->f64_denormal_mode, "??")); if (compiler.flags & VSIR_ASM_FLAG_DUMP_SIGNATURES && (result = dump_dxbc_signatures(&compiler, program)) < 0) diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 8784ea69e97..37d6f64964d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -560,15 +560,15 @@ static enum vkd3d_shader_register_type parse_register_type( return VKD3DSPR_INVALID; } -static void d3dbc_parse_register(struct vkd3d_shader_sm1_parser *d3dbc, - struct vkd3d_shader_register *reg, uint32_t param, struct vsir_src_operand *rel_addr) +static void d3dbc_parse_operand(struct vkd3d_shader_sm1_parser *d3dbc, + struct vsir_operand *reg, uint32_t param, struct vsir_src_operand *rel_addr) { enum vkd3d_shader_register_type reg_type; unsigned int index_offset, idx_count; reg_type = parse_register_type(d3dbc, param, &index_offset); idx_count = idx_count_from_reg_type(reg_type); - vsir_register_init(reg, reg_type, VSIR_DATA_F32, idx_count); + vsir_operand_init(reg, reg_type, VSIR_DATA_F32, idx_count); reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; reg->non_uniform = false; if (idx_count == 1) @@ -576,7 +576,7 @@ static void d3dbc_parse_register(struct vkd3d_shader_sm1_parser *d3dbc, reg->idx[0].offset = index_offset + (param & VKD3D_SM1_REGISTER_NUMBER_MASK); reg->idx[0].rel_addr = rel_addr; } - if (reg->type == VKD3DSPR_SAMPLER) + if (reg->type == VKD3DSPR_COMBINED_SAMPLER) reg->dimension = VSIR_DIMENSION_NONE; else if (reg->type == VKD3DSPR_DEPTHOUT) reg->dimension = VSIR_DIMENSION_SCALAR; @@ -592,15 +592,18 @@ static void d3dbc_parse_register(struct vkd3d_shader_sm1_parser *d3dbc, static void 
d3dbc_parse_src_operand(struct vkd3d_shader_sm1_parser *d3dbc, uint32_t param, struct vsir_src_operand *rel_addr, struct vsir_src_operand *src) { - d3dbc_parse_register(d3dbc, &src->reg, param, rel_addr); - src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); + d3dbc_parse_operand(d3dbc, &src->reg, param, rel_addr); + if (src->reg.dimension == VSIR_DIMENSION_VEC4) + src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); + else + src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; } static void d3dbc_parse_dst_operand(struct vkd3d_shader_sm1_parser *d3dbc, uint32_t param, struct vsir_src_operand *rel_addr, struct vsir_dst_operand *dst) { - d3dbc_parse_register(d3dbc, &dst->reg, param, rel_addr); + d3dbc_parse_operand(d3dbc, &dst->reg, param, rel_addr); dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; @@ -757,8 +760,8 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, element->used_mask &= element->mask; } -static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, - const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask, uint32_t dst_modifiers) +static bool add_signature_element_from_operand(struct vkd3d_shader_sm1_parser *sm1, + const struct vsir_operand *reg, bool is_dcl, unsigned int mask, uint32_t dst_modifiers) { const struct vkd3d_shader_version *version = &sm1->program->shader_version; unsigned int register_index = reg->idx_count > 0 ? 
reg->idx[0].offset : 0; @@ -868,8 +871,8 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * const struct vkd3d_shader_semantic *semantic) { const struct vkd3d_shader_version *version = &sm1->program->shader_version; - const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + const struct vsir_operand *reg = &semantic->resource.reg.reg; unsigned int mask = semantic->resource.reg.write_mask; uint32_t modifiers = semantic->resource.reg.modifiers; bool output; @@ -897,11 +900,11 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) output = false; else /* vpos and vface don't have a semantic. */ - return add_signature_element_from_register(sm1, reg, true, mask, modifiers); + return add_signature_element_from_operand(sm1, reg, true, mask, modifiers); /* sm2 pixel shaders use DCL but don't provide a semantic. */ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 2) - return add_signature_element_from_register(sm1, reg, true, mask, modifiers); + return add_signature_element_from_operand(sm1, reg, true, mask, modifiers); /* With the exception of vertex POSITION output, none of these are system * values. Pixel POSITION input is not equivalent to SV_Position; the closer @@ -927,42 +930,77 @@ static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, } } -static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, - const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) +static unsigned int d3dbc_get_src_register_count(enum vkd3d_shader_opcode opcode, unsigned int src_idx) { - struct vsir_program *program = sm1->program; - uint32_t register_index = reg->idx[0].offset; + switch (opcode) + { + case VSIR_OP_M3x2: + return (src_idx == 1) ? 
2 : 1; + case VSIR_OP_M3x3: + case VSIR_OP_M4x3: + return (src_idx == 1) ? 3 : 1; + case VSIR_OP_M3x4: + case VSIR_OP_M4x4: + return (src_idx == 1) ? 4 : 1; + default: + return 1; + } +} - switch (reg->type) +static void d3dbc_scan_register(struct vkd3d_shader_sm1_parser *d3dbc, + const struct vsir_operand *reg0, unsigned int mask, bool from_def, unsigned int count) +{ + struct vsir_program *program = d3dbc->program; + struct vsir_operand reg = *reg0; + + for (unsigned int i = 0; i < count; ++i) { - case VKD3DSPR_TEMP: - program->temp_count = max(program->temp_count, register_index + 1); - break; + uint32_t register_index = reg.idx[0].offset; - case VKD3DSPR_CONST: - record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); - break; + switch (reg.type) + { + case VKD3DSPR_TEMP: + program->temp_count = max(program->temp_count, register_index + 1); + break; - case VKD3DSPR_CONSTINT: - record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); - break; + case VKD3DSPR_CONST: + record_constant_register(d3dbc, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); + break; - case VKD3DSPR_CONSTBOOL: - record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); - break; + case VKD3DSPR_CONSTINT: + record_constant_register(d3dbc, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); + break; - default: - break; - } + case VKD3DSPR_CONSTBOOL: + record_constant_register(d3dbc, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); + break; + + case VKD3DSPR_DEPTHOUT: + bitmap_set(program->io_dcls, VKD3DSPR_DEPTHOUT); + break; - add_signature_element_from_register(sm1, reg, false, mask, 0); + case VKD3DSPR_RASTOUT: + if (register_index == VSIR_RASTOUT_POINT_SIZE) + program->has_point_size = true; + if (register_index == VSIR_RASTOUT_FOG) + program->has_fog = true; + break; + + default: + break; + } + + 
add_signature_element_from_operand(d3dbc, &reg, false, mask, 0); + + ++reg.idx[0].offset; + } } static void d3dbc_add_combined_sampler_descriptor(struct vkd3d_shader_sm1_parser *d3dbc, unsigned int sampler_idx, enum vkd3d_shader_resource_type resource_type) { - struct vkd3d_shader_register_range range = {.first = sampler_idx, .last = sampler_idx}; const struct vkd3d_shader_d3dbc_source_info *source_info = d3dbc->d3dbc_source_info; + struct vsir_register_range range = {.first = sampler_idx, .last = sampler_idx}; struct vsir_program *program = d3dbc->program; struct vkd3d_shader_descriptor_info1 *d; @@ -1095,18 +1133,13 @@ static void d3dbc_read_dst_operand(struct vkd3d_shader_sm1_parser *d3dbc, d3dbc_parse_src_operand(d3dbc, addr_token, NULL, dst_rel_addr); } d3dbc_parse_dst_operand(d3dbc, token, dst_rel_addr, dst); - - if (dst->reg.type == VKD3DSPR_RASTOUT && dst->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) - d3dbc->program->has_point_size = true; - if (dst->reg.type == VKD3DSPR_RASTOUT && dst->reg.idx[0].offset == VSIR_RASTOUT_FOG) - d3dbc->program->has_fog = true; } static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, struct vkd3d_shader_semantic *semantic) { enum vkd3d_sm1_resource_type resource_type; - struct vkd3d_shader_register_range *range; + struct vsir_register_range *range; uint32_t usage_token, dst_token; if (*ptr >= sm1->end || sm1->end - *ptr < 2) @@ -1148,6 +1181,9 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, d3dbc_add_combined_sampler_descriptor(sm1, range->first, semantic->resource_type); sm1->texture_descriptors |= (1u << range->first); } + + if (semantic->usage == VKD3D_DECL_USAGE_PSIZE) + sm1->program->has_point_size = true; } static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, @@ -1167,7 +1203,8 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const vsir_src_operand_init(src, VKD3DSPR_IMMCONST, 
data_type, 0); src->reg.dimension = dimension; memcpy(src->reg.u.immconst_u32, *ptr, count * sizeof(uint32_t)); - src->swizzle = VKD3D_SHADER_NO_SWIZZLE; + if (dimension == VSIR_DIMENSION_VEC4) + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; *ptr += count; } @@ -1372,31 +1409,31 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str { d3dbc_read_dst_operand(sm1, &p, dst); shader_sm1_read_immconst(sm1, &p, &src[0], VSIR_DIMENSION_VEC4, VSIR_DATA_F32); - shader_sm1_scan_register(sm1, &dst->reg, dst->write_mask, true); + d3dbc_scan_register(sm1, &dst->reg, dst->write_mask, true, 1); } else if (ins->opcode == VSIR_OP_DEFB) { d3dbc_read_dst_operand(sm1, &p, dst); shader_sm1_read_immconst(sm1, &p, &src[0], VSIR_DIMENSION_SCALAR, VSIR_DATA_U32); - shader_sm1_scan_register(sm1, &dst->reg, dst->write_mask, true); + d3dbc_scan_register(sm1, &dst->reg, dst->write_mask, true, 1); } else if (ins->opcode == VSIR_OP_DEFI) { d3dbc_read_dst_operand(sm1, &p, dst); shader_sm1_read_immconst(sm1, &p, &src[0], VSIR_DIMENSION_VEC4, VSIR_DATA_I32); - shader_sm1_scan_register(sm1, &dst->reg, dst->write_mask, true); + d3dbc_scan_register(sm1, &dst->reg, dst->write_mask, true, 1); } else if (ins->opcode == VSIR_OP_TEXKILL) { /* TEXKILL, uniquely, encodes its argument as a destination, when it is * semantically a source. Since we have multiple passes which operate * generically on sources or destinations, normalize that. 
*/ - const struct vkd3d_shader_register *reg; struct vsir_dst_operand tmp_dst; + const struct vsir_operand *reg; reg = &tmp_dst.reg; d3dbc_read_dst_operand(sm1, &p, &tmp_dst); - shader_sm1_scan_register(sm1, reg, tmp_dst.write_mask, false); + d3dbc_scan_register(sm1, reg, tmp_dst.write_mask, false, 1); vsir_src_operand_init(&src[0], reg->type, reg->data_type, reg->idx_count); src[0].reg = *reg; @@ -1411,7 +1448,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str if (ins->dst_count) { d3dbc_read_dst_operand(sm1, &p, dst); - shader_sm1_scan_register(sm1, &dst->reg, dst->write_mask, false); + d3dbc_scan_register(sm1, &dst->reg, dst->write_mask, false, 1); } /* Predication token */ @@ -1421,8 +1458,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str /* Other source tokens */ for (i = 0; i < ins->src_count; ++i) { + unsigned int src_reg_count = d3dbc_get_src_register_count(ins->opcode, i); + d3dbc_read_src_operand(sm1, &p, &src[i]); - shader_sm1_scan_register(sm1, &src[i].reg, mask_from_swizzle(src[i].swizzle), false); + d3dbc_scan_register(sm1, &src[i].reg, mask_from_swizzle(src[i].swizzle), false, src_reg_count); } } @@ -1527,7 +1566,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level)) return VKD3D_ERROR_OUT_OF_MEMORY; - program->f32_denorm_mode = VSIR_DENORM_FLUSH_TO_ZERO; + program->f32_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name); sm1->program = program; @@ -1588,7 +1627,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c for (i = 0; i < 3; ++i) { - struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; + struct vsir_register_range range = {.space = 0, .first = i, .last = i}; unsigned int size = get_external_constant_count(&sm1, i); if (size) @@ -1823,7 +1862,7 @@ static void d3dbc_write_comment(struct d3dbc_compiler *d3dbc, set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t))); } -static enum vkd3d_sm1_register_type d3dbc_register_type_from_vsir(const struct vkd3d_shader_register *reg) +static enum vkd3d_sm1_register_type d3dbc_register_type_from_vsir(const struct vsir_operand *reg) { if (reg->type == VKD3DSPR_CONST) { @@ -1844,7 +1883,7 @@ static enum vkd3d_sm1_register_type d3dbc_register_type_from_vsir(const struct v vkd3d_unreachable(); } -static uint32_t sm1_encode_register_type(const struct vkd3d_shader_register *reg) +static uint32_t sm1_encode_register_type(const struct vsir_operand *reg) { enum vkd3d_sm1_register_type sm1_type = d3dbc_register_type_from_vsir(reg); @@ -1925,6 +1964,27 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const s | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); } +static void validate_register_limits(struct d3dbc_compiler *d3dbc, + const struct vsir_operand *reg, const struct vkd3d_shader_location *loc) +{ + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + uint32_t idx = reg->idx[0].offset; + + if (reg->type == VKD3DSPR_TEMP) + { + if ((version->type == VKD3D_SHADER_TYPE_PIXEL && 
vkd3d_shader_ver_le(version, 1, 3) && idx >= 2) + || (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_le(version, 1, 4) && idx >= 6) + || (version->type == VKD3D_SHADER_TYPE_VERTEX && vkd3d_shader_ver_le(version, 2, 0) && idx >= 12) + || idx >= 32) + { + vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Register r%u exceeds limits for shader type %#x, version %u.%u.", + idx, version->type, version->major, version->minor); + d3dbc->failed = true; + } + } +} + static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; @@ -1948,6 +2008,8 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v for (i = 0; i < ins->dst_count; ++i) { + validate_register_limits(d3dbc, &ins->dst[i].reg, &ins->location); + if (ins->dst[i].reg.idx[0].rel_addr) { vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, @@ -1959,6 +2021,8 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v for (i = 0; i < ins->src_count; ++i) { + validate_register_limits(d3dbc, &ins->src[i].reg, &ins->location); + src = &ins->src[i]; write_sm1_src_register(buffer, src); if (src->reg.idx_count && src->reg.idx[0].rel_addr) @@ -1976,7 +2040,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v static void d3dbc_write_texkill(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { - const struct vkd3d_shader_register *reg = &ins->src[0].reg; + const struct vsir_operand *reg = &ins->src[0].reg; struct vkd3d_shader_instruction tmp; struct vsir_dst_operand dst; @@ -2019,6 +2083,30 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 put_f32(buffer, ins->src[0].reg.u.immconst_f32[x]); } +static void d3dbc_write_vsir_defi(struct d3dbc_compiler *d3dbc, 
const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + uint32_t token; + + const struct vsir_dst_operand reg = + { + .reg.type = VKD3DSPR_CONSTINT, + .write_mask = VKD3DSP_WRITEMASK_ALL, + .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, + .reg.idx_count = 1, + }; + + token = VKD3D_SM1_OP_DEFI; + if (version->major > 1) + token |= 5 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + put_u32(buffer, token); + + write_sm1_dst_register(buffer, &reg); + for (unsigned int x = 0; x < 4; ++x) + put_u32(buffer, ins->src[0].reg.u.immconst_u32[x]); +} + static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, unsigned int reg_id, enum vkd3d_sm1_resource_type res_type) { @@ -2098,6 +2186,10 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str d3dbc_write_vsir_def(d3dbc, ins); break; + case VSIR_OP_DEFI: + d3dbc_write_vsir_defi(d3dbc, ins); + break; + case VSIR_OP_DCL: d3dbc_write_vsir_dcl(d3dbc, ins); break; @@ -2108,6 +2200,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VSIR_OP_ABS: case VSIR_OP_ADD: + case VSIR_OP_BREAK: case VSIR_OP_CMP: case VSIR_OP_DP2ADD: case VSIR_OP_DP3: @@ -2116,6 +2209,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VSIR_OP_DSY: case VSIR_OP_ELSE: case VSIR_OP_ENDIF: + case VSIR_OP_ENDREP: case VSIR_OP_FRC: case VSIR_OP_IFC: case VSIR_OP_MAD: @@ -2124,6 +2218,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VSIR_OP_MOV: case VSIR_OP_MOVA: case VSIR_OP_MUL: + case VSIR_OP_REP: case VSIR_OP_SINCOS: case VSIR_OP_SLT: case VSIR_OP_TEXLD: diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 52448e89e37..4a087f9fe86 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -186,7 +186,6 @@ 
static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ if (data_size < VKD3D_DXBC_HEADER_SIZE) { - WARN("Invalid data size %zu.\n", data_size); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE, "DXBC size %zu is smaller than the DXBC header size.", data_size); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -197,7 +196,6 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ if (tag != TAG_DXBC) { - WARN("Wrong tag.\n"); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_MAGIC, "Invalid DXBC magic."); return VKD3D_ERROR_INVALID_ARGUMENT; } @@ -226,7 +224,6 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ TRACE("version: %#x.\n", version); if (version != 0x00000001) { - WARN("Got unexpected DXBC version %#x.\n", version); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_VERSION, "DXBC version %#x is not supported.", version); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -255,7 +252,6 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ if (chunk_offset >= data_size || !require_space(chunk_offset, 2, sizeof(uint32_t), data_size)) { - WARN("Invalid chunk offset %#x (data size %zu).\n", chunk_offset, data_size); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_OFFSET, "DXBC chunk %u has invalid offset %#x (data size %#zx).", i, chunk_offset, data_size); vkd3d_free(sections); @@ -269,8 +265,6 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ if (!require_space(chunk_ptr - data, 1, chunk_size, data_size)) { - WARN("Invalid chunk size %#x (data size %zu, chunk offset %#x).\n", - chunk_size, data_size, chunk_offset); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE, "DXBC chunk %u has invalid size %#x (data size %#zx, chunk offset %#x).", i, chunk_offset, data_size, 
chunk_offset); @@ -381,7 +375,6 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s if (!require_space(0, 2, sizeof(uint32_t), section->data.size)) { - WARN("Invalid data size %#zx.\n", section->data.size); vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_SIGNATURE, "Section size %zu is smaller than the minimum signature header size.", section->data.size); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -395,7 +388,6 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s if (align(header_size, sizeof(uint32_t)) != header_size || i < 2 || !require_space(2, i - 2, sizeof(uint32_t), section->data.size)) { - WARN("Invalid header size %#x.\n", header_size); vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_SIGNATURE, "Signature header size %#x is invalid.", header_size); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -524,7 +516,6 @@ static int shdr_parse_features(const struct vkd3d_shader_dxbc_section_desc *sect if (!require_space(0, 1, sizeof(uint64_t), section->data.size)) { - WARN("Invalid data size %#zx.\n", section->data.size); vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE, "SFI0 section size %zu is too small to contain flags.", section->data.size); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -1323,7 +1314,6 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co shader_write_root_descriptor1(buffer, &desc->u.v_1_1.parameters[i].u.descriptor); break; default: - FIXME("Unrecognized type %#x.\n", versioned_root_signature_get_parameter_type(desc, i)); vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE, "Invalid/unrecognised root signature root parameter type %#x.", versioned_root_signature_get_parameter_type(desc, i)); @@ -1403,7 +1393,6 @@ static int validate_descriptor_table_v_1_0(const struct vkd3d_shader_root_descri } else { - WARN("Invalid descriptor range 
type %#x.\n", r->range_type); vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_DESCRIPTOR_RANGE_TYPE, "Invalid root signature descriptor range type %#x.", r->range_type); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -1412,7 +1401,6 @@ static int validate_descriptor_table_v_1_0(const struct vkd3d_shader_root_descri if (have_srv_uav_cbv && have_sampler) { - WARN("Samplers cannot be mixed with CBVs/SRVs/UAVs in descriptor tables.\n"); vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_MIXED_DESCRIPTOR_RANGE_TYPES, "Encountered both CBV/SRV/UAV and sampler descriptor ranges in the same root descriptor table."); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -1444,7 +1432,6 @@ static int validate_descriptor_table_v_1_1(const struct vkd3d_shader_root_descri } else { - WARN("Invalid descriptor range type %#x.\n", r->range_type); vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_DESCRIPTOR_RANGE_TYPE, "Invalid root signature descriptor range type %#x.", r->range_type); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -1453,7 +1440,6 @@ static int validate_descriptor_table_v_1_1(const struct vkd3d_shader_root_descri if (have_srv_uav_cbv && have_sampler) { - WARN("Samplers cannot be mixed with CBVs/SRVs/UAVs in descriptor tables.\n"); vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_RS_MIXED_DESCRIPTOR_RANGE_TYPES, "Encountered both CBV/SRV/UAV and sampler descriptor ranges in the same root descriptor table."); return VKD3D_ERROR_INVALID_ARGUMENT; @@ -1509,7 +1495,6 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro && root_signature->version != VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1) { ret = VKD3D_ERROR_INVALID_ARGUMENT; - WARN("Root signature version %#x not supported.\n", root_signature->version); vkd3d_shader_error(&context.message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_VERSION, "Root signature version %#x is not supported.", root_signature->version); goto done; diff --git 
a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 8e6c91f9cd7..df398883e64 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -750,7 +750,6 @@ struct sm6_value const struct sm6_type *type; enum sm6_value_type value_type; unsigned int structure_stride; - bool is_back_ref; bool non_uniform; union { @@ -868,7 +867,7 @@ struct sm6_descriptor_info { enum vkd3d_shader_descriptor_type type; unsigned int id; - struct vkd3d_shader_register_range range; + struct vsir_register_range range; enum dxil_resource_kind kind; enum vsir_data_type resource_data_type; }; @@ -911,6 +910,30 @@ struct dxil_attribute_group size_t attribute_capacity; }; +enum fixup_type +{ + FIXUP_LOAD, + FIXUP_STORE, + FIXUP_ATOMICRMW, + FIXUP_CMPXCHG, +}; + +struct fixup_data +{ + enum fixup_type type; + /* Points to the instruction just before the insertion place. */ + struct vsir_program_iterator prev_it; + size_t value_idx; + + const struct sm6_value *ptr; + const struct sm6_value *src; + const struct sm6_value *cmp; + const struct sm6_value *new; + unsigned int alignment; + enum vkd3d_shader_opcode op; + bool is_volatile; +}; + struct sm6_parser { const uint32_t *ptr, *start, *end; @@ -941,7 +964,6 @@ struct sm6_parser struct vsir_dst_operand *input_params; struct vsir_dst_operand *patch_constant_params; uint32_t io_regs_declared[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; - struct vsir_src_operand *outpointid_param; struct sm6_function *functions; size_t function_count; @@ -971,6 +993,10 @@ struct sm6_parser size_t attribute_group_count; struct vkd3d_shader_parser p; + + struct fixup_data *fixups; + size_t fixup_count; + size_t fixup_capacity; }; struct dxil_abbrev_operand @@ -1702,7 +1728,6 @@ static bool dxil_record_validate_operand_min_count(const struct dxil_record *rec if (record->operand_count >= min_count) return true; - WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Invalid operand count %u for record code %u.", record->operand_count, record->code); return false; @@ -1714,7 +1739,6 @@ static void dxil_record_validate_operand_max_count(const struct dxil_record *rec if (record->operand_count <= max_count) return; - WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); } @@ -1795,7 +1819,7 @@ static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) break; case TYPE_CODE_DOUBLE: - sm6->program->f64_denorm_mode = VSIR_DENORM_PRESERVE; + sm6->program->f64_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_PRESERVE; dxil_record_validate_operand_max_count(record, 0, sm6); type->class = TYPE_CLASS_FLOAT; type->u.width = 64; @@ -1844,6 +1868,7 @@ static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) break; case TYPE_CODE_HALF: + sm6->program->f16_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_PRESERVE; dxil_record_validate_operand_max_count(record, 0, sm6); type->class = TYPE_CLASS_FLOAT; type->u.width = 16; @@ -2445,11 +2470,11 @@ static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64 { if (type_id >= sm6->type_count) { - WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, "DXIL type id %"PRIu64" is invalid.", type_id); return NULL; } + return &sm6->types[type_id]; } @@ -2786,7 +2811,6 @@ static struct vsir_src_operand *instruction_src_params_alloc(struct vkd3d_shader if (!(params = vsir_program_get_src_operands(sm6->program, count))) { - ERR("Failed to allocate src params.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating 
instruction src parameters."); return NULL; @@ -2813,10 +2837,10 @@ static struct vsir_dst_operand *instruction_dst_params_alloc(struct vkd3d_shader return dst; } -static void register_init_with_id(struct vkd3d_shader_register *reg, +static void vsir_operand_init_with_id(struct vsir_operand *reg, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int id) { - vsir_register_init(reg, reg_type, data_type, 1); + vsir_operand_init(reg, reg_type, data_type, 1); reg->idx[0].offset = id; } @@ -2867,49 +2891,7 @@ static enum vsir_data_type vsir_data_type_from_dxil(const struct sm6_type *type, return VSIR_DATA_U32; } -/* Based on the implementation in the OpenGL Mathematics library. */ -static uint32_t half_to_float(uint16_t value) -{ - uint32_t s = (value & 0x8000u) << 16; - uint32_t e = (value >> 10) & 0x1fu; - uint32_t m = value & 0x3ffu; - - if (!e) - { - if (!m) - { - /* Plus or minus zero */ - return s; - } - else - { - /* Denormalized number -- renormalize it */ - while (!(m & 0x400u)) - { - m <<= 1; - --e; - } - - ++e; - m &= ~0x400u; - } - } - else if (e == 31u) - { - /* Positive or negative infinity for zero 'm'. - * Nan for non-zero 'm' -- preserve sign and significand bits */ - return s | 0x7f800000u | (m << 13); - } - - /* Normalized number */ - e += 127u - 15u; - m <<= 13; - - /* Assemble s, e and m. 
*/ - return s | (e << 23) | m; -} - -static void register_convert_to_minimum_precision(struct vkd3d_shader_register *reg) +static void vsir_operand_convert_to_minimum_precision(struct vsir_operand *reg) { unsigned int i; @@ -2921,7 +2903,9 @@ static void register_convert_to_minimum_precision(struct vkd3d_shader_register * if (reg->type == VKD3DSPR_IMMCONST) { for (i = 0; i < VSIR_DIMENSION_VEC4; ++i) - reg->u.immconst_u32[i] = half_to_float(reg->u.immconst_u32[i]); + { + reg->u.immconst_u32[i] = vkd3d_f32_from_f16(reg->u.immconst_u32[i]); + } } break; @@ -2950,10 +2934,10 @@ static void register_convert_to_minimum_precision(struct vkd3d_shader_register * } } -static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, - struct sm6_parser *sm6); +static void register_index_address_init(struct vsir_register_index *idx, + const struct sm6_value *address, struct sm6_parser *sm6); -static void vsir_register_from_dxil_value(struct vkd3d_shader_register *reg, +static void vsir_operand_from_dxil_value(struct vsir_operand *reg, const struct sm6_value *value, uint32_t type_flags, struct sm6_parser *dxil) { const struct sm6_type *scalar_type; @@ -2966,26 +2950,26 @@ static void vsir_register_from_dxil_value(struct vkd3d_shader_register *reg, { case VALUE_TYPE_SSA: VKD3D_ASSERT(sm6_type_is_numeric(value->type) || sm6_type_is_struct(value->type)); - register_init_with_id(reg, VKD3DSPR_SSA, data_type, value->u.ssa.id); + vsir_operand_init_with_id(reg, VKD3DSPR_SSA, data_type, value->u.ssa.id); reg->dimension = sm6_type_is_numeric(value->type) ? 
VSIR_DIMENSION_SCALAR : VSIR_DIMENSION_VEC4; break; case VALUE_TYPE_ICB: - vsir_register_init(reg, VKD3DSPR_IMMCONSTBUFFER, data_type, 2); + vsir_operand_init(reg, VKD3DSPR_IMMCONSTBUFFER, data_type, 2); reg->idx[0].offset = value->u.icb.id; register_index_address_init(&reg->idx[1], value->u.icb.index.index, dxil); reg->idx[1].is_in_bounds = value->u.icb.index.is_in_bounds; break; case VALUE_TYPE_IDXTEMP: - vsir_register_init(reg, VKD3DSPR_IDXTEMP, data_type, 2); + vsir_operand_init(reg, VKD3DSPR_IDXTEMP, data_type, 2); reg->idx[0].offset = value->u.idxtemp.id; register_index_address_init(&reg->idx[1], value->u.idxtemp.index.index, dxil); reg->idx[1].is_in_bounds = value->u.idxtemp.index.is_in_bounds; break; case VALUE_TYPE_GROUPSHAREDMEM: - vsir_register_init(reg, VKD3DSPR_GROUPSHAREDMEM, data_type, 2); + vsir_operand_init(reg, VKD3DSPR_GROUPSHAREDMEM, data_type, 2); reg->idx[0].offset = value->u.groupsharedmem.id; register_index_address_init(&reg->idx[1], value->u.groupsharedmem.index.index, dxil); reg->idx[1].is_in_bounds = value->u.groupsharedmem.index.is_in_bounds; @@ -2993,7 +2977,7 @@ static void vsir_register_from_dxil_value(struct vkd3d_shader_register *reg, case VALUE_TYPE_CONSTANT: VKD3D_ASSERT(sm6_type_is_numeric(value->type) || sm6_type_is_struct(value->type)); - vsir_register_init(reg, scalar_type->u.width == 64 ? VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST, + vsir_operand_init(reg, scalar_type->u.width == 64 ? VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST, data_type, 0); reg->dimension = sm6_type_is_numeric(value->type) ? 
VSIR_DIMENSION_SCALAR : VSIR_DIMENSION_VEC4; reg->u = value->u.constant.immconst; @@ -3001,7 +2985,7 @@ static void vsir_register_from_dxil_value(struct vkd3d_shader_register *reg, case VALUE_TYPE_UNDEFINED: case VALUE_TYPE_INVALID: - vsir_register_init(reg, VKD3DSPR_UNDEF, data_type, 0); + vsir_operand_init(reg, VKD3DSPR_UNDEF, data_type, 0); break; case VALUE_TYPE_FUNCTION: @@ -3010,7 +2994,7 @@ static void vsir_register_from_dxil_value(struct vkd3d_shader_register *reg, vkd3d_unreachable(); } - register_convert_to_minimum_precision(reg); + vsir_operand_convert_to_minimum_precision(reg); reg->non_uniform = value->non_uniform; } @@ -3036,9 +3020,9 @@ static void sm6_parser_init_ssa_value(struct sm6_parser *sm6, struct sm6_value * "The type of SSA value %u is neither numeric nor a structure.", id); } -static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) +static void vsir_operand_init_constant_u32(struct vsir_operand *reg, unsigned int value) { - vsir_register_init(reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); + vsir_operand_init(reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); reg->u.immconst_u32[0] = value; } @@ -3094,11 +3078,10 @@ static void src_param_init_from_value(struct vsir_src_operand *param, const struct sm6_value *src, uint32_t type_flags, struct sm6_parser *dxil) { src_param_init(param); - vsir_register_from_dxil_value(&param->reg, src, type_flags, dxil); + vsir_operand_from_dxil_value(&param->reg, src, type_flags, dxil); } -static void src_param_init_vector_from_reg(struct vsir_src_operand *param, - const struct vkd3d_shader_register *reg) +static void src_param_init_vector_from_reg(struct vsir_src_operand *param, const struct vsir_operand *reg) { param->swizzle = (reg->dimension == VSIR_DIMENSION_VEC4) ? 
VKD3D_SHADER_NO_SWIZZLE : VKD3D_SHADER_SWIZZLE(X, X, X, X); param->modifiers = VKD3DSPSM_NONE; @@ -3108,10 +3091,10 @@ static void src_param_init_vector_from_reg(struct vsir_src_operand *param, static void src_param_make_constant_uint(struct vsir_src_operand *param, unsigned int value) { src_param_init(param); - register_make_constant_uint(&param->reg, value); + vsir_operand_init_constant_u32(&param->reg, value); } -static void register_index_address_init(struct vkd3d_shader_register_index *idx, +static void register_index_address_init(struct vsir_register_index *idx, const struct sm6_value *address, struct sm6_parser *sm6) { struct vsir_src_operand *rel_addr; @@ -3136,7 +3119,7 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, } static void sm6_register_from_handle(struct sm6_parser *sm6, - const struct sm6_handle_data *handle, struct vkd3d_shader_register *reg) + const struct sm6_handle_data *handle, struct vsir_operand *reg) { enum vkd3d_shader_register_type reg_type; enum vsir_data_type data_type; @@ -3167,7 +3150,7 @@ static void sm6_register_from_handle(struct sm6_parser *sm6, vkd3d_unreachable(); } - vsir_register_init(reg, reg_type, data_type, 2); + vsir_operand_init(reg, reg_type, data_type, 2); reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = handle->d->id; register_index_address_init(&reg->idx[1], handle->index, sm6); @@ -3177,7 +3160,7 @@ static void sm6_register_from_handle(struct sm6_parser *sm6, static void src_param_init_vector_from_handle(struct sm6_parser *sm6, struct vsir_src_operand *param, const struct sm6_handle_data *handle) { - struct vkd3d_shader_register reg; + struct vsir_operand reg; sm6_register_from_handle(sm6, handle, &reg); src_param_init_vector_from_reg(param, &reg); @@ -3194,7 +3177,7 @@ static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instructio dst_param_init(param); sm6_parser_init_ssa_value(dxil, dst); - vsir_register_from_dxil_value(&param->reg, dst, type_flags, dxil); + 
vsir_operand_from_dxil_value(&param->reg, dst, type_flags, dxil); return true; } @@ -3210,7 +3193,7 @@ static bool instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instructio dst_param_init_vector(vsir_dst, component_count); sm6_parser_init_ssa_value(dxil, dxil_dst); - vsir_register_from_dxil_value(&vsir_dst->reg, dxil_dst, 0, dxil); + vsir_operand_from_dxil_value(&vsir_dst->reg, dxil_dst, 0, dxil); return true; } @@ -3295,7 +3278,6 @@ static bool sm6_value_validate_is_register(const struct sm6_value *value, struct { if (!sm6_value_is_register(value)) { - WARN("Operand of type %u is not a register.\n", value->value_type); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A register operand passed to a DXIL instruction is not a register."); return false; @@ -3307,7 +3289,6 @@ static bool sm6_value_validate_is_handle(const struct sm6_value *value, struct s { if (!sm6_value_is_handle(value)) { - WARN("Handle parameter of type %u is not a handle.\n", value->value_type); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, "A handle parameter passed to a DX intrinsic function is not a handle."); return false; @@ -3326,7 +3307,6 @@ static bool sm6_value_validate_is_texture_handle(const struct sm6_value *value, kind = value->u.handle.d->kind; if (!resource_kind_is_texture(kind)) { - WARN("Resource kind %u for op %u is not a texture.\n", kind, op); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, "Resource kind %u for texture operation %u is not a texture.", kind, op); return false; @@ -3346,7 +3326,6 @@ static bool sm6_value_validate_is_texture_2dms_handle(const struct sm6_value *va kind = value->u.handle.d->kind; if (!resource_kind_is_multisampled(kind)) { - WARN("Resource kind %u for op %u is not a 2DMS texture.\n", kind, op); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, "Resource kind %u for texture operation %u is not a 2DMS 
texture.", kind, op); return false; @@ -3366,7 +3345,6 @@ static bool sm6_value_validate_is_sampler_handle(const struct sm6_value *value, kind = value->u.handle.d->kind; if (kind != RESOURCE_KIND_SAMPLER) { - WARN("Resource kind %u for op %u is not a sampler.\n", kind, op); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, "Resource kind %u for sample operation %u is not a sampler.", kind, op); return false; @@ -3379,7 +3357,6 @@ static bool sm6_value_validate_is_pointer(const struct sm6_value *value, struct { if (!sm6_type_is_pointer(value->type)) { - WARN("Operand result type class %u is not a pointer.\n", value->type->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A pointer operand passed to a DXIL instruction is not a pointer."); return false; @@ -3387,22 +3364,10 @@ static bool sm6_value_validate_is_pointer(const struct sm6_value *value, struct return true; } -static bool sm6_value_validate_is_backward_ref(const struct sm6_value *value, struct sm6_parser *sm6) -{ - if (!value->is_back_ref) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, - "Forward-referenced pointer declarations are not supported."); - return false; - } - return true; -} - static bool sm6_value_validate_is_numeric(const struct sm6_value *value, struct sm6_parser *sm6) { if (!sm6_type_is_numeric(value->type)) { - WARN("Operand result type class %u is not numeric.\n", value->type->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A numeric operand passed to a DXIL instruction is not numeric."); return false; @@ -3415,7 +3380,6 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 const struct sm6_type *type = value->type; if (!sm6_type_is_bool(type)) { - WARN("Operand of type class %u is not bool.\n", type->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A bool operand of type class %u 
passed to a DXIL instruction is not a bool.", type->class); return false; @@ -3427,7 +3391,6 @@ static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, { if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) { - WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); return false; @@ -3439,7 +3402,6 @@ static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_ { if (!sm6_type_is_i32(value->type)) { - WARN("Operand result type %u is not i32.\n", value->type->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "An int32 operand passed to a DXIL instruction is not an int32."); return false; @@ -3452,14 +3414,13 @@ static struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsig if (idx < sm6->value_count) return &sm6->values[idx]; - WARN("Invalid value index %u.\n", idx); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value index %u.", idx); return NULL; } static void sm6_parser_pre_init_or_validate_referenced_value(struct sm6_parser *dxil, - size_t operand, const struct sm6_type *fwd_type) + size_t operand, const struct sm6_type *fwd_type, bool expect_ssa) { struct sm6_value *value; @@ -3475,7 +3436,7 @@ static void sm6_parser_pre_init_or_validate_referenced_value(struct sm6_parser * vkd3d_shader_parser_warning(&dxil->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "The type of a source value does not match the predefined type."); } - else + else if (expect_ssa) { value->type = fwd_type; value->value_type = VALUE_TYPE_SSA; @@ -3485,10 +3446,18 @@ static void sm6_parser_pre_init_or_validate_referenced_value(struct sm6_parser * vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE, "The type of SSA value %u is 
neither numeric nor a structure.", value->u.ssa.id); } + else + { + value->type = fwd_type; + + if (!sm6_type_is_pointer(value->type)) + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE, + "A pointer forward reference doesn't have a pointer type."); + } } static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const struct dxil_record *record, - const struct sm6_type *fwd_type, unsigned int *rec_idx) + const struct sm6_type *fwd_type, unsigned int *rec_idx, bool expect_ssa) { unsigned int idx; uint64_t val_ref; @@ -3520,7 +3489,7 @@ static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const stru *rec_idx = idx; if (fwd_type) - sm6_parser_pre_init_or_validate_referenced_value(sm6, operand, fwd_type); + sm6_parser_pre_init_or_validate_referenced_value(sm6, operand, fwd_type, expect_ssa); return operand; } @@ -3551,16 +3520,25 @@ static const struct sm6_value *sm6_parser_get_value_by_rotated_signed_idx(struct if ((operand = sm6_parser_get_value_index(dxil, rotated_idx)) == SIZE_MAX) return NULL; - sm6_parser_pre_init_or_validate_referenced_value(dxil, operand, fwd_type); + sm6_parser_pre_init_or_validate_referenced_value(dxil, operand, fwd_type, true); return &dxil->values[operand]; } -static const struct sm6_value *sm6_parser_get_value_by_ref(struct sm6_parser *sm6, +static const struct sm6_value *sm6_parser_get_value_by_ref(struct sm6_parser *dxil, + const struct dxil_record *record, const struct sm6_type *type, unsigned int *rec_idx) +{ + size_t operand = sm6_parser_get_value_idx_by_ref(dxil, record, type, rec_idx, true); + + return operand == SIZE_MAX ? NULL : &dxil->values[operand]; +} + +static const struct sm6_value *sm6_parser_get_pointer_value_by_ref(struct sm6_parser *dxil, const struct dxil_record *record, const struct sm6_type *type, unsigned int *rec_idx) { - size_t operand = sm6_parser_get_value_idx_by_ref(sm6, record, type, rec_idx); - return operand == SIZE_MAX ? 
NULL : &sm6->values[operand]; + size_t operand = sm6_parser_get_value_idx_by_ref(dxil, record, type, rec_idx, false); + + return operand == SIZE_MAX ? NULL : &dxil->values[operand]; } static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) @@ -3640,7 +3618,6 @@ static struct sm6_index *sm6_get_value_index(struct sm6_parser *sm6, struct sm6_ return &value->u.groupsharedmem.index; default: - WARN("Cannot index into value of type %#x.\n", value->value_type); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Cannot index into value of type %#x.", value->value_type); return NULL; @@ -3669,14 +3646,12 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c if ((value = record->operands[i + 1]) >= sm6->cur_max_value) { - WARN("Invalid value index %"PRIu64".\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value index %"PRIu64".", value); return VKD3D_ERROR_INVALID_SHADER; } else if (value == sm6->value_count) { - WARN("Invalid value self-reference at %"PRIu64".\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value self-reference for a constexpr GEP."); return VKD3D_ERROR_INVALID_SHADER; @@ -3684,15 +3659,10 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c operands[j] = &sm6->values[value]; if (value > sm6->value_count) - { operands[j]->type = elem_type; - } else if (operands[j]->type != elem_type) - { - WARN("Type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "Type mismatch in constexpr GEP elements."); - } } *dst = *operands[0]; @@ -3703,7 +3673,6 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c if (index->index) { - WARN("Unsupported stacked GEP.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A GEP instruction on the result of a 
previous GEP is unsupported."); return VKD3D_ERROR_INVALID_SHADER; @@ -3711,14 +3680,12 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c if (!sm6_value_is_constant_zero(operands[1])) { - WARN("Expected constant zero.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "The pointer dereference index for a constexpr GEP instruction is not constant zero."); return VKD3D_ERROR_INVALID_SHADER; } if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type)) { - WARN("Element index is not constant int.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A constexpr GEP element index is not a constant integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -3727,27 +3694,20 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c ptr_type = operands[0]->type; if (!sm6_type_is_pointer(ptr_type)) { - WARN("Constexpr GEP base value is not a pointer.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A constexpr GEP base value is not a pointer."); return VKD3D_ERROR_INVALID_SHADER; } if (!pointee_type) - { pointee_type = ptr_type->u.pointer.type; - } else if (pointee_type != ptr_type->u.pointer.type) - { - WARN("Explicit pointee type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "Explicit pointee type for constexpr GEP does not match the element type."); - } offset = sm6_value_get_constant_uint(operands[2], sm6); if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset))) { - WARN("Failed to get element type.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Failed to get the element type of a constexpr GEP."); return VKD3D_ERROR_INVALID_SHADER; @@ -3755,7 +3715,6 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, 
ptr_type->u.pointer.addr_space, sm6))) { - WARN("Failed to get pointer type for type %u.\n", gep_type->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "Module does not define a pointer type for a constexpr GEP result."); return VKD3D_ERROR_INVALID_SHADER; @@ -3805,7 +3764,6 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const dst = sm6_parser_get_current_value(sm6); dst->type = type; - dst->is_back_ref = true; switch (record->code) { @@ -3999,7 +3957,6 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const if ((value = record->operands[0]) != CAST_BITCAST) { - WARN("Unhandled constexpr cast op %"PRIu64".\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Constexpr cast op %"PRIu64" is unhandled.", value); return VKD3D_ERROR_INVALID_SHADER; @@ -4008,7 +3965,6 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const ptr_type = sm6_parser_get_type(sm6, record->operands[1]); if (!sm6_type_is_pointer(ptr_type)) { - WARN("Constexpr cast at constant idx %zu is not a pointer.\n", value_idx); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Constexpr cast source operand is not a pointer."); return VKD3D_ERROR_INVALID_SHADER; @@ -4016,14 +3972,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const if ((value = record->operands[2]) >= sm6->cur_max_value) { - WARN("Invalid value index %"PRIu64".\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value index %"PRIu64".", value); return VKD3D_ERROR_INVALID_SHADER; } else if (value == value_idx) { - WARN("Invalid value self-reference at %"PRIu64".\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value self-reference for a constexpr cast."); return VKD3D_ERROR_INVALID_SHADER; @@ -4099,7 +4053,7 @@ static void 
sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru { enum vsir_data_type data_type = vsir_data_type_from_dxil(elem_type, 0, sm6); - if (!(sm6->program->global_flags & VKD3DSGF_FORCE_NATIVE_LOW_PRECISION)) + if (!(sm6->program->global_flags & VKD3DSGF_ENABLE_NATIVE_LOW_PRECISION)) { if (data_type == VSIR_DATA_F16) data_type = VSIR_DATA_F32; @@ -4138,7 +4092,7 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 dst->value_type = VALUE_TYPE_GROUPSHAREDMEM; dst->u.groupsharedmem.id = sm6->tgsm_count++; dst->structure_stride = 0; - vsir_register_from_dxil_value(&ins->declaration.tgsm_raw.reg.reg, dst, 0, sm6); + vsir_operand_from_dxil_value(&ins->declaration.tgsm_raw.reg.reg, dst, 0, sm6); ins->declaration.tgsm_raw.alignment = alignment; byte_count = elem_type->u.width / CHAR_BIT; /* Convert minimum precision types to their 32-bit equivalent. */ @@ -4166,7 +4120,7 @@ static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const str /* Convert minimum precision types to their 32-bit equivalent. 
*/ if (dst->structure_stride == 2) dst->structure_stride = 4; - vsir_register_from_dxil_value(&ins->declaration.tgsm_structured.reg.reg, dst, 0, sm6); + vsir_operand_from_dxil_value(&ins->declaration.tgsm_structured.reg.reg, dst, 0, sm6); if (dst->structure_stride != 4) vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Structured TGSM byte stride %u is not supported.", dst->structure_stride); @@ -4222,8 +4176,6 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ if (!(type = sm6_type_get_pointer_to_type(type, address_space, sm6))) { - WARN("Failed to get pointer type for type class %u, address space %"PRIu64".\n", - type->class, address_space); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "Module does not define a pointer type for a global variable."); return false; @@ -4233,7 +4185,6 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ { if (!sm6_type_is_pointer(type)) { - WARN("Type is not a pointer.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "The type of a global variable is not a pointer."); return false; @@ -4245,7 +4196,6 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ { if (init - 1 >= sm6->value_capacity) { - WARN("Invalid value index %"PRIu64" for initialiser.", init - 1); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Global variable initialiser value index %"PRIu64" is invalid.", init - 1); return false; @@ -4281,11 +4231,9 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ dst = sm6_parser_get_current_value(sm6); dst->type = type; - dst->is_back_ref = true; if (is_constant && !init) { - WARN("Constant array has no initialiser.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "A constant global variable has no initialiser."); return false; @@ -4302,7 +4250,6 @@ static 
bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ { if (!sm6_type_is_numeric(scalar_type)) { - WARN("Unsupported type class %u.\n", scalar_type->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "TGSM variables of type class %u are not supported.", scalar_type->class); return false; @@ -4333,7 +4280,6 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_constant_array(value) && !sm6_value_is_undef(value))) { - WARN("Invalid initialiser index %zu.\n", index); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Global variable initialiser value index %zu is invalid.", index); return NULL; @@ -4363,7 +4309,6 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init /* Arrays of bool are not used in DXIL. dxc will emit an array of int32 instead if necessary. */ if (!(size = elem_type->u.width / CHAR_BIT)) { - WARN("Invalid data type width %u.\n", elem_type->u.width); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "An immediate constant buffer is declared with boolean elements."); return NULL; @@ -4403,7 +4348,7 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init icb->data_type = VSIR_DATA_F32; for (i = 0; i < count; ++i) { - icb->data[i] = half_to_float(elements[i]); + icb->data[i] = vkd3d_f32_from_f16(elements[i]); } break; @@ -4465,7 +4410,6 @@ static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_constant_array(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) { - WARN("Invalid initialiser index %zu.\n", index); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "TGSM initialiser value index %zu is invalid.", index); return false; @@ -4595,7 +4539,7 @@ 
static void dst_param_io_init(struct vsir_dst_operand *param, const struct signa param->shift = 0; /* DXIL types do not have signedness. Load signed elements as unsigned. */ component_type = e->component_type == VKD3D_SHADER_COMPONENT_INT ? VKD3D_SHADER_COMPONENT_UINT : e->component_type; - vsir_register_init(&param->reg, reg_type, vsir_data_type_from_component_type(component_type), 0); + vsir_operand_init(&param->reg, reg_type, vsir_data_type_from_component_type(component_type), 0); param->reg.dimension = dimension; } @@ -4636,12 +4580,15 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: *dimension = VSIR_DIMENSION_SCALAR; return VKD3DSPR_DEPTHOUTLE; + case VKD3D_SHADER_SV_STENCIL_REF: + *dimension = VSIR_DIMENSION_SCALAR; + return VKD3DSPR_OUTSTENCILREF; default: return VKD3DSPR_INVALID; } } -static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, +static bool sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, bool is_input, enum vkd3d_shader_register_type reg_type, struct vsir_dst_operand *params) { enum vkd3d_shader_type shader_type = sm6->program->shader_version.type; @@ -4695,8 +4642,8 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade { if (reg_type == VKD3DSPR_OUTPUT) { - VKD3D_ASSERT(sm6->outpointid_param); - param->reg.idx[count].rel_addr = sm6->outpointid_param; + if (!(param->reg.idx[count].rel_addr = vsir_program_create_outpointid_param(sm6->program))) + return false; } param->reg.idx[count++].offset = 0; } @@ -4708,6 +4655,8 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade param->reg.idx[count++].offset = i; param->reg.idx_count = count; } + + return true; } static int sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) @@ -4719,7 +4668,8 @@ static int sm6_parser_init_output_signature(struct 
sm6_parser *sm6, const struct return VKD3D_ERROR_OUT_OF_MEMORY; } - sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params); + if (!sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params)) + return VKD3D_ERROR_OUT_OF_MEMORY; return VKD3D_OK; } @@ -4733,7 +4683,8 @@ static int sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct return VKD3D_ERROR_OUT_OF_MEMORY; } - sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params); + if (!sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params)) + return VKD3D_ERROR_OUT_OF_MEMORY; return VKD3D_OK; } @@ -4751,8 +4702,9 @@ static int sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, return VKD3D_ERROR_OUT_OF_MEMORY; } - sm6_parser_init_signature(sm6, patch_constant_signature, is_input, VKD3DSPR_PATCHCONST, - sm6->patch_constant_params); + if (!sm6_parser_init_signature(sm6, patch_constant_signature, is_input, + VKD3DSPR_PATCHCONST, sm6->patch_constant_params)) + return VKD3D_ERROR_OUT_OF_MEMORY; return VKD3D_OK; } @@ -4777,6 +4729,7 @@ struct function_emission_state { struct sm6_function *function; const struct dxil_record *record; + struct fixup_data *fixup; unsigned int temp_idx; }; @@ -4784,18 +4737,105 @@ static struct vkd3d_shader_instruction *sm6_parser_add_function_instruction(stru struct function_emission_state *state) { struct sm6_function *function = state->function; + struct fixup_data *fixup = state->fixup; struct vkd3d_shader_instruction *ins; - if (!(ins = shader_instruction_array_append(&function->instructions))) - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory allocating instruction."); + if (fixup) + { + if (!vsir_program_iterator_insert_after(&fixup->prev_it, 1)) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating fixup instruction."); + return 
NULL; + } + + ins = vsir_program_iterator_next(&fixup->prev_it); + VKD3D_ASSERT(ins); + } + else + { + if (!(ins = shader_instruction_array_append(&function->instructions))) + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating instruction."); + } return ins; } +static struct fixup_data *sm6_parser_add_fixup(struct sm6_parser *dxil, struct function_emission_state *state, + enum fixup_type type) +{ + struct fixup_data *fixup; + + if (!vkd3d_array_reserve((void **)&dxil->fixups, &dxil->fixup_capacity, + dxil->fixup_count + 1, sizeof(*dxil->fixups))) + { + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating a fixup."); + return NULL; + } + + fixup = &dxil->fixups[dxil->fixup_count++]; + + fixup->type = type; + fixup->prev_it = vsir_program_iterator(&state->function->instructions); + vsir_program_iterator_tail(&fixup->prev_it); + fixup->value_idx = dxil->value_count; + + return fixup; +} + +static void sm6_parser_fixup_load(struct sm6_parser *dxil, struct function_emission_state *state); +static void sm6_parser_fixup_store(struct sm6_parser *dxil, struct function_emission_state *state); +static void sm6_parser_fixup_atomicrmw(struct sm6_parser *dxil, struct function_emission_state *state); +static void sm6_parser_fixup_cmpxchg(struct sm6_parser *dxil, struct function_emission_state *state); + +static void sm6_parser_apply_fixups(struct sm6_parser *dxil, struct sm6_function *function) +{ + size_t i, prev_value_count = dxil->value_count; + + /* Apply the fixups in reversed order, otherwise the first ones invalidate + * iterators for the later ones. 
*/ + + for (i = dxil->fixup_count - 1; i != SIZE_MAX; --i) + { + struct fixup_data *fixup = &dxil->fixups[i]; + struct function_emission_state state = {0}; + + state.fixup = fixup; + state.function = function; + + dxil->value_count = fixup->value_idx; + + switch (fixup->type) + { + case FIXUP_LOAD: + sm6_parser_fixup_load(dxil, &state); + break; + + case FIXUP_STORE: + sm6_parser_fixup_store(dxil, &state); + break; + + case FIXUP_ATOMICRMW: + sm6_parser_fixup_atomicrmw(dxil, &state); + break; + + case FIXUP_CMPXCHG: + sm6_parser_fixup_cmpxchg(dxil, &state); + break; + } + + dxil->program->temp_count = max(dxil->program->temp_count, state.temp_idx); + } + + dxil->value_count = prev_value_count; + dxil->fixup_count = 0; +} + static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, - const struct vkd3d_shader_register *operand_regs, unsigned int component_count, - struct function_emission_state *state, struct vkd3d_shader_register *reg); + const struct vsir_operand *operand_regs, unsigned int component_count, + struct function_emission_state *state, struct vsir_operand *reg); static void sm6_parser_emit_alloca(struct sm6_parser *sm6, struct function_emission_state *state) { @@ -4829,7 +4869,6 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, struct function_emiss if (!sm6_type_is_array(type[0]) || !sm6_type_is_numeric(elem_type = type[0]->u.array.elem_type)) { - WARN("Type is not a numeric array.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Result type of an ALLOCA instruction is not a numeric array."); return; @@ -4838,7 +4877,6 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, struct function_emiss * forward reference. We only support a constant size, so no forward ref support is needed. 
*/ if (!sm6_type_is_integer(type[1])) { - WARN("Size operand type is not scalar integer.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "The type of the allocation size operand of an ALLOCA instruction is not scalar integer."); return; @@ -4846,7 +4884,6 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, struct function_emiss if (!(dst->type = sm6_type_get_pointer_to_type(type[0], ADDRESS_SPACE_DEFAULT, sm6))) { - WARN("Failed to get pointer type for type class %u.\n", type[0]->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "Module does not define a pointer type for an ALLOCA instruction."); return; @@ -4903,47 +4940,32 @@ static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) } } -static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, struct function_emission_state *state) +static void sm6_parser_emit_atomicrmw(struct sm6_parser *dxil, struct function_emission_state *state) { - struct sm6_value *dst = sm6_parser_get_current_value(sm6); + struct sm6_value *dst = sm6_parser_get_current_value(dxil); const struct dxil_record *record = state->record; - struct vkd3d_shader_register regs[2], reg; - struct vkd3d_shader_instruction *ins; - struct vsir_src_operand *src_params; - struct vsir_dst_operand *dst_params; - struct vkd3d_shader_register coord; const struct sm6_value *ptr, *src; enum vkd3d_shader_opcode op; + struct fixup_data *fixup; unsigned int i = 0; bool is_volatile; uint64_t code; - if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) - || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) - || !sm6_value_validate_is_backward_ref(ptr, sm6)) - return; - - vsir_register_from_dxil_value(&reg, ptr, 0, sm6); - - if (reg.type != VKD3DSPR_GROUPSHAREDMEM) - { - WARN("Register is not groupshared.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "The destination register for an atomicrmw instruction is not groupshared memory."); + 
if (!(ptr = sm6_parser_get_pointer_value_by_ref(dxil, record, NULL, &i)) + || !sm6_value_validate_is_pointer_to_i32(ptr, dxil)) return; - } dst->type = ptr->type->u.pointer.type; - if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))) + if (!(src = sm6_parser_get_value_by_ref(dxil, record, dst->type, &i))) return; - if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) + if (!dxil_record_validate_operand_count(record, i + 4, i + 4, dxil)) return; if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VSIR_OP_INVALID) { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_NOT_IMPLEMENTED, "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code); return; } @@ -4959,24 +4981,57 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, struct function_em if ((code = record->operands[i]) != 1) WARN("Ignoring synchronisation scope %"PRIu64".\n", code); + sm6_parser_init_ssa_value(dxil, dst); + + if (!(fixup = sm6_parser_add_fixup(dxil, state, FIXUP_ATOMICRMW))) + return; + + fixup->ptr = ptr; + fixup->src = src; + fixup->op = op; + fixup->is_volatile = is_volatile; +} + +static void sm6_parser_fixup_atomicrmw(struct sm6_parser *dxil, struct function_emission_state *state) +{ + struct sm6_value *dst = sm6_parser_get_current_value(dxil); + struct fixup_data *fixup = state->fixup; + struct vkd3d_shader_instruction *ins; + struct vsir_src_operand *src_params; + struct vsir_dst_operand *dst_params; + const struct sm6_value *ptr, *src; + struct vsir_operand regs[2], reg; + struct vsir_operand coord; + + ptr = fixup->ptr; + src = fixup->src; + vsir_operand_from_dxil_value(&reg, ptr, 0, dxil); + + if (reg.type != VKD3DSPR_GROUPSHAREDMEM) + { + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for an atomicrmw instruction is not groupshared memory."); + return; + } + if 
(ptr->structure_stride) { if (reg.idx[1].rel_addr) regs[0] = reg.idx[1].rel_addr->reg; else - register_make_constant_uint(&regs[0], reg.idx[1].offset); - register_make_constant_uint(&regs[1], 0); - if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) + vsir_operand_init_constant_u32(&regs[0], reg.idx[1].offset); + vsir_operand_init_constant_u32(&regs[1], 0); + if (!sm6_parser_emit_reg_composite_construct(dxil, regs, 2, state, &coord)) return; } - if (!(ins = sm6_parser_add_function_instruction(sm6, state))) + if (!(ins = sm6_parser_add_function_instruction(dxil, state))) return; - vsir_instruction_init(ins, &sm6->p.location, op); - ins->flags = is_volatile ? VKD3DARF_VOLATILE : 0; + vsir_instruction_init(ins, &dxil->p.location, fixup->op); + ins->flags = fixup->is_volatile ? VKD3DARF_VOLATILE : 0; - if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + if (!(src_params = instruction_src_params_alloc(ins, 2, dxil))) { vkd3d_shader_instruction_make_nop(ins); return; @@ -4985,16 +5040,16 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, struct function_em src_param_init_vector_from_reg(&src_params[0], &coord); else src_param_make_constant_uint(&src_params[0], 0); - src_param_init_from_value(&src_params[1], src, 0, sm6); + src_param_init_from_value(&src_params[1], src, 0, dxil); - sm6_parser_init_ssa_value(sm6, dst); + sm6_parser_init_ssa_value(dxil, dst); - if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6))) + if (!(dst_params = instruction_dst_params_alloc(ins, 2, dxil))) { vkd3d_shader_instruction_make_nop(ins); return; } - vsir_register_from_dxil_value(&dst_params[0].reg, dst, 0, sm6); + vsir_operand_from_dxil_value(&dst_params[0].reg, dst, 0, dxil); dst_param_init(&dst_params[0]); dst_params[1].reg = reg; @@ -5016,18 +5071,14 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty if (!is_int && !sm6_type_is_floating_point(type_a)) { - WARN("Argument type %u is not bool, int16/32/64 or 
floating point.\n", type_a->class); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "An argument to a binary operation is not bool, int16/32/64 or floating point."); return VSIR_OP_INVALID; } + if (type_a != type_b) - { - WARN("Type mismatch, type %u width %u vs type %u width %u.\n", type_a->class, - type_a->u.width, type_b->class, type_b->u.width); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "Type mismatch in binary operation arguments."); - } *aux_opcode = VSIR_OP_NOP; @@ -5093,11 +5144,8 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty } if (!is_valid) - { - WARN("Invalid operation %u for type %u, width %u.\n", op, type_a->class, type_a->u.width); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_OPERATION, "Binary operation %u is invalid on type class %u, width %u.", op, type_a->class, type_a->u.width); - } return op; } @@ -5149,7 +5197,7 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, struct function_emissi src_param_init_from_value(&src_params[0], b, DXIL_TYPE_SIGNED, sm6); dst_param_init(&dst_params[0]); - register_init_with_id(&dst_params[0].reg, VKD3DSPR_SSA, src_params[0].reg.data_type, aux_id); + vsir_operand_init_with_id(&dst_params[0].reg, VKD3DSPR_SSA, src_params[0].reg.data_type, aux_id); } if (!(ins = sm6_parser_add_function_instruction(sm6, state))) @@ -5192,15 +5240,10 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, struct function_emissi } /* The above flags are very common and cause warning spam. 
*/ if (flags && silence_warning) - { TRACE("Ignoring flags %#"PRIx64".\n", flags); - } else if (flags) - { - WARN("Ignoring flags %#"PRIx64".\n", flags); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring flags %#"PRIx64" for a binary operation.", flags); - } if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) { @@ -5217,7 +5260,7 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, struct function_emissi else { src_param_init(&src_params[1]); - register_init_with_id(&src_params[1].reg, VKD3DSPR_SSA, src_params[0].reg.data_type, aux_id); + vsir_operand_init_with_id(&src_params[1].reg, VKD3DSPR_SSA, src_params[0].reg.data_type, aux_id); } dst->type = a->type; @@ -5247,6 +5290,32 @@ static bool sm6_function_validate_block_index(const struct sm6_function *functio return true; } +static bool dxil_copy_template_operand(struct sm6_parser *dxil, + struct vsir_operand *operand, const struct vsir_operand *template) +{ + *operand = *template; + + for (unsigned int i = 0; i < operand->idx_count; ++i) + { + operand->idx[i].rel_addr = NULL; + } + + for (unsigned int i = 0; i < operand->idx_count; ++i) + { + if (!template->idx[i].rel_addr) + continue; + if (!(operand->idx[i].rel_addr = vsir_program_get_src_operands(dxil->program, 1))) + { + vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating indirect index for operand copy."); + return false; + } + *operand->idx[i].rel_addr = *template->idx[i].rel_addr; + } + + return true; +} + static void sm6_parser_emit_br(struct sm6_parser *dxil, struct function_emission_state *state) { const struct dxil_record *record = state->record; @@ -5314,8 +5383,8 @@ static void sm6_parser_emit_br(struct sm6_parser *dxil, struct function_emission } static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, - const struct vkd3d_shader_register *operand_regs, unsigned int component_count, - struct function_emission_state 
*state, struct vkd3d_shader_register *reg) + const struct vsir_operand *operand_regs, unsigned int component_count, + struct function_emission_state *state, struct vsir_operand *reg) { struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; @@ -5330,11 +5399,11 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, } for (i = 0; i < component_count; ++i) - all_constant &= register_is_constant(&operand_regs[i]); + all_constant &= vsir_operand_is_constant(&operand_regs[i]); if (all_constant) { - vsir_register_init(reg, VKD3DSPR_IMMCONST, operand_regs[0].data_type, 0); + vsir_operand_init(reg, VKD3DSPR_IMMCONST, operand_regs[0].data_type, 0); reg->dimension = VSIR_DIMENSION_VEC4; for (i = 0; i < component_count; ++i) reg->u.immconst_u32[i] = operand_regs[i].u.immconst_u32[0]; @@ -5343,7 +5412,7 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, return true; } - register_init_with_id(reg, VKD3DSPR_TEMP, operand_regs[0].data_type, state->temp_idx++); + vsir_operand_init_with_id(reg, VKD3DSPR_TEMP, operand_regs[0].data_type, state->temp_idx++); reg->dimension = VSIR_DIMENSION_VEC4; for (i = 0; i < component_count; ++i) @@ -5374,35 +5443,35 @@ error: } static bool sm6_parser_emit_composite_construct(struct sm6_parser *sm6, const struct sm6_value **operands, - unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg) + unsigned int component_count, struct function_emission_state *state, struct vsir_operand *reg) { - struct vkd3d_shader_register operand_regs[VKD3D_VEC4_SIZE]; + struct vsir_operand operand_regs[VKD3D_VEC4_SIZE]; unsigned int i; for (i = 0; i < component_count; ++i) { - vsir_register_from_dxil_value(&operand_regs[i], operands[i], 0, sm6); + vsir_operand_from_dxil_value(&operand_regs[i], operands[i], 0, sm6); } return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); } static bool 
sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const struct sm6_value **operands, - unsigned int max_operands, const struct sm6_value *z_operand, struct function_emission_state *state, - struct vkd3d_shader_register *reg) + unsigned int max_operands, const struct sm6_value *z_operand, + struct function_emission_state *state, struct vsir_operand *reg) { - struct vkd3d_shader_register operand_regs[VKD3D_VEC4_SIZE]; + struct vsir_operand operand_regs[VKD3D_VEC4_SIZE]; unsigned int component_count; for (component_count = 0; component_count < max_operands; ++component_count) { if (!z_operand && operands[component_count]->value_type == VALUE_TYPE_UNDEFINED) break; - vsir_register_from_dxil_value(&operand_regs[component_count], operands[component_count], 0, sm6); + vsir_operand_from_dxil_value(&operand_regs[component_count], operands[component_count], 0, sm6); } if (z_operand) - vsir_register_from_dxil_value(&operand_regs[component_count++], z_operand, 0, sm6); + vsir_operand_from_dxil_value(&operand_regs[component_count++], z_operand, 0, sm6); return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); } @@ -5635,9 +5704,9 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr struct vsir_src_operand *src_params; struct vsir_dst_operand *dst_params; const struct sm6_value *resource; - struct vkd3d_shader_register reg; enum vkd3d_shader_opcode opcode; enum dxil_resource_kind kind; + struct vsir_operand reg; resource = operands[0]; if (!sm6_value_validate_is_handle(resource, sm6)) @@ -5658,14 +5727,13 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr } else { - vsir_register_from_dxil_value(&reg, operands[coord_idx], 0, sm6); + vsir_operand_from_dxil_value(&reg, operands[coord_idx], 0, sm6); } for (i = coord_idx + coord_count; i < coord_idx + 3; ++i) { if (operands[i]->value_type != VALUE_TYPE_UNDEFINED) { - WARN("Ignoring unexpected operand.\n"); 
vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring an unexpected defined operand value for atomic instruction %u.", opcode); break; @@ -5697,7 +5765,7 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr } dst_param_init(&dst_params[0]); - vsir_register_from_dxil_value(&dst_params[0].reg, dst, 0, sm6); + vsir_operand_from_dxil_value(&dst_params[0].reg, dst, 0, sm6); dst_param_init(&dst_params[1]); sm6_register_from_handle(sm6, &resource->u.handle, &dst_params[1].reg); } @@ -5771,7 +5839,7 @@ static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_int const struct sm6_value *resource, *sampler; struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; - struct vkd3d_shader_register coord; + struct vsir_operand coord; unsigned int clamp; resource = operands[0]; @@ -5841,7 +5909,7 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr if (data_type_is_64_bit(src_param->reg.data_type)) src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); else - register_convert_to_minimum_precision(&src_param->reg); + vsir_operand_convert_to_minimum_precision(&src_param->reg); if (!instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6)) vkd3d_shader_instruction_make_nop(ins); @@ -5859,7 +5927,7 @@ static void sm6_parser_dcl_register_builtin(struct sm6_parser *dxil, enum vkd3d_ if (!(ins = sm6_parser_add_instruction(dxil, opcode))) return; dst_param = &ins->declaration.dst; - vsir_register_init(&dst_param->reg, reg_type, data_type, 0); + vsir_operand_init(&dst_param->reg, reg_type, data_type, 0); dst_param_init_vector(dst_param, component_count); } } @@ -5882,7 +5950,7 @@ static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *dxil, struc } sm6_parser_dcl_register_builtin(dxil, VSIR_OP_DCL_INPUT, reg_type, data_type, 1); - vsir_register_init(&src_param->reg, reg_type, data_type, 0); + 
vsir_operand_init(&src_param->reg, reg_type, data_type, 0); if (!scalar) src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param_init(src_param); @@ -5969,7 +6037,7 @@ static void sm6_parser_emit_dx_stream(struct sm6_parser *dxil, enum dx_intrinsic vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Output stream index %u is invalid.", i); - register_init_with_id(&src_param->reg, VKD3DSPR_STREAM, VSIR_DATA_UNUSED, i); + vsir_operand_init_with_id(&src_param->reg, VKD3DSPR_STREAM, VSIR_DATA_UNUSED, i); src_param_init(src_param); if (op == DX_EMIT_THEN_CUT_STREAM) @@ -6022,7 +6090,7 @@ static void sm6_parser_emit_dx_domain_location(struct sm6_parser *dxil, enum dx_ } sm6_parser_dcl_register_builtin(dxil, VSIR_OP_DCL_INPUT, VKD3DSPR_TESSCOORD, VSIR_DATA_F32, 3); - vsir_register_init(&src_param->reg, VKD3DSPR_TESSCOORD, VSIR_DATA_F32, 0); + vsir_operand_init(&src_param->reg, VKD3DSPR_TESSCOORD, VSIR_DATA_F32, 0); src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param_init_scalar(src_param, component_idx); @@ -6034,10 +6102,10 @@ static void sm6_parser_emit_dx_dot(struct sm6_parser *dxil, enum dx_intrinsic_op const struct sm6_value **operands, struct function_emission_state *state) { struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_register regs[2]; struct vsir_src_operand *src_params; enum vkd3d_shader_opcode opcode; unsigned int component_count; + struct vsir_operand regs[2]; switch (op) { @@ -6120,7 +6188,11 @@ static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intri return; } - src_params[0].reg = sm6->input_params[row_index].reg; + if (!(dxil_copy_template_operand(sm6, &src_params[0].reg, &sm6->input_params[row_index].reg))) + { + vkd3d_shader_instruction_make_nop(ins); + return; + } src_param_init_scalar(&src_params[0], column_index); if (e->register_count > 1) register_index_address_init(&src_params[0].reg.idx[0], operands[1], sm6); @@ -6195,7 +6267,7 @@ static void 
sm6_parser_emit_dx_compute_builtin(struct sm6_parser *dxil, enum dx_ return; } - vsir_register_init(&src_param->reg, reg_type, VSIR_DATA_U32, 0); + vsir_operand_init(&src_param->reg, reg_type, VSIR_DATA_U32, 0); src_param->reg.dimension = VSIR_DIMENSION_VEC4; if (component_count > 1) component_idx = sm6_value_get_constant_uint(operands[0], dxil); @@ -6442,7 +6514,11 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin return; } - src_param->reg = params[row_index].reg; + if (!(dxil_copy_template_operand(sm6, &src_param->reg, ¶ms[row_index].reg))) + { + vkd3d_shader_instruction_make_nop(ins); + return; + } src_param_init_scalar(src_param, column_index); count = 0; @@ -6464,7 +6540,7 @@ static void sm6_parser_emit_dx_make_double(struct sm6_parser *dxil, enum dx_intr { struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; - struct vkd3d_shader_register reg; + struct vsir_operand reg; if (!sm6_parser_emit_composite_construct(dxil, &operands[0], 2, state, ®)) return; @@ -6604,8 +6680,8 @@ static void sm6_parser_emit_dx_raw_buffer_store(struct sm6_parser *sm6, enum dx_ struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; struct vsir_dst_operand *dst_param; - struct vkd3d_shader_register data; const struct sm6_value *resource; + struct vsir_operand data; bool raw; resource = operands[0]; @@ -6719,9 +6795,9 @@ static void sm6_parser_emit_dx_buffer_store(struct sm6_parser *sm6, enum dx_intr unsigned int write_mask, component_count; struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; - struct vkd3d_shader_register texel; struct vsir_dst_operand *dst_param; const struct sm6_value *resource; + struct vsir_operand texel; resource = operands[0]; if (!sm6_value_validate_is_handle(resource, sm6)) @@ -6801,7 +6877,7 @@ static void sm6_parser_emit_dx_get_sample_count(struct sm6_parser *dxil, enum dx return; } - vsir_register_init(&src_param->reg, VKD3DSPR_RASTERIZER, VSIR_DATA_F32, 0); + 
vsir_operand_init(&src_param->reg, VKD3DSPR_RASTERIZER, VSIR_DATA_F32, 0); src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param_init(src_param); @@ -6847,7 +6923,7 @@ static void sm6_parser_emit_dx_get_sample_pos(struct sm6_parser *dxil, enum dx_i else { src_param_init_vector(&src_params[0], 2); - vsir_register_init(&src_params[0].reg, VKD3DSPR_RASTERIZER, VSIR_DATA_F32, 0); + vsir_operand_init(&src_params[0].reg, VKD3DSPR_RASTERIZER, VSIR_DATA_F32, 0); src_params[0].reg.dimension = VSIR_DIMENSION_VEC4; src_param_init_from_value(&src_params[1], operands[0], 0, dxil); } @@ -6876,10 +6952,10 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *dxil, enum dx_intrinsic const struct sm6_value **operands, struct function_emission_state *state) { unsigned int clamp_idx = 0, component_count = VKD3D_VEC4_SIZE; - struct vkd3d_shader_register coord, ddx, ddy; const struct sm6_value *resource, *sampler; struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; + struct vsir_operand coord, ddx, ddy; resource = operands[0]; sampler = operands[1]; @@ -7009,7 +7085,11 @@ static void sm6_parser_emit_dx_sample_index(struct sm6_parser *dxil, enum dx_int return; } - src_param->reg = dxil->input_params[element_idx].reg; + if (!(dxil_copy_template_operand(dxil, &src_param->reg, &dxil->input_params[element_idx].reg))) + { + vkd3d_shader_instruction_make_nop(ins); + return; + } src_param_init(src_param); if (!instruction_dst_param_init_ssa_scalar(ins, 0, dxil)) @@ -7070,6 +7150,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *dxil, enum dx_int const struct shader_signature *signature; struct vkd3d_shader_instruction *ins; unsigned int row_index, column_index; + const struct vsir_operand *template; struct vsir_src_operand *src_param; struct vsir_dst_operand *dst_param; const struct signature_element *e; @@ -7115,8 +7196,12 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *dxil, enum dx_int } dst_param_init_scalar(dst_param, 
column_index); - dst_param->reg = is_patch_constant ? dxil->patch_constant_params[row_index].reg - : dxil->output_params[row_index].reg; + template = is_patch_constant ? &dxil->patch_constant_params[row_index].reg : &dxil->output_params[row_index].reg; + if (!(dxil_copy_template_operand(dxil, &dst_param->reg, template))) + { + vkd3d_shader_instruction_make_nop(ins); + return; + } if (e->register_count > 1) register_index_address_init(&dst_param->reg.idx[0], operands[1], dxil); @@ -7136,10 +7221,10 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *dxil, enum dx_int static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *dxil, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { - struct vkd3d_shader_register coord, offset; const struct sm6_value *resource, *sampler; struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; + struct vsir_operand coord, offset; unsigned int swizzle; bool extended_offset; @@ -7210,9 +7295,9 @@ static void sm6_parser_emit_dx_texture_load(struct sm6_parser *dxil, enum dx_int const struct sm6_value *resource, *mip_level_or_sample_count; struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; - struct vkd3d_shader_register coord; enum dxil_resource_kind kind; bool is_multisample, is_uav; + struct vsir_operand coord; unsigned int i; resource = operands[0]; @@ -7256,12 +7341,12 @@ static void sm6_parser_emit_dx_texture_load(struct sm6_parser *dxil, enum dx_int static void sm6_parser_emit_dx_texture_store(struct sm6_parser *dxil, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { - struct vkd3d_shader_register coord, texel; unsigned int write_mask, component_count; struct vkd3d_shader_instruction *ins; struct vsir_src_operand *src_params; struct vsir_dst_operand *dst_param; const struct sm6_value *resource; + struct vsir_operand coord, texel; resource = operands[0]; if 
(!sm6_value_validate_is_texture_handle(resource, op, dxil)) @@ -7347,7 +7432,6 @@ static enum vkd3d_shader_opcode sm6_dx_map_wave_bit_op(enum dxil_wave_bit_op_kin case WAVE_BIT_OP_XOR: return VSIR_OP_WAVE_ACTIVE_BIT_XOR; default: - FIXME("Unhandled wave bit op %u.\n", op); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, "Wave bit operation %u is unhandled.", op); return VSIR_OP_INVALID; @@ -7402,7 +7486,6 @@ static enum vkd3d_shader_opcode sm6_dx_map_wave_op(enum dxil_wave_op_kind op, bo return VSIR_OP_WAVE_OP_MAX; return is_signed ? VSIR_OP_WAVE_OP_IMAX : VSIR_OP_WAVE_OP_UMAX; default: - FIXME("Unhandled wave op %u.\n", op); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, "Wave operation %u is unhandled.", op); return VSIR_OP_INVALID; @@ -7683,7 +7766,6 @@ static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_ const struct sm6_value *value = operands[i]; if (!sm6_parser_validate_operand_type(sm6, value, info->operand_info[i], dst->type)) { - WARN("Failed to validate operand %u for dx intrinsic id %u, '%s'.\n", i + 1, op, name); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Operand %u for call to dx intrinsic function '%s' is invalid.", i + 1, name); return false; @@ -7691,7 +7773,6 @@ static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_ } if (info->operand_info[operand_count]) { - WARN("Missing operands for dx intrinsic id %u, '%s'.\n", op, name); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Call to dx intrinsic function '%s' has missing operands.", name); return false; @@ -7750,7 +7831,6 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, struct function_emissio return; if (!sm6_value_is_function_dcl(fn_value)) { - WARN("Function target value is not a function declaration.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Function call 
target value is not a function declaration."); return; @@ -7766,7 +7846,6 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, struct function_emissio operand_count = type->u.function->param_count; if (operand_count > ARRAY_SIZE(operands)) { - WARN("Ignoring %zu operands.\n", operand_count - ARRAY_SIZE(operands)); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %zu operands for function call.", operand_count - ARRAY_SIZE(operands)); operand_count = ARRAY_SIZE(operands); @@ -7778,15 +7857,11 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, struct function_emissio return; } if ((j = record->operand_count - i)) - { - WARN("Ignoring %u operands beyond the function parameter list.\n", j); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u function call operands beyond the parameter list.", j); - } if (!fn_value->u.function.is_prototype) { - FIXME("Unhandled call to local function.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Call to a local function is unsupported."); return; @@ -7796,7 +7871,6 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, struct function_emissio if (!operand_count) { - WARN("Missing dx intrinsic function id.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "The id for a dx intrinsic function is missing."); return; @@ -7805,7 +7879,6 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, struct function_emissio op_value = operands[0]; if (!sm6_value_is_constant(op_value) || !sm6_type_is_integer(op_value->type)) { - WARN("dx intrinsic function id is not a constant int.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Expected a constant integer dx intrinsic function id."); return; @@ -8130,27 +8203,14 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *dxil, struct function_emi const struct dxil_record *record = 
state->record; uint64_t success_ordering, failure_ordering; const struct sm6_value *ptr, *cmp, *new; - struct vkd3d_shader_instruction *ins; - struct vsir_src_operand *src_params; - struct vsir_dst_operand *dst_params; - struct vkd3d_shader_register reg; + struct fixup_data *fixup; unsigned int i = 0; bool is_volatile; uint64_t code; - if (!(ptr = sm6_parser_get_value_by_ref(dxil, record, NULL, &i)) - || !sm6_value_validate_is_pointer_to_i32(ptr, dxil) - || !sm6_value_validate_is_backward_ref(ptr, dxil)) - return; - - vsir_register_from_dxil_value(&reg, ptr, 0, dxil); - - if (reg.type != VKD3DSPR_GROUPSHAREDMEM) - { - vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "The destination register for a cmpxchg instruction is not groupshared memory."); + if (!(ptr = sm6_parser_get_pointer_value_by_ref(dxil, record, NULL, &i)) + || !sm6_value_validate_is_pointer_to_i32(ptr, dxil)) return; - } if (!(dst->type = sm6_type_get_cmpxchg_result_struct(dxil))) { @@ -8190,11 +8250,44 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *dxil, struct function_emi if (record->operand_count > i && record->operands[i]) FIXME("Ignoring weak cmpxchg.\n"); + sm6_parser_init_ssa_value(dxil, dst); + + if (!(fixup = sm6_parser_add_fixup(dxil, state, FIXUP_CMPXCHG))) + return; + + fixup->ptr = ptr; + fixup->cmp = cmp; + fixup->new = new; + fixup->is_volatile = is_volatile; +} + +static void sm6_parser_fixup_cmpxchg(struct sm6_parser *dxil, struct function_emission_state *state) +{ + struct sm6_value *dst = sm6_parser_get_current_value(dxil); + const struct sm6_value *ptr, *cmp, *new; + struct fixup_data *fixup = state->fixup; + struct vkd3d_shader_instruction *ins; + struct vsir_src_operand *src_params; + struct vsir_dst_operand *dst_params; + struct vsir_operand reg; + + ptr = fixup->ptr; + cmp = fixup->cmp; + new = fixup->new; + vsir_operand_from_dxil_value(&reg, ptr, 0, dxil); + + if (reg.type != VKD3DSPR_GROUPSHAREDMEM) + { + 
vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for a cmpxchg instruction is not groupshared memory."); + return; + } + if (!(ins = sm6_parser_add_function_instruction(dxil, state))) return; vsir_instruction_init(ins, &dxil->p.location, VSIR_OP_IMM_ATOMIC_CMP_EXCH); - ins->flags = is_volatile ? VKD3DARF_VOLATILE : 0; + ins->flags = fixup->is_volatile ? VKD3DARF_VOLATILE : 0; if (!(src_params = instruction_src_params_alloc(ins, 3, dxil))) { @@ -8214,7 +8307,7 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *dxil, struct function_emi return; } - vsir_register_from_dxil_value(&dst_params[0].reg, dst, 0, dxil); + vsir_operand_from_dxil_value(&dst_params[0].reg, dst, 0, dxil); dst_param_init(&dst_params[0]); dst_params[1].reg = reg; dst_param_init(&dst_params[1]); @@ -8278,7 +8371,7 @@ static void sm6_parser_emit_extractval(struct sm6_parser *dxil, struct function_ return; } - vsir_register_from_dxil_value(&src_param->reg, src, 0, dxil); + vsir_operand_from_dxil_value(&src_param->reg, src, 0, dxil); src_param_init_scalar(src_param, elem_idx); if (!instruction_dst_param_init_ssa_scalar(ins, 0, dxil)) @@ -8376,18 +8469,15 @@ static void sm6_parser_emit_load(struct sm6_parser *dxil, struct function_emissi struct sm6_value *dst = sm6_parser_get_current_value(dxil); const struct sm6_type *elem_type = NULL, *pointee_type; const struct dxil_record *record = state->record; - unsigned int alignment, operand_count, i = 0; - struct vkd3d_shader_instruction *ins; - struct vsir_src_operand *src_params; - struct vkd3d_shader_register reg; + unsigned int alignment, i = 0; const struct sm6_value *ptr; + struct fixup_data *fixup; uint64_t alignment_code; - if (!(ptr = sm6_parser_get_value_by_ref(dxil, record, NULL, &i))) + if (!(ptr = sm6_parser_get_pointer_value_by_ref(dxil, record, NULL, &i))) return; if (!sm6_value_validate_is_register(ptr, dxil) || !sm6_value_validate_is_pointer(ptr, dxil) - || 
!sm6_value_validate_is_backward_ref(ptr, dxil) || !dxil_record_validate_operand_count(record, i + 2, i + 3, dxil)) return; @@ -8412,7 +8502,26 @@ static void sm6_parser_emit_load(struct sm6_parser *dxil, struct function_emissi if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); - vsir_register_from_dxil_value(&reg, ptr, 0, dxil); + sm6_parser_init_ssa_value(dxil, dst); + + if (!(fixup = sm6_parser_add_fixup(dxil, state, FIXUP_LOAD))) + return; + + fixup->ptr = ptr; + fixup->alignment = alignment; +} + +static void sm6_parser_fixup_load(struct sm6_parser *dxil, struct function_emission_state *state) +{ + struct fixup_data *fixup = state->fixup; + struct vkd3d_shader_instruction *ins; + struct vsir_src_operand *src_params; + const struct sm6_value *ptr; + unsigned int operand_count; + struct vsir_operand reg; + + ptr = fixup->ptr; + vsir_operand_from_dxil_value(&reg, ptr, 0, dxil); if (!(ins = sm6_parser_add_function_instruction(dxil, state))) return; @@ -8435,7 +8544,7 @@ static void sm6_parser_emit_load(struct sm6_parser *dxil, struct function_emissi /* Struct offset is always zero as there is no struct, just an array. */ src_param_make_constant_uint(&src_params[1], 0); src_param_init_from_value(&src_params[2], ptr, 0, dxil); - src_params[2].reg.alignment = alignment; + src_params[2].reg.alignment = fixup->alignment; /* The offset is already in src_params[0]. 
*/ src_params[2].reg.idx_count = 1; } @@ -8453,7 +8562,7 @@ static void sm6_parser_emit_load(struct sm6_parser *dxil, struct function_emissi if (operand_count > 1) src_param_make_constant_uint(&src_params[0], 0); src_param_init_from_value(&src_params[operand_count - 1], ptr, 0, dxil); - src_params[operand_count - 1].reg.alignment = alignment; + src_params[operand_count - 1].reg.alignment = fixup->alignment; } if (!instruction_dst_param_init_ssa_scalar(ins, 0, dxil)) @@ -8597,18 +8706,14 @@ static void sm6_parser_emit_ret(struct sm6_parser *dxil, struct function_emissio static void sm6_parser_emit_store(struct sm6_parser *dxil, struct function_emission_state *state) { const struct dxil_record *record = state->record; - unsigned int i = 0, alignment, operand_count; - struct vkd3d_shader_instruction *ins; - struct vsir_src_operand *src_params; - struct vsir_dst_operand *dst_param; const struct sm6_value *ptr, *src; - struct vkd3d_shader_register reg; + unsigned int i = 0, alignment; + struct fixup_data *fixup; uint64_t alignment_code; - if (!(ptr = sm6_parser_get_value_by_ref(dxil, record, NULL, &i)) + if (!(ptr = sm6_parser_get_pointer_value_by_ref(dxil, record, NULL, &i)) || !sm6_value_validate_is_register(ptr, dxil) - || !sm6_value_validate_is_pointer(ptr, dxil) - || !sm6_value_validate_is_backward_ref(ptr, dxil)) + || !sm6_value_validate_is_pointer(ptr, dxil)) return; /* Forward-referenced sources are stored as value/type pairs, even @@ -8632,7 +8737,27 @@ static void sm6_parser_emit_store(struct sm6_parser *dxil, struct function_emiss if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); - vsir_register_from_dxil_value(&reg, ptr, 0, dxil); + if (!(fixup = sm6_parser_add_fixup(dxil, state, FIXUP_STORE))) + return; + + fixup->ptr = ptr; + fixup->src = src; + fixup->alignment = alignment; +} + +static void sm6_parser_fixup_store(struct sm6_parser *dxil, struct function_emission_state *state) +{ + struct fixup_data *fixup = state->fixup; + struct 
vkd3d_shader_instruction *ins; + struct vsir_src_operand *src_params; + struct vsir_dst_operand *dst_param; + const struct sm6_value *ptr, *src; + unsigned int operand_count; + struct vsir_operand reg; + + ptr = fixup->ptr; + src = fixup->src; + vsir_operand_from_dxil_value(&reg, ptr, 0, dxil); if (!(ins = sm6_parser_add_function_instruction(dxil, state))) return; @@ -8680,7 +8805,7 @@ static void sm6_parser_emit_store(struct sm6_parser *dxil, struct function_emiss dst_param_init(dst_param); dst_param->reg = reg; - dst_param->reg.alignment = alignment; + dst_param->reg.alignment = fixup->alignment; /* Groupshared stores contain the address in the src params. */ if (dst_param->reg.type != VKD3DSPR_IDXTEMP) dst_param->reg.idx_count = 1; @@ -8924,14 +9049,12 @@ static void sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, co record = block->records[i]; if (record->code != METADATA_ATTACHMENT) { - WARN("Ignoring record with code %u.\n", record->code); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT, "Ignoring a metadata attachment record with code %u.", record->code); continue; } if (!(record->operand_count & 1)) { - WARN("Ignoring function attachment.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT, "Ignoring a metadata function attachment."); continue; @@ -8940,7 +9063,6 @@ static void sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, co index = record->operands[0]; if (!target_block->record_count || index >= target_block->record_count - 1) { - WARN("Invalid record index %"PRIu64".\n", index); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid record index %"PRIu64" for a metadata attachment.", index); continue; @@ -8948,11 +9070,8 @@ static void sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, co /* 'index' is an instruction index, but records[0] is DECLAREBLOCKS, not an instruction. 
*/ target_record = target_block->records[index + 1]; if (target_record->attachment) - { - WARN("Overwriting record attachment.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT, "The target record for a metadata attachment already has an attachment."); - } target_record->attachment = record; } } @@ -9004,7 +9123,6 @@ static bool metadata_node_get_unary_uint(const struct sm6_metadata_node *node, u { if (node->operand_count != 1) { - FIXME("Ignoring node with %u operands.\n", node->operand_count); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT, "Ignoring metadata attachment node with %u operands; expected unary.", node->operand_count); return false; @@ -9012,7 +9130,6 @@ static bool metadata_node_get_unary_uint(const struct sm6_metadata_node *node, u if (!sm6_metadata_value_is_value(node->operands[0]) || !sm6_metadata_get_uint_value(sm6, node->operands[0], operand)) { - WARN("Failed to get operand value.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT, "Failed to get a metadata attachment operand value; ignoring the attachment."); return false; @@ -9198,7 +9315,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, dst = sm6_parser_get_current_value(sm6); fwd_type = dst->type; dst->type = NULL; - dst->is_back_ref = true; is_terminator = false; record = block->records[i]; @@ -9276,11 +9392,8 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, } if (dst->type && fwd_type && dst->type != fwd_type) - { - WARN("Type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "The type of a result value does not match the type defined by a forward reference."); - } sm6->value_count += !!dst->type; } @@ -9291,6 +9404,11 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, return VKD3D_ERROR_INVALID_SHADER; } + sm6_parser_apply_fixups(sm6, function); + + if 
(sm6->p.status < 0) + return sm6->p.status; + return VKD3D_OK; } @@ -9595,14 +9713,12 @@ static enum vkd3d_result metadata_value_create_node(struct sm6_metadata_value *m m->type = VKD3D_METADATA_NODE; if (!(m->value_type = sm6->metadata_type)) { - WARN("Metadata type not found.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "The type for metadata values was not found."); return VKD3D_ERROR_INVALID_SHADER; } if (!(node = vkd3d_malloc(offsetof(struct sm6_metadata_node, operands[record->operand_count])))) { - ERR("Failed to allocate metadata node with %u operands.\n", record->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating a metadata node with %u operands.", record->operand_count); return VKD3D_ERROR_OUT_OF_MEMORY; @@ -9620,7 +9736,6 @@ static enum vkd3d_result metadata_value_create_node(struct sm6_metadata_value *m ref = record->operands[i] - offset; if (record->operands[i] >= offset && ref >= end_count) { - WARN("Invalid metadata index %"PRIu64".\n", ref); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "Metadata index %"PRIu64" is invalid.", ref); vkd3d_free(node); @@ -9629,7 +9744,6 @@ static enum vkd3d_result metadata_value_create_node(struct sm6_metadata_value *m if (!node->is_distinct && ref == dst_idx) { - WARN("Metadata self-reference at index %u.\n", dst_idx); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "Metadata index %u is self-referencing.", dst_idx); vkd3d_free(node); @@ -9639,7 +9753,6 @@ static enum vkd3d_result metadata_value_create_node(struct sm6_metadata_value *m node->operands[i] = (record->operands[i] >= offset) ? 
&table->values[ref] : NULL; if (record->code == METADATA_NAMED_NODE && !sm6_metadata_value_is_node(node->operands[i])) { - WARN("Named node operand is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "The operand of a metadata named node is not a node."); vkd3d_free(node); @@ -9667,7 +9780,6 @@ static enum vkd3d_result sm6_parser_metadata_init(struct sm6_parser *sm6, const if (!(values = vkd3d_calloc(count, sizeof(*values)))) { - ERR("Failed to allocate metadata tables.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating metadata tables."); return VKD3D_ERROR_OUT_OF_MEMORY; @@ -9686,7 +9798,6 @@ static enum vkd3d_result sm6_parser_metadata_init(struct sm6_parser *sm6, const case METADATA_NAMED_NODE: if (!name) { - WARN("Named node has no name.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "A metadata named node has no name."); return VKD3D_ERROR_INVALID_SHADER; @@ -9727,7 +9838,6 @@ static enum vkd3d_result sm6_parser_metadata_init(struct sm6_parser *sm6, const /* Check the next record to avoid freeing 'name' in all exit paths. 
*/ if (i + 1 == block->record_count || block->records[i + 1]->code != METADATA_NAMED_NODE) { - WARN("Name is not followed by a named node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "A metadata node name is not followed by a named node."); return VKD3D_ERROR_INVALID_SHADER; @@ -9767,7 +9877,6 @@ static enum vkd3d_result sm6_parser_metadata_init(struct sm6_parser *sm6, const if (!sm6_value_is_constant(value) && !sm6_value_is_undef(value) && !sm6_value_is_constant_array(value) && !sm6_value_is_function_dcl(value)) { - WARN("Value at index %u is not a constant or a function declaration.\n", value_idx); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "Metadata value at index %u is not a constant or a function declaration.", value_idx); return VKD3D_ERROR_INVALID_SHADER; @@ -9775,16 +9884,12 @@ static enum vkd3d_result sm6_parser_metadata_init(struct sm6_parser *sm6, const m->u.value = value; if (value->type != m->value_type) - { - WARN("Type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "The type of a metadata value does not match its referenced value at index %u.", value_idx); - } break; default: - FIXME("Unhandled metadata type %u.\n", record->code); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "Metadata type %u is unhandled.", record->code); return VKD3D_ERROR_INVALID_SHADER; @@ -9869,6 +9974,7 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, [SEMANTIC_KIND_DEPTHLESSEQUAL] = VKD3D_SHADER_SV_DEPTH_LESS_EQUAL, [SEMANTIC_KIND_DEPTHGREATEREQUAL] = VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL, + [SEMANTIC_KIND_STENCILREF] = VKD3D_SHADER_SV_STENCIL_REF, }; static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind, @@ -9928,11 +10034,8 @@ static const struct sm6_metadata_value 
*sm6_parser_find_named_metadata(struct sm if (!node->operand_count) return NULL; if (node->operand_count > 1) - { - FIXME("Ignoring %u extra operands for %s.\n", node->operand_count - 1, name); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for metadata node %s.", node->operand_count - 1, name); - } return node->operands[0]; } @@ -9940,7 +10043,7 @@ static const struct sm6_metadata_value *sm6_parser_find_named_metadata(struct sm } static bool sm6_parser_resources_load_register_range(struct sm6_parser *sm6, - const struct sm6_metadata_node *node, struct vkd3d_shader_register_range *range) + const struct sm6_metadata_node *node, struct vsir_register_range *range) { unsigned int size; @@ -9950,11 +10053,8 @@ static bool sm6_parser_resources_load_register_range(struct sm6_parser *sm6, return false; } if (!sm6_type_is_pointer(node->operands[1]->value_type)) - { - WARN("Resource type is not a pointer.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "Resource metadata value type is not a pointer."); - } if (!sm6_metadata_get_uint_value(sm6, node->operands[3], &range->space)) { @@ -10034,11 +10134,8 @@ static bool resources_load_additional_values(struct resource_additional_values * info->byte_stride = 0; if (node->operand_count & 1) - { - WARN("Operand count is not even.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Operand count for resource descriptor tag/value pairs is not even."); - } operand_count = node->operand_count & ~1u; for (i = 0; i < operand_count; i += 2) @@ -10046,7 +10143,6 @@ static bool resources_load_additional_values(struct resource_additional_values * if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &tag) || !sm6_metadata_get_uint_value(sm6, node->operands[i + 1], &value)) { - WARN("Failed to load tag/value pair at index %u.\n", i); vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Resource descriptor tag/value pair at index %u is not an integer pair.", i); return false; @@ -10057,7 +10153,6 @@ static bool resources_load_additional_values(struct resource_additional_values * case RESOURCE_TAG_ELEMENT_TYPE: if (value && kind != RESOURCE_KIND_TYPEDBUFFER && !resource_kind_is_texture(kind)) { - WARN("Invalid type %u for an untyped resource.\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "An untyped resource has type %u.", value); return false; @@ -10068,7 +10163,6 @@ static bool resources_load_additional_values(struct resource_additional_values * case RESOURCE_TAG_ELEMENT_STRIDE: if (value && kind != RESOURCE_KIND_STRUCTUREDBUFFER) { - WARN("Invalid stride %u for an unstructured resource.\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "An unstructured resource has a byte stride."); return false; @@ -10078,22 +10172,17 @@ static bool resources_load_additional_values(struct resource_additional_values * case RESOURCE_TAG_SAMPLER_FEEDBACK_KIND: /* MinMip = 0, MipRegionUsed = 1 */ - FIXME("Unhandled sampler feedback kind %u.\n", value); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Sampler feedback kind %u is unhandled.", value); break; case RESOURCE_TAG_ENABLE_ATOMIC_64: if (value) - { - FIXME("Unsupported 64-bit atomic ops.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "64-bit atomic ops on resources are not supported."); - } break; default: - FIXME("Unhandled tag %u, value %u.\n", tag, value); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Tag %u for resource descriptor additional value %u is unhandled.", tag, value); break; @@ -10113,7 +10202,6 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc if (!(resource_type = shader_resource_type_from_dxil_resource_kind(kind))) { - 
FIXME("Unhandled resource kind %u.\n", kind); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Resource kind %u is unhandled.", kind); return NULL; @@ -10129,7 +10217,6 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc if (!sm6_metadata_value_is_node(m)) { - WARN("Resource metadata list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Resource descriptor metadata list is not a node."); return NULL; @@ -10141,10 +10228,8 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc if (kind == RESOURCE_KIND_TYPEDBUFFER || resource_kind_is_texture(kind)) { if (resource_values.data_type == VSIR_DATA_UNUSED) - { vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "A typed resource has no data type."); - } ins->opcode = is_uav ? VSIR_OP_DCL_UAV_TYPED : VSIR_OP_DCL; for (i = 0; i < VKD3D_VEC4_SIZE; ++i) @@ -10169,18 +10254,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc /* TODO: 16-bit resources. 
*/ if (ins->declaration.structured_resource.byte_stride % 4u) - { - WARN("Byte stride %u is not a multiple of 4.\n", ins->declaration.structured_resource.byte_stride); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Structured resource byte stride %u is not a multiple of 4.", ins->declaration.structured_resource.byte_stride); - } return &ins->declaration.structured_resource.resource; } else { - FIXME("Unhandled resource kind %u.\n", kind); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Resource kind %u is unhandled.", kind); } @@ -10190,13 +10271,13 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc static void init_resource_declaration(struct vkd3d_shader_resource *resource, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, - unsigned int id, const struct vkd3d_shader_register_range *range) + unsigned int id, const struct vsir_register_range *range) { struct vsir_dst_operand *param = &resource->reg; param->modifiers = 0; param->shift = 0; - vsir_register_init(&param->reg, reg_type, data_type, 3); + vsir_operand_init(&param->reg, reg_type, data_type, 3); param->reg.idx[0].offset = id; param->reg.idx[1].offset = range->first; param->reg.idx[2].offset = range->last; @@ -10212,21 +10293,16 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, if (node->operand_count < 9) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Invalid operand count %u for an SRV descriptor.", node->operand_count); return VKD3D_ERROR_INVALID_SHADER; } if (node->operand_count > 9) - { - WARN("Ignoring %u extra operands.\n", node->operand_count - 9); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for an SRV descriptor.", node->operand_count - 9); - } if (!sm6_metadata_get_uint_value(sm6, 
node->operands[6], &kind)) { - WARN("Failed to load resource type.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "SRV resource type metadata value is not an integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -10250,7 +10326,6 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, { if (!sm6_metadata_get_uint_value(sm6, node->operands[7], &ins->declaration.semantic.sample_count)) { - WARN("Failed to load sample count.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "SRV sample count metadata value is not an integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -10258,7 +10333,6 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, } else if (!sm6_metadata_value_is_zero_or_undef(node->operands[7])) { - WARN("Ignoring sample count value.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring an SRV descriptor sample count metadata value which is not constant zero or undefined."); } @@ -10274,23 +10348,18 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, if (node->operand_count < 11) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Invalid operand count %u for a UAV descriptor.", node->operand_count); return VKD3D_ERROR_INVALID_SHADER; } if (node->operand_count > 11) - { - WARN("Ignoring %u extra operands.\n", node->operand_count - 11); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for a UAV descriptor.", node->operand_count - 11); - } for (i = 6; i < 10; ++i) { if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &values[i - 6])) { - WARN("Failed to load uint value at index %u.\n", i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "A UAV descriptor operand metadata value is not an 
integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -10323,26 +10392,21 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, static enum vkd3d_result sm6_parser_resources_load_cbv(struct sm6_parser *sm6, const struct sm6_metadata_node *node, struct sm6_descriptor_info *d, struct vkd3d_shader_instruction *ins) { - struct vkd3d_shader_register *reg; + struct vsir_operand *reg; unsigned int buffer_size; if (node->operand_count < 7) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Invalid operand count %u for a CBV descriptor.", node->operand_count); return VKD3D_ERROR_INVALID_SHADER; } if (node->operand_count > 7 && node->operands[7]) - { - WARN("Ignoring %u extra operands.\n", node->operand_count - 7); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for a CBV descriptor.", node->operand_count - 7); - } if (!sm6_metadata_get_uint_value(sm6, node->operands[6], &buffer_size)) { - WARN("Failed to load buffer size.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Constant buffer size metadata value is not an integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -10355,7 +10419,7 @@ static enum vkd3d_result sm6_parser_resources_load_cbv(struct sm6_parser *sm6, ins->declaration.cb.src.modifiers = VKD3DSPSM_NONE; reg = &ins->declaration.cb.src.reg; - vsir_register_init(reg, VKD3DSPR_CONSTBUFFER, VSIR_DATA_F32, 3); + vsir_operand_init(reg, VKD3DSPR_CONSTBUFFER, VSIR_DATA_F32, 3); reg->idx[0].offset = d->id; reg->idx[1].offset = d->range.first; reg->idx[2].offset = d->range.last; @@ -10370,29 +10434,24 @@ static enum vkd3d_result sm6_parser_resources_load_cbv(struct sm6_parser *sm6, static enum vkd3d_result sm6_parser_resources_load_sampler(struct sm6_parser *sm6, const struct sm6_metadata_node *node, struct sm6_descriptor_info *d, struct vkd3d_shader_instruction 
*ins) { - struct vkd3d_shader_register *reg; + struct vsir_operand *reg; unsigned int kind; if (node->operand_count < 7) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Invalid operand count %u for a sampler descriptor.", node->operand_count); return VKD3D_ERROR_INVALID_SHADER; } if (node->operand_count > 7 && node->operands[7]) - { - WARN("Ignoring %u extra operands.\n", node->operand_count - 7); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for a sampler descriptor.", node->operand_count - 7); - } vsir_instruction_init(ins, &sm6->p.location, VSIR_OP_DCL_SAMPLER); ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; if (!sm6_metadata_get_uint_value(sm6, node->operands[6], &kind)) { - WARN("Failed to load sampler mode.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Sampler mode metadata value is not an integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -10405,7 +10464,6 @@ static enum vkd3d_result sm6_parser_resources_load_sampler(struct sm6_parser *sm ins->flags = VKD3DSI_SAMPLER_COMPARISON_MODE; break; default: - FIXME("Ignoring sampler kind %u.\n", kind); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring sampler kind %u.", kind); break; @@ -10415,7 +10473,7 @@ static enum vkd3d_result sm6_parser_resources_load_sampler(struct sm6_parser *sm ins->declaration.sampler.src.modifiers = VKD3DSPSM_NONE; reg = &ins->declaration.sampler.src.reg; - vsir_register_init(reg, VKD3DSPR_SAMPLER, VSIR_DATA_UNUSED, 3); + vsir_operand_init(reg, VKD3DSPR_SAMPLER, VSIR_DATA_UNUSED, 3); reg->idx[0].offset = d->id; reg->idx[1].offset = d->range.first; reg->idx[2].offset = d->range.last; @@ -10443,7 +10501,6 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, m = descriptor_node->operands[i]; if 
(!sm6_metadata_value_is_node(m)) { - WARN("Resource descriptor is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Resource descriptor is not a metadata node."); return VKD3D_ERROR_INVALID_SHADER; @@ -10452,7 +10509,6 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, node = m->u.node; if (node->operand_count < 6) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Invalid operand count %u for a descriptor.", node->operand_count); return VKD3D_ERROR_INVALID_SHADER; @@ -10461,7 +10517,6 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, if (!vkd3d_array_reserve((void **)&sm6->descriptors, &sm6->descriptor_capacity, sm6->descriptor_count + 1, sizeof(*sm6->descriptors))) { - ERR("Failed to allocate descriptor array.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating the descriptor array."); return VKD3D_ERROR_OUT_OF_MEMORY; @@ -10471,7 +10526,6 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, if (!sm6_metadata_get_uint_value(sm6, node->operands[0], &d->id)) { - WARN("Failed to load resource id.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Resource id metadata value is not an integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -10534,7 +10588,6 @@ static enum vkd3d_result sm6_parser_resources_init(struct sm6_parser *sm6) node = m->u.node; if (node->operand_count != SHADER_DESCRIPTOR_TYPE_COUNT) { - WARN("Unexpected descriptor type count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Descriptor type count %u is invalid.", node->operand_count); return VKD3D_ERROR_INVALID_SHADER; @@ -10547,7 +10600,6 @@ static enum vkd3d_result sm6_parser_resources_init(struct sm6_parser *sm6) if 
(!sm6_metadata_value_is_node(m)) { - WARN("Resource list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCES, "Resource list is not a metadata node."); return VKD3D_ERROR_INVALID_SHADER; @@ -10570,7 +10622,6 @@ static void signature_element_read_additional_element_values(struct signature_el if (!sm6_metadata_value_is_node(node->operands[10])) { - WARN("Additional values list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element additional values list is not a metadata node."); return; @@ -10578,11 +10629,8 @@ static void signature_element_read_additional_element_values(struct signature_el node = node->operands[10]->u.node; if (node->operand_count & 1) - { - WARN("Operand count is not even.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Operand count for signature element additional tag/value pairs is not even."); - } operand_count = node->operand_count & ~1u; for (i = 0; i < operand_count; i += 2) @@ -10590,7 +10638,6 @@ static void signature_element_read_additional_element_values(struct signature_el if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &tag) || !sm6_metadata_get_uint_value(sm6, node->operands[i + 1], &value)) { - WARN("Failed to extract tag/value pair.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element tag/value pair at index %u is not an integer pair.", i); continue; @@ -10604,16 +10651,12 @@ static void signature_element_read_additional_element_values(struct signature_el case ADDITIONAL_TAG_RELADDR_MASK: /* A mask of components accessed via relative addressing. Seems to replace TPF 'dcl_index_range'. 
*/ if (value > VKD3DSP_WRITEMASK_ALL) - { - WARN("Invalid relative addressed mask %#x.\n", value); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MASK, "Mask %#x of relative-addressed components is invalid.", value); - } break; case ADDITIONAL_TAG_USED_MASK: if (value > VKD3DSP_WRITEMASK_ALL) { - WARN("Invalid used mask %#x.\n", value); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MASK, "Mask %#x of used components is invalid.", value); value &= VKD3DSP_WRITEMASK_ALL; @@ -10621,7 +10664,6 @@ static void signature_element_read_additional_element_values(struct signature_el e->used_mask = value; break; default: - FIXME("Unhandled tag %u, value %u.\n", tag, value); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Tag %#x for signature element additional value %#x is unhandled.", tag, value); break; @@ -10645,7 +10687,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if (!sm6_metadata_value_is_node(m)) { - WARN("Signature element list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element list is not a metadata node."); return VKD3D_ERROR_INVALID_SHADER; @@ -10656,20 +10697,18 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if (!(elements = vkd3d_calloc(operand_count, sizeof(*elements)))) { - ERR("Failed to allocate %u signature elements.\n", operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating %u signature elements.", operand_count); return VKD3D_ERROR_OUT_OF_MEMORY; } - native_16bit = program->global_flags & VKD3DSGF_FORCE_NATIVE_LOW_PRECISION; + native_16bit = program->global_flags & VKD3DSGF_ENABLE_NATIVE_LOW_PRECISION; for (i = 0; i < operand_count; ++i) { m = node->operands[i]; if (!sm6_metadata_value_is_node(m)) { - WARN("Signature element is not a node.\n"); 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element is not a metadata node."); goto invalid; @@ -10678,17 +10717,13 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const element_node = m->u.node; if (element_node->operand_count < 10) { - WARN("Invalid operand count %u.\n", element_node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Invalid signature element operand count %u.", element_node->operand_count); goto invalid; } if (element_node->operand_count > 11) - { - WARN("Ignoring %u extra operands.\n", element_node->operand_count - 11); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for a signature element.", element_node->operand_count - 11); - } for (j = 0; j < 10; ++j) { @@ -10697,7 +10732,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const continue; if (!sm6_metadata_get_uint_value(sm6, element_node->operands[j], &values[j])) { - WARN("Failed to load uint value at index %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element value at index %u is not an integer.", j); goto invalid; @@ -10708,7 +10742,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if (values[0] != i) { - FIXME("Unsupported element id %u not equal to its index %u.\n", values[0], i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A non-sequential and non-zero-based element id is not supported."); goto invalid; @@ -10716,7 +10749,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if (!sm6_metadata_value_is_string(element_node->operands[1])) { - WARN("Element name is not a string.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element name is not a metadata string."); goto invalid; 
@@ -10730,7 +10762,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j, tessellator_domain); if (j != SEMANTIC_KIND_ARBITRARY && j != SEMANTIC_KIND_TARGET && e->sysval_semantic == VKD3D_SHADER_SV_NONE) { - WARN("Unhandled semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled.", j); goto invalid; @@ -10738,7 +10769,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if ((e->interpolation_mode = values[5]) >= VKD3DSIM_COUNT) { - WARN("Unhandled interpolation mode %u.\n", e->interpolation_mode); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Interpolation mode %u is unhandled.", e->interpolation_mode); goto invalid; @@ -10755,7 +10785,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input, &dimension) == VKD3DSPR_INVALID) { - WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled for an I/O register.", j); goto invalid; @@ -10763,7 +10792,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const } else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) { - WARN("Invalid row start %u with row count %u.\n", e->register_index, e->register_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting row of %u with count %u is invalid.", e->register_index, e->register_count); @@ -10773,7 +10801,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const index = values[9]; if (index != UINT8_MAX && (index >= VKD3D_VEC4_SIZE || column_count > VKD3D_VEC4_SIZE - 
index)) { - WARN("Invalid column start %u with count %u.\n", index, column_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting column %u with count %u is invalid.", index, column_count); goto invalid; @@ -10796,7 +10823,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const m = element_node->operands[4]; if (!sm6_metadata_value_is_node(m)) { - WARN("Semantic index list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element semantic index list is not a metadata node."); goto invalid; @@ -10807,7 +10833,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const { if (!sm6_metadata_get_uint_value(sm6, element_node->operands[j], &index)) { - WARN("Failed to get semantic index for row %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element semantic index for row %u is not an integer.", j); } @@ -10817,7 +10842,6 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const } else if (index != e->semantic_index + j) { - WARN("Semantic index %u for row %u is not of an incrementing sequence.\n", index, j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element semantic index %u for row %u is not of an incrementing sequence.", index, j); } @@ -10858,7 +10882,6 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons if (!sm6_metadata_value_is_node(m)) { - WARN("Signature table is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature table is not a metadata node."); return VKD3D_ERROR_INVALID_SHADER; @@ -10876,14 +10899,6 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons &program->patch_constant_signature, tessellator_domain, false)) < 0) return ret; - if 
(sm6->program->shader_version.type == VKD3D_SHADER_TYPE_HULL - && !(sm6->outpointid_param = vsir_program_create_outpointid_param(sm6->program))) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Failed to allocate outpointid parameter."); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - if ((ret = sm6_parser_init_input_signature(sm6, &program->input_signature)) < 0) return ret; @@ -10903,7 +10918,6 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) { - WARN("Failed to load global flags.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Global flags metadata value is not an integer."); return; @@ -10930,7 +10944,6 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co if (version->type != VKD3D_SHADER_TYPE_COMPUTE) { - WARN("Shader of type %#x has thread group dimensions.\n", version->type); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Shader has thread group dimensions but is not a compute shader."); return VKD3D_ERROR_INVALID_SHADER; @@ -10938,7 +10951,6 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co if (!m || !sm6_metadata_value_is_node(m)) { - WARN("Thread group dimension value is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Thread group dimension metadata value is not a node."); return VKD3D_ERROR_INVALID_SHADER; @@ -10947,7 +10959,6 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co node = m->u.node; if (node->operand_count != 3) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Thread group dimension operand count %u is invalid.", node->operand_count); return VKD3D_ERROR_INVALID_SHADER; @@ -10957,7 +10968,6 @@ static enum 
vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co { if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &group_sizes[i])) { - WARN("Thread group dimension is not an integer value.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Thread group dimension metadata value is not an integer."); return VKD3D_ERROR_INVALID_SHADER; @@ -10965,7 +10975,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co if (!group_sizes[i] || group_sizes[i] > dx_max_thread_group_size[i]) { char dim = "XYZ"[i]; - WARN("Invalid thread group %c dimension %u.\n", dim, group_sizes[i]); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Thread group %c dimension %u is invalid.", dim, group_sizes[i]); return VKD3D_ERROR_INVALID_SHADER; @@ -11009,11 +11019,8 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, struct vkd3d_shader_instruction *ins; if (tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID || tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) - { - WARN("Unhandled domain %u.\n", tessellator_domain); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Domain shader tessellator domain %u is unhandled.", tessellator_domain); - } if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_DOMAIN))) return; @@ -11025,11 +11032,8 @@ static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, bool allow_zero, const char *type) { if ((!count && !allow_zero) || count > 32) - { - WARN("%s control point count %u invalid.\n", type, count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "%s control point count %u is invalid.", type, count); - } } static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, @@ -11038,11 +11042,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, struct 
vkd3d_shader_instruction *ins; if (!tessellator_partitioning || tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) - { - WARN("Unhandled partitioning %u.\n", tessellator_partitioning); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Hull shader tessellator partitioning %u is unhandled.", tessellator_partitioning); - } if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_PARTITIONING))) return; @@ -11057,11 +11058,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser * struct vkd3d_shader_instruction *ins; if (!primitive || primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) - { - WARN("Unhandled output primitive %u.\n", primitive); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Hull shader tessellator output primitive %u is unhandled.", primitive); - } if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE))) return; @@ -11077,7 +11075,6 @@ static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, if (!sm6_metadata_get_float_value(sm6, m, &max_tessellation_factor)) { - WARN("Max tess factor property is not a float value.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Hull shader max tessellation factor property operand is not a float."); return; @@ -11085,11 +11082,8 @@ static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, /* Exclude non-finite values. 
*/ if (!(max_tessellation_factor >= 1.0f && max_tessellation_factor <= 64.0f)) - { - WARN("Invalid max tess factor %f.\n", max_tessellation_factor); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Hull shader max tessellation factor %f is invalid.", max_tessellation_factor); - } if (!(ins = sm6_parser_add_instruction(sm6, VSIR_OP_DCL_HS_MAX_TESSFACTOR))) return; @@ -11105,7 +11099,6 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s if (!m || !sm6_metadata_value_is_node(m)) { - WARN("Missing or invalid GS properties.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Geometry shader properties node is missing or invalid."); return; @@ -11114,27 +11107,20 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s node = m->u.node; if (node->operand_count < ARRAY_SIZE(operands)) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Geometry shader properties operand count %u is invalid.", node->operand_count); return; } if (node->operand_count > ARRAY_SIZE(operands)) - { - WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %zu extra operands for geometry shader properties.", node->operand_count - ARRAY_SIZE(operands)); - } for (i = 0; i < node->operand_count; ++i) { if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) - { - WARN("GS property at index %u is not a uint value.\n", i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Geometry shader properties operand at index %u is not an integer.", i); - } } switch (i = operands[0]) @@ -11173,7 +11159,6 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s break; } - WARN("Unhandled input primitive %u.\n", 
i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Geometry shader input primitive %u is unhandled.", i); break; @@ -11186,25 +11171,18 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s i = operands[1]; /* Max total scalar count sets an upper limit. We would need to scan outputs to be more precise. */ if (i > MAX_GS_OUTPUT_TOTAL_SCALARS) - { - WARN("GS output vertex count %u invalid.\n", i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Geometry shader output vertex count %u is invalid.", i); - } sm6_parser_emit_dcl_count(sm6, VSIR_OP_DCL_VERTICES_OUT, i); sm6->program->vertices_out_count = i; if (operands[2] > 1) - { - FIXME("Unhandled stream mask %#x.\n", operands[2]); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Geometry shader stream mask %#x is unhandled.", operands[2]); - } output_primitive = operands[3]; if (output_primitive == VKD3D_PT_UNDEFINED || output_primitive >= VKD3D_PT_COUNT) { - WARN("Unhandled output primitive %u.\n", output_primitive); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Geometry shader output primitive %u is unhandled.", output_primitive); output_primitive = VKD3D_PT_TRIANGLELIST; @@ -11214,11 +11192,8 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s i = operands[4]; if (!i || i > MAX_GS_INSTANCE_COUNT) - { - WARN("GS instance count %u invalid.\n", i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Geometry shader instance count %u is invalid.", i); - } sm6_parser_emit_dcl_count(sm6, VSIR_OP_DCL_GS_INSTANCES, i); } @@ -11231,7 +11206,6 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa if (!m || !sm6_metadata_value_is_node(m)) { - WARN("Missing or invalid DS properties.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Domain shader 
properties node is missing or invalid."); return 0; @@ -11240,27 +11214,20 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa node = m->u.node; if (node->operand_count < ARRAY_SIZE(operands)) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Domain shader properties operand count %u is invalid.", node->operand_count); return 0; } if (node->operand_count > ARRAY_SIZE(operands)) - { - WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %zu extra operands for domain shader properties.", node->operand_count - ARRAY_SIZE(operands)); - } for (i = 0; i < node->operand_count; ++i) { if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) - { - WARN("DS property at index %u is not a uint value.\n", i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Domain shader properties operand at index %u is not an integer.", i); - } } sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); @@ -11280,7 +11247,6 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa if (!m || !sm6_metadata_value_is_node(m)) { - WARN("Missing or invalid HS properties.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Hull shader properties node is missing or invalid."); return 0; @@ -11289,38 +11255,26 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa node = m->u.node; if (node->operand_count < 7) { - WARN("Invalid operand count %u.\n", node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, "Hull shader properties operand count %u is invalid.", node->operand_count); return 0; } if (node->operand_count > 7) - { - WARN("Ignoring %u extra operands.\n", 
node->operand_count - 7); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Ignoring %u extra operands for hull shader properties.", node->operand_count - 7); - } m = node->operands[0]; if (!sm6_metadata_value_is_value(m) || !sm6_value_is_function_dcl(m->u.value)) - { - WARN("Patch constant function node is not a function value.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Hull shader patch constant function node is not a function value."); - } else - { sm6->patch_constant_function = m->u.value->u.function.name; - } for (i = 1; i < min(node->operand_count, ARRAY_SIZE(operands)); ++i) { if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) - { - WARN("HS property at index %u is not a uint value.\n", i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Hull shader properties operand at index %u is not an integer.", i); - } } sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); @@ -11347,7 +11301,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) if (!entry_node || entry_node->operand_count < 2 || !(m = entry_node->operands[0])) { - WARN("No entry point definition found.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ENTRY_POINT, "No entry point definition found in the metadata."); return VKD3D_ERROR_INVALID_SHADER; @@ -11355,7 +11308,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) if (m->type != VKD3D_METADATA_VALUE) { - WARN("Entry point definition is not a value.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ENTRY_POINT, "Entry point definition is not a metadata value."); return VKD3D_ERROR_INVALID_SHADER; @@ -11364,7 +11316,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) value = m->u.value; if (!sm6_value_is_function_dcl(value)) { - WARN("Entry point value is not 
a function definition.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ENTRY_POINT, "Entry point metadata value does not contain a function definition."); return VKD3D_ERROR_INVALID_SHADER; @@ -11373,17 +11324,13 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) sm6->entry_point = value->u.function.name; if (!sm6_metadata_value_is_string(entry_node->operands[1]) || strcmp(sm6->entry_point, entry_node->operands[1]->u.string_value)) - { - WARN("Entry point function name %s mismatch.\n", sm6->entry_point); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_ENTRY_POINT_MISMATCH, "Entry point function name %s does not match the name in metadata.", sm6->entry_point); - } if (entry_node->operand_count >= 5 && (m = entry_node->operands[4])) { if (!sm6_metadata_value_is_node(m)) { - WARN("Shader properties list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Shader properties tag/value list is not a metadata node."); return VKD3D_ERROR_INVALID_SHADER; @@ -11391,18 +11338,14 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) node = m->u.node; if (node->operand_count & 1) - { - WARN("Operand count is not even.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, "Operand count for shader properties tag/value pairs is not even."); - } operand_count = node->operand_count & ~1u; for (i = 0; i < operand_count; i += 2) { if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &tag)) { - WARN("Tag is not an integer value.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Shader properties tag at index %u is not an integer.", i); return VKD3D_ERROR_INVALID_SHADER; @@ -11427,7 +11370,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) return ret; break; default: - FIXME("Unhandled tag %#x.\n", tag); vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, "Shader properties tag %#x is unhandled.", tag); break; @@ -11552,8 +11494,14 @@ static void sm6_parser_cleanup_attribute_groups(struct sm6_parser *dxil) vkd3d_free(dxil->attribute_groups); } +static void dxil_cleanup_fixups(struct sm6_parser *dxil) +{ + vkd3d_free(dxil->fixups); +} + static void sm6_parser_cleanup(struct sm6_parser *sm6) { + dxil_cleanup_fixups(sm6); dxil_block_destroy(&sm6->root_block); dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); sm6_type_table_cleanup(sm6->types, sm6->type_count); @@ -11566,7 +11514,7 @@ static void sm6_parser_cleanup(struct sm6_parser *sm6) vkd3d_free(sm6->values); } -static enum vsir_denorm_mode sm6_function_get_denorm_mode(const struct sm6_function *function, +static enum vkd3d_shader_denormal_mode sm6_function_get_denorm_mode(const struct sm6_function *function, struct sm6_parser *dxil) { unsigned int attribs_id = function->declaration->u.function.attribs_id; @@ -11574,13 +11522,13 @@ static enum vsir_denorm_mode sm6_function_get_denorm_mode(const struct sm6_funct size_t i, j, k; if (!attribs_id) - return VSIR_DENORM_FLUSH_TO_ZERO; + return VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; if (attribs_id > dxil->parameter_attribute_count) { vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ATTRIBUTE, "Invalid attribute id %u.", attribs_id); - return VSIR_DENORM_FLUSH_TO_ZERO; + return VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; } parameter_attribute = &dxil->parameter_attributes[attribs_id - 1]; @@ -11604,22 +11552,22 @@ static enum vsir_denorm_mode sm6_function_get_denorm_mode(const struct sm6_funct continue; if (!strcmp(attribute->value.string, "preserve")) - return VSIR_DENORM_PRESERVE; + return VKD3D_SHADER_DENORMAL_MODE_PRESERVE; if (!strcmp(attribute->value.string, "ftz")) - return VSIR_DENORM_FLUSH_TO_ZERO; + return VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; if (!strcmp(attribute->value.string, "any")) - return VSIR_DENORM_ANY; + return 
VKD3D_SHADER_DENORMAL_MODE_ANY; vkd3d_shader_parser_error(&dxil->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ATTRIBUTE, "Invalid value for attribute `fp32-denorm-mode'."); - return VSIR_DENORM_FLUSH_TO_ZERO; + return VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; } } } - return VSIR_DENORM_FLUSH_TO_ZERO; + return VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; } static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6, const char *name) @@ -11650,7 +11598,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro count = byte_code_size / sizeof(*byte_code); if (count < 6) { - WARN("Invalid data size %zu.\n", byte_code_size); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); return VKD3D_ERROR_INVALID_SHADER; @@ -11663,7 +11610,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro if (token_count < 6 || count < token_count) { - WARN("Invalid token count %u (word count %zu).\n", token_count, count); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, "DXIL chunk token count %#x is invalid (word count %zu).", token_count, count); return VKD3D_ERROR_INVALID_SHADER; @@ -11681,7 +11627,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro chunk_offset = byte_code[4]; if (chunk_offset < 16 || chunk_offset >= byte_code_size) { - WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); return VKD3D_ERROR_INVALID_SHADER; @@ -11689,8 +11634,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro chunk_size = byte_code[5]; if (chunk_size > byte_code_size - chunk_offset) { - WARN("Invalid 
bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", - chunk_size, byte_code_size, chunk_offset); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", chunk_size, byte_code_size, chunk_offset); @@ -11699,27 +11642,20 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); if ((magic = sm6->start[0]) != BITCODE_MAGIC) - { - WARN("Unknown magic number 0x%08x.\n", magic); vkd3d_shader_warning(message_context, &location, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); - } sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) - { - FIXME("Unknown shader type %#x.\n", version.type); vkd3d_shader_warning(message_context, &location, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, "Unknown shader type %#x.", version.type); - } version.major = VKD3D_SM6_VERSION_MAJOR(version_token); version.minor = VKD3D_SM6_VERSION_MINOR(version_token); if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) { - WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); return VKD3D_ERROR_INVALID_SHADER; @@ -11773,16 +11709,12 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro length = sm6->ptr - sm6->start - block->start; if (length != block->length) - { - WARN("Invalid block length %zu; expected %u.\n", length, block->length); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, "Root block ends with length %zu but indicated length is %u.", length, 
block->length); - } if (sm6->ptr != sm6->end) { size_t expected_length = sm6->end - sm6->start; length = sm6->ptr - sm6->start; - WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, "Module ends with length %zu but indicated length is %zu.", length, expected_length); } @@ -11816,7 +11748,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro function_count = dxil_block_compute_function_count(&sm6->root_block); if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) { - ERR("Failed to allocate function array.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating DXIL function array."); ret = VKD3D_ERROR_OUT_OF_MEMORY; @@ -11825,7 +11756,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) { - WARN("Value array count overflowed.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "Overflow occurred in the DXIL module value count."); ret = VKD3D_ERROR_INVALID_SHADER; @@ -11833,7 +11763,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro } if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) { - ERR("Failed to allocate value array.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, "Out of memory allocating DXIL value array."); ret = VKD3D_ERROR_OUT_OF_MEMORY; @@ -11863,7 +11792,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro if (j == ARRAY_SIZE(sm6->metadata_tables)) { - FIXME("Too many metadata tables.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, "A metadata table count greater than %zu is unsupported.", ARRAY_SIZE(sm6->metadata_tables)); ret = 
VKD3D_ERROR_INVALID_SHADER; @@ -11895,18 +11823,17 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro if (!(fn = sm6_parser_get_function(sm6, sm6->entry_point))) { - WARN("Failed to find entry point %s.\n", sm6->entry_point); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ENTRY_POINT, "The definition of the entry point function '%s' was not found.", sm6->entry_point); ret = VKD3D_ERROR_INVALID_SHADER; goto fail; } - program->f32_denorm_mode = sm6_function_get_denorm_mode(fn, sm6); + program->f32_denormal_mode = sm6_function_get_denorm_mode(fn, sm6); if (version.type == VKD3D_SHADER_TYPE_HULL) { - enum vsir_denorm_mode cp_denorm_mode; + enum vkd3d_shader_denormal_mode cp_denorm_mode; sm6_parser_add_instruction(sm6, VSIR_OP_HS_CONTROL_POINT_PHASE); @@ -11915,7 +11842,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro if (!(fn = sm6_parser_get_function(sm6, sm6->patch_constant_function))) { - WARN("Failed to find patch constant function '%s'.\n", sm6->patch_constant_function); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "Failed to find the patch constant function '%s' for a hull shader.", sm6->patch_constant_function); @@ -11925,11 +11851,11 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro cp_denorm_mode = sm6_function_get_denorm_mode(fn, sm6); - if (sm6->p.status >= 0 && program->f32_denorm_mode != cp_denorm_mode) + if (sm6->p.status >= 0 && program->f32_denormal_mode != cp_denorm_mode) { vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ATTRIBUTE, "Patch constant denorm mode %u doesn't match control point denorm mode %u.", - program->f32_denorm_mode, cp_denorm_mode); + program->f32_denormal_mode, cp_denorm_mode); } sm6_parser_add_instruction(sm6, VSIR_OP_HS_FORK_PHASE); @@ -11946,11 +11872,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro } if 
(sm6->function_count > expected_function_count) - { - FIXME("%zu unhandled functions.\n", sm6->function_count - expected_function_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, "%zu functions were not emitted.", sm6->function_count - expected_function_count); - } dxil_block_destroy(&sm6->root_block); diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 1eb8d8d44b2..5140513b346 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -1303,6 +1303,15 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_PIXEL_SHADER: return "PixelShader"; + case HLSL_CLASS_HULL_SHADER: + return "HullShader"; + + case HLSL_CLASS_DOMAIN_SHADER: + return "DomainShader"; + + case HLSL_CLASS_COMPUTE_SHADER: + return "ComputeShader"; + case HLSL_CLASS_STRING: return "String"; @@ -2417,7 +2426,10 @@ static uint32_t write_shader_blob(const struct hlsl_ir_compile *compile, struct ret = hlsl_emit_vsir(ctx, &compile_info, compile->decl, &compile->initializers, &program, &rdef); ctx->profile = profile; if (ret < 0) + { + vsir_program_cleanup(&program); return 0; + } ret = vsir_program_compile(&program, &rdef, vkd3d_shader_init_config_flags(), &compile_info, &source, ctx->message_context); @@ -3847,13 +3859,15 @@ static void fx_parser_skip(struct fx_parser *parser, size_t size) parser->ptr += size; } -static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, enum vkd3d_shader_error error, - const char *format, ...) +#define fx_parser_error(parser, error, ...) \ + fx_parser_error_(parser, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) fx_parser_error_(struct fx_parser *parser, enum vkd3d_shader_error error, + const char *function, const char *format, ...) 
{ va_list args; va_start(args, format); - vkd3d_shader_verror(parser->message_context, NULL, error, format, args); + vkd3d_shader_verror(parser->message_context, NULL, error, function, format, args); va_end(args); parser->failed = true; @@ -5023,6 +5037,7 @@ fxlc_opcodes[] = { 0x216, "iadd" }, { 0x219, "imul" }, { 0x21a, "udiv" }, + { 0x21b, "umod" }, { 0x21d, "imin" }, { 0x21e, "imax" }, { 0x21f, "umin" }, @@ -5066,19 +5081,18 @@ struct fx_4_ctab_entry struct fxlc_arg { - uint32_t reg_type; - uint32_t address; - bool indexed; - struct + uint32_t index_count; + struct fxlc_register { - uint32_t reg_type; + uint32_t type; uint32_t address; - } index; + } registers[/* .index_count + 1 */]; }; struct fxlvm_code { - const uint32_t *ptr, *end; + const uint32_t *code; + size_t pos, size; bool failed; union @@ -5099,13 +5113,35 @@ struct fxlvm_code static uint32_t fxlvm_read_u32(struct fxlvm_code *code) { - if (code->end == code->ptr) + if (code->pos >= code->size) { code->failed = true; return 0; } - return *code->ptr++; + return code->code[code->pos++]; +} + +static bool fxlvm_skip(struct fxlvm_code *code, size_t count) +{ + if (code->size - code->pos < count) + { + code->pos = code->size; + code->failed = true; + return false; + } + + code->pos += count; + return true; +} + +static const struct fxlc_arg *fxlvm_read_argument(struct fxlvm_code *code) +{ + const void *ptr = &code->code[code->pos]; + size_t indices; + + indices = fxlvm_read_u32(code) + 1; + return fxlvm_skip(code, indices * 2) ? 
ptr : NULL; } static const uint32_t *find_d3dbc_section(const uint32_t *ptr, uint32_t count, uint32_t tag, uint32_t *size) @@ -5144,8 +5180,7 @@ static void fx_parse_print_swizzle(struct fx_parser *parser, const struct fxlvm_ vkd3d_string_buffer_printf(&parser->buffer, ".%.*s", comp_count, &comp[addr % 4]); } -static void fx_print_fxlc_register(struct fx_parser *parser, uint32_t reg_type, - uint32_t address, uint32_t index_type, uint32_t index_address, struct fxlvm_code *code) +static void fx_print_fxlc_register(struct fx_parser *parser, const struct fxlc_register *reg, struct fxlvm_code *code) { static const char *table_names[FX_FXLC_REG_MAX + 1] = { @@ -5155,12 +5190,12 @@ static void fx_print_fxlc_register(struct fx_parser *parser, uint32_t reg_type, [FX_FXLC_REG_OUTPUT] = "expr", [FX_FXLC_REG_TEMP] = "r", }; - uint32_t reg_index = address / 4; + uint32_t reg_index = reg->address / 4; if (parser->source_type == VKD3D_SHADER_SOURCE_TX - && (reg_type == FX_FXLC_REG_INPUT || reg_type == FX_FXLC_REG_OUTPUT)) + && (reg->type == FX_FXLC_REG_INPUT || reg->type == FX_FXLC_REG_OUTPUT)) { - if (reg_type == FX_FXLC_REG_INPUT) + if (reg->type == FX_FXLC_REG_INPUT) { if (reg_index == 0) vkd3d_string_buffer_printf(&parser->buffer, "vPos"); @@ -5174,67 +5209,29 @@ static void fx_print_fxlc_register(struct fx_parser *parser, uint32_t reg_type, } else { - vkd3d_string_buffer_printf(&parser->buffer, "%s%u", table_names[reg_type], reg_index); - } - if (index_type != FX_FXLC_REG_UNUSED) - { - vkd3d_string_buffer_printf(&parser->buffer, "[%s%u.%c]", table_names[index_type], - index_address / 4, "xyzw"[index_address % 4]); + vkd3d_string_buffer_printf(&parser->buffer, "%s%u", table_names[reg->type], reg_index); } - fx_parse_print_swizzle(parser, code, address); } -static void fx_parse_fxlc_constant_argument(struct fx_parser *parser, - const struct fxlc_arg *arg, const struct fxlvm_code *code) +static const struct fx_4_ctab_entry *find_register_entry(const struct fxlvm_code *code, 
uint32_t register_index) { - uint32_t register_index = arg->address / 4; /* Address counts in components. */ + if (!code->ctab) + return NULL; - if (code->ctab_count) + for (size_t i = 0; i < code->ctab_count; ++i) { - uint32_t i, offset; + const struct fx_4_ctab_entry *entry = &code->constants[i]; - for (i = 0; i < code->ctab_count; ++i) - { - const struct fx_4_ctab_entry *c = &code->constants[i]; - - if (register_index < c->register_index || register_index - c->register_index >= c->register_count) - continue; - - vkd3d_string_buffer_printf(&parser->buffer, "%s", &code->ctab[c->name]); - - /* Register offset within variable */ - offset = arg->address - c->register_index * 4; + if (register_index < entry->register_index) + continue; - if (offset / 4) - vkd3d_string_buffer_printf(&parser->buffer, "[%u]", offset / 4); - fx_parse_print_swizzle(parser, code, offset); - return; - } + if (register_index >= entry->register_index + entry->register_count) + continue; - vkd3d_string_buffer_printf(&parser->buffer, "(var-not-found)"); + return entry; } - else - { - vkd3d_string_buffer_printf(&parser->buffer, "c%u", register_index); - fx_parse_print_swizzle(parser, code, arg->address); - } -} - -static void fx_parse_fxlc_argument(struct fx_parser *parser, struct fxlc_arg *arg, struct fxlvm_code *code) -{ - uint32_t flags; - memset(arg, 0, sizeof(*arg)); - - flags = fxlvm_read_u32(code); - if (flags) - { - arg->indexed = true; - arg->index.reg_type = fxlvm_read_u32(code); - arg->index.address = fxlvm_read_u32(code); - } - arg->reg_type = fxlvm_read_u32(code); - arg->address = fxlvm_read_u32(code); + return NULL; } static void fx_print_fxlc_literal(struct fx_parser *parser, uint32_t address, struct fxlvm_code *code) @@ -5247,69 +5244,126 @@ static void fx_print_fxlc_literal(struct fx_parser *parser, uint32_t address, st static void fx_print_fxlc_argument(struct fx_parser *parser, const struct fxlc_arg *arg, struct fxlvm_code *code) { - uint32_t count; + bool pending_brace = 
false; + unsigned index_level = 0; - if (arg->reg_type > FX_FXLC_REG_MAX) + for (size_t i = 0; i <= arg->index_count; ++i) { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Unexpected register type %u.", arg->reg_type); - return; - } + /* register index chain goes outside in, so start at the end. */ + const struct fxlc_register *reg = &arg->registers[arg->index_count - i]; - if (arg->index.reg_type > FX_FXLC_REG_MAX) - { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Unexpected index register type %u.", arg->index.reg_type); - return; - } + if (reg->type > FX_FXLC_REG_MAX) + { + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, + "Unexpected register type %u.", reg->type); + return; + } - if (arg->indexed) - { - fx_print_fxlc_register(parser, arg->reg_type, arg->address, arg->index.reg_type, - arg->index.address, code); - return; - } + if (index_level++) + { + vkd3d_string_buffer_printf(&parser->buffer, pending_brace ? " + " : "["); + pending_brace = false; + } - switch (arg->reg_type) - { - case FX_FXLC_REG_LITERAL: - count = code->scalar ? 1 : code->comp_count; - if (arg->address >= code->cli_count || count > code->cli_count - arg->address) + switch (reg->type) + { + case FX_FXLC_REG_LITERAL: { - vkd3d_string_buffer_printf(&parser->buffer, "(<out-of-bounds>)"); - parser->failed = true; + uint32_t count = code->scalar ? 1 : code->comp_count; + + if (reg->address >= code->cli_count || count > code->cli_count - reg->address) + { + vkd3d_string_buffer_printf(&parser->buffer, "(<out-of-bounds>)"); + parser->failed = true; + break; + } + + vkd3d_string_buffer_printf(&parser->buffer, "("); + fx_print_fxlc_literal(parser, reg->address, code); + for (unsigned int j = 1; j < code->comp_count; ++j) + { + vkd3d_string_buffer_printf(&parser->buffer, ", "); + fx_print_fxlc_literal(parser, reg->address + (code->scalar ? 
0 : j), code); + } + vkd3d_string_buffer_printf(&parser->buffer, ")"); break; } - - vkd3d_string_buffer_printf(&parser->buffer, "("); - fx_print_fxlc_literal(parser, arg->address, code); - for (unsigned int i = 1; i < code->comp_count; ++i) + case FX_FXLC_REG_CB: { - vkd3d_string_buffer_printf(&parser->buffer, ", "); - fx_print_fxlc_literal(parser, arg->address + (code->scalar ? 0 : i), code); + uint32_t register_index = reg->address / 4; /* Address counts in components. */ + const struct fx_4_ctab_entry *entry; + + if (!(entry = find_register_entry(code, register_index))) + { + vkd3d_string_buffer_printf(&parser->buffer, "c%u", register_index); + break; + } + + vkd3d_string_buffer_printf(&parser->buffer, "%s", &code->ctab[entry->name]); + if (register_index -= entry->register_index) + { + vkd3d_string_buffer_printf(&parser->buffer, "[%d", register_index); + pending_brace = true; + } + break; } - vkd3d_string_buffer_printf(&parser->buffer, ")"); - break; - case FX_FXLC_REG_CB: - fx_parse_fxlc_constant_argument(parser, arg, code); - break; + case FX_FXLC_REG_OUTPUT: + vkd3d_string_buffer_printf(&parser->buffer, "expr"); + break; - case FX_FXLC_REG_INPUT: - case FX_FXLC_REG_OUTPUT: - case FX_FXLC_REG_TEMP: - fx_print_fxlc_register(parser, arg->reg_type, arg->address, FX_FXLC_REG_UNUSED, 0, code); - break; + case FX_FXLC_REG_INPUT: + case FX_FXLC_REG_TEMP: + fx_print_fxlc_register(parser, reg, code); + break; - default: - vkd3d_string_buffer_printf(&parser->buffer, "<unknown register %u>", arg->reg_type); - break; + default: + vkd3d_string_buffer_printf(&parser->buffer, "<unknown register %u>", reg->type); + break; + } + } + + for (size_t i = 0; i <= arg->index_count; ++i) + { + const struct fxlc_register *reg = &arg->registers[i]; + + if (pending_brace) + { + vkd3d_string_buffer_printf(&parser->buffer, "]"); + pending_brace = false; + } + + switch (reg->type) + { + case FX_FXLC_REG_LITERAL: + break; + case FX_FXLC_REG_CB: + { + const struct fx_4_ctab_entry *entry; + 
+ if ((entry = find_register_entry(code, reg->address / 4))) + { + fx_parse_print_swizzle(parser, code, reg->address - entry->register_index * 4); + break; + } + } + /* fall-through */ + default: + fx_parse_print_swizzle(parser, code, reg->address); + break; + } + + if (--index_level) + { + vkd3d_string_buffer_printf(&parser->buffer, "]"); + pending_brace = false; + } } } static void fx_parse_fxlvm_expression(struct fx_parser *parser, struct fxlvm_code *code) { - struct fxlc_arg args[9]; + const struct fxlc_arg *args[9]; uint32_t ins_count; size_t i, j; @@ -5338,9 +5392,8 @@ static void fx_parse_fxlvm_expression(struct fx_parser *parser, struct fxlvm_cod /* Sources entries are followed by the destination, first read them all. Output format is "opcode dst, src[0]...src[n]". */ - for (j = 0; j < src_count; ++j) - fx_parse_fxlc_argument(parser, &args[j], code); - fx_parse_fxlc_argument(parser, &args[src_count], code); + for (j = 0; j <= src_count; ++j) + args[j] = fxlvm_read_argument(code); opcode = (instr >> FX_FXLC_OPCODE_SHIFT) & FX_FXLC_OPCODE_MASK; code->comp_count = instr & FX_FXLC_COMP_COUNT_MASK; @@ -5349,14 +5402,14 @@ static void fx_parse_fxlvm_expression(struct fx_parser *parser, struct fxlvm_cod vkd3d_string_buffer_printf(&parser->buffer, "%s ", get_fxlc_opcode_name(opcode)); code->scalar = false; - fx_print_fxlc_argument(parser, &args[src_count], code); + fx_print_fxlc_argument(parser, args[src_count], code); vkd3d_string_buffer_printf(&parser->buffer, ", "); for (j = 0; j < src_count; ++j) { /* Scalar modifier applies only to the first source. 
*/ code->scalar = j == 0 && !!(instr & FX_FXLC_IS_SCALAR_MASK); - fx_print_fxlc_argument(parser, &args[j], code); + fx_print_fxlc_argument(parser, args[j], code); if (j < src_count - 1) vkd3d_string_buffer_printf(&parser->buffer, ", "); } @@ -5387,10 +5440,10 @@ static void fx_2_parse_fxlvm_expression(struct fx_parser *parser, const uint32_t /* CTAB does not contain variable names */ /* Code blob */ - code.ptr = find_d3dbc_section(blob, count, TAG_FXLC, &count); - code.end = code.ptr + count; + code.code = find_d3dbc_section(blob, count, TAG_FXLC, &count); + code.size = count; - if (!code.ptr) + if (!code.code) { fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Failed to locate expression code section."); return; @@ -5459,8 +5512,8 @@ static void fx_4_parse_fxlvm_expression(struct fx_parser *parser, uint32_t offse ctab_offset + consts_offset, code.ctab_count * sizeof(*code.constants)); } - code.ptr = fxlc.data.code; - code.end = (uint32_t *)((uint8_t *)fxlc.data.code + fxlc.data.size); + code.code = fxlc.data.code; + code.size = fxlc.data.size / sizeof(uint32_t); fx_parse_fxlvm_expression(parser, &code); } diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index 96c64a0e4c4..f250ecaa646 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -70,16 +70,17 @@ struct vkd3d_glsl_generator static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, const struct vsir_src_operand *rel_addr, unsigned int offset); -static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - struct vkd3d_glsl_generator *generator, - enum vkd3d_shader_error error, const char *fmt, ...) +#define vkd3d_glsl_compiler_error(gen, error, ...) 
\ + vkd3d_glsl_compiler_error_(gen, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) vkd3d_glsl_compiler_error_(struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_error error, const char *function, const char *fmt, ...) { va_list args; va_start(args, fmt); - vkd3d_shader_verror(generator->message_context, &generator->location, error, fmt, args); + vkd3d_shader_verror(gen->message_context, &gen->location, error, function, fmt, args); va_end(args); - generator->failed = true; + gen->failed = true; } static const char *shader_glsl_get_prefix(enum vkd3d_shader_type type) @@ -187,7 +188,7 @@ static void shader_glsl_print_image_name(struct vkd3d_string_buffer *buffer, } static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, - struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) + struct vkd3d_glsl_generator *gen, const struct vsir_operand *reg) { switch (reg->type) { @@ -391,7 +392,7 @@ static void shader_glsl_print_bitcast(struct vkd3d_string_buffer *dst, struct vk static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, const struct vsir_src_operand *vsir_src, uint32_t mask, enum vsir_data_type data_type) { - const struct vkd3d_shader_register *reg = &vsir_src->reg; + const struct vsir_operand *reg = &vsir_src->reg; struct vkd3d_string_buffer *register_name; enum vsir_data_type src_data_type; unsigned int size; @@ -2322,15 +2323,32 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator static void shader_glsl_handle_global_flags(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, enum vsir_global_flags flags) { + static const uint64_t ignored_flags = VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_BIND_FOR_DURATION; + if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) { vkd3d_string_buffer_printf(buffer, "layout(early_fragment_tests) in;\n"); flags &= ~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; } - if (flags & 
~VKD3DSGF_REFACTORING_ALLOWED) + if (flags & ignored_flags) + { + TRACE("Ignoring global flags %#"PRIx64".\n", flags & ignored_flags); + flags &= ~ignored_flags; + } + + if (flags) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags); + + if (gen->program->f16_denormal_mode != VKD3D_SHADER_DENORMAL_MODE_ANY + || gen->program->f32_denormal_mode != VKD3D_SHADER_DENORMAL_MODE_ANY + || gen->program->f64_denormal_mode != VKD3D_SHADER_DENORMAL_MODE_ANY) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Cannot emit denormal modes. The target environment does not support float controls."); + return; + } } static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index c5c723be913..968f1a5ca02 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -22,40 +22,41 @@ #include "hlsl.h" #include <stdio.h> -void hlsl_note(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_log_level level, const char *fmt, ...) +void hlsl_note_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_log_level level, const char *function, const char *fmt, ...) { va_list args; va_start(args, fmt); - vkd3d_shader_vnote(ctx->message_context, loc, level, fmt, args); + vkd3d_shader_vnote(ctx->message_context, loc, level, function, fmt, args); va_end(args); } -void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *fmt, ...) +void hlsl_error_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *function, const char *fmt, ...) 
{ va_list args; va_start(args, fmt); - vkd3d_shader_verror(ctx->message_context, loc, error, fmt, args); + vkd3d_shader_verror(ctx->message_context, loc, error, function, fmt, args); va_end(args); if (!ctx->result) ctx->result = VKD3D_ERROR_INVALID_SHADER; } -void hlsl_warning(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *fmt, ...) +void hlsl_warning_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *function, const char *fmt, ...) { va_list args; va_start(args, fmt); - vkd3d_shader_vwarning(ctx->message_context, loc, error, fmt, args); + vkd3d_shader_vwarning(ctx->message_context, loc, error, function, fmt, args); va_end(args); } -void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, const char *fmt, ...) +void hlsl_fixme_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + const char *function, const char *fmt, ...) { struct vkd3d_string_buffer *string; va_list args; @@ -64,7 +65,8 @@ void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, c string = hlsl_get_string_buffer(ctx); vkd3d_string_buffer_printf(string, "Aborting due to not yet implemented feature: "); vkd3d_string_buffer_vprintf(string, fmt, args); - vkd3d_shader_error(ctx->message_context, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "%s", string->buffer); + vkd3d_shader_error_(ctx->message_context, loc, + VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, function, "%s", string->buffer); hlsl_release_string_buffer(ctx, string); va_end(args); @@ -96,18 +98,39 @@ char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) 
void hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl) { struct hlsl_scope *scope = ctx->cur_scope; + bool allow_redefinition = false; struct hlsl_ir_var *var; + /* If a variable is declared in a loop's initialization, it is considered + * to be declared outside the loop, and redefinition is allowed. */ + if (scope->loop) + { + allow_redefinition = true; + scope = scope->upper; + VKD3D_ASSERT(scope); + } + if (decl->name) { LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) { if (var->name && !strcmp(decl->name, var->name)) { - hlsl_error(ctx, &decl->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Identifier \"%s\" was already declared in this scope.", var->name); - hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", var->name); - break; + if (allow_redefinition) + { + hlsl_warning(ctx, &decl->loc, VKD3D_SHADER_WARNING_HLSL_REDEFINED, + "Identifier \"%s\" was already declared in this scope.", var->name); + hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, + "\"%s\" was previously declared here.", var->name); + } + else + { + hlsl_error(ctx, &decl->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Identifier \"%s\" was already declared in this scope.", var->name); + hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, + "\"%s\" was previously declared here.", var->name); + break; + } } } } @@ -119,7 +142,7 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) { struct hlsl_ir_var *var; - LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + LIST_FOR_EACH_ENTRY_REV(var, &scope->vars, struct hlsl_ir_var, scope_entry) { if (var->name && !strcmp(name, var->name)) return var; @@ -955,7 +978,7 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de /* Initializes a deref from another deref (prefix) and a component index. * *block is initialized to contain the new constant node instructions used by the deref's path. 
*/ -static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_block *block, +bool hlsl_init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_deref *deref, const struct hlsl_deref *prefix, unsigned int index, const struct vkd3d_shader_location *loc) { @@ -1023,6 +1046,10 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co return type->e.record.fields[c->value.u[0].u].type; } + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + return type->e.resource.format; + default: vkd3d_unreachable(); } @@ -1431,11 +1458,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, return NULL; } dst_field->name = hlsl_strdup(ctx, src_field->name); - if (src_field->semantic.name) - { - dst_field->semantic.name = hlsl_strdup(ctx, src_field->semantic.name); - dst_field->semantic.index = src_field->semantic.index; - } + hlsl_clone_semantic(ctx, &dst_field->semantic, &src_field->semantic); } break; } @@ -1726,7 +1749,7 @@ void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *blo return; init_node(&store->node, HLSL_IR_STORE, NULL, &rhs->loc); - if (!init_deref_from_component_index(ctx, &comp_path_block, &store->lhs, lhs, comp, &rhs->loc)) + if (!hlsl_init_deref_from_component_index(ctx, &comp_path_block, &store->lhs, lhs, comp, &rhs->loc)) { vkd3d_free(store); return; @@ -2009,7 +2032,8 @@ static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx) } struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, - struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc) + struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, bool is_loop_condition, + const struct vkd3d_shader_location *loc) { struct hlsl_ir_if *iff; @@ -2023,14 +2047,16 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node 
*cond hlsl_block_init(&iff->else_block); if (else_block) hlsl_block_add_block(&iff->else_block, else_block); + iff->is_loop_conditional = is_loop_condition; return &iff->node; } void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, - enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc) + enum hlsl_if_flatten_type flatten_type, bool is_loop_condition, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, else_block, flatten_type, loc); + struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, + else_block, flatten_type, is_loop_condition, loc); if (instr) { @@ -2164,7 +2190,7 @@ struct hlsl_ir_node *hlsl_block_add_load_component(struct hlsl_ctx *ctx, struct comp_type = hlsl_type_get_component_type(ctx, type, comp); init_node(&load->node, HLSL_IR_LOAD, comp_type, loc); - if (!init_deref_from_component_index(ctx, &comp_path_block, &load->src, deref, comp, loc)) + if (!hlsl_init_deref_from_component_index(ctx, &comp_path_block, &load->src, deref, comp, loc)) { vkd3d_free(load); block->value = ctx->error_instr; @@ -2239,8 +2265,8 @@ static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, enum h { struct hlsl_ir_resource_store *store; - if (type != HLSL_RESOURCE_STORE - || hlsl_deref_get_type(ctx, resource)->sampler_dim != HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + if (type != HLSL_RESOURCE_STORE || (!resource->var->is_tgsm + && hlsl_deref_get_type(ctx, resource)->sampler_dim != HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)) VKD3D_ASSERT(!byte_offset); if (!(store = hlsl_alloc(ctx, sizeof(*store)))) @@ -2363,6 +2389,18 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, const struct hlsl_pr type = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); break; + case VKD3D_SHADER_TYPE_HULL: + type = hlsl_get_type(ctx->cur_scope, "HullShader", 
true, true); + break; + + case VKD3D_SHADER_TYPE_DOMAIN: + type = hlsl_get_type(ctx->cur_scope, "DomainShader", true, true); + break; + + case VKD3D_SHADER_TYPE_COMPUTE: + type = hlsl_get_type(ctx->cur_scope, "ComputeShader", true, true); + break; + default: hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Invalid profile \"%s\".", profile->name); return NULL; @@ -2751,8 +2789,8 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma return NULL; } - if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), - &then_block, &else_block, src->flatten_type, &src->node.loc))) + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, + &else_block, src->flatten_type, src->is_loop_conditional, &src->node.loc))) { hlsl_block_cleanup(&then_block); hlsl_block_cleanup(&else_block); @@ -2804,6 +2842,9 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ hlsl_block_cleanup(&body); return NULL; } + + hlsl_ir_loop(dst)->limiter = src->limiter; + hlsl_ir_loop(dst)->limiter_component = src->limiter_component; return dst; } @@ -3869,7 +3910,10 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, { vkd3d_string_buffer_printf(buffer, "if ("); dump_src(buffer, &if_node->condition); - vkd3d_string_buffer_printf(buffer, ") {\n"); + vkd3d_string_buffer_printf(buffer, ") {"); + if (if_node->is_loop_conditional) + vkd3d_string_buffer_printf(buffer, " // loop conditional."); + vkd3d_string_buffer_printf(buffer, "\n"); dump_block(ctx, buffer, &if_node->then_block); vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); dump_block(ctx, buffer, &if_node->else_block); @@ -3915,7 +3959,10 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) { - vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); + 
vkd3d_string_buffer_printf(buffer, "for (;;) {"); + if (loop->limiter) + vkd3d_string_buffer_printf(buffer, " // limiter: %s[%u]", loop->limiter->name, loop->limiter_component); + vkd3d_string_buffer_printf(buffer, "\n"); dump_block(ctx, buffer, &loop->body); vkd3d_string_buffer_printf(buffer, " %10s }", ""); } @@ -4080,8 +4127,11 @@ static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names)); vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]); dump_deref(buffer, &interlocked->dst); - vkd3d_string_buffer_printf(buffer, ", coords = "); - dump_src(buffer, &interlocked->coords); + if (interlocked->coords.node) + { + vkd3d_string_buffer_printf(buffer, ", coords = "); + dump_src(buffer, &interlocked->coords); + } if (interlocked->cmp_value.node) { vkd3d_string_buffer_printf(buffer, ", cmp_value = "); @@ -5049,6 +5099,43 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) hlsl_release_string_buffer(ctx, name); } +void hlsl_ctx_init_entry_function_attributes(struct hlsl_ctx *ctx) +{ + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; + ctx->output_control_point_count = UINT_MAX; + ctx->output_control_point_type = NULL; + ctx->output_primitive = 0; + ctx->partitioning = 0; + ctx->patch_constant_func = NULL; + ctx->input_control_point_count = UINT_MAX; + ctx->input_control_point_type = NULL; + ctx->input_primitive_param = NULL; + ctx->max_vertex_count = 0; + ctx->input_primitive_type = VKD3D_PT_UNDEFINED; + ctx->output_topology_type = VKD3D_PT_UNDEFINED; + + ctx->found_numthreads = 0; + memset(ctx->thread_count, 0, sizeof(ctx->thread_count)); +} + +static void hlsl_ctx_cleanup_locale(struct hlsl_ctx *ctx) +{ +#ifdef _WIN32 + _free_locale(ctx->c_locale); +#else + freelocale(ctx->c_locale); +#endif +} + +static void hlsl_ctx_init_locale(struct hlsl_ctx *ctx) +{ +#ifdef _WIN32 + ctx->c_locale = _create_locale(LC_ALL, "C"); +#else + ctx->c_locale = 
newlocale(LC_ALL_MASK, "C", NULL); +#endif +} + static bool hlsl_ctx_init(struct hlsl_ctx *ctx, struct vkd3d_shader_source_list *source_files, const struct vkd3d_shader_compile_info *compile_info, const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) @@ -5070,10 +5157,13 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, struct vkd3d_shader_source_list ctx->location.line = ctx->location.column = 1; vkd3d_string_buffer_cache_init(&ctx->string_buffers); + hlsl_ctx_init_locale(ctx); + list_init(&ctx->scopes); if (!(ctx->dummy_scope = hlsl_new_scope(ctx, NULL))) { + hlsl_ctx_cleanup_locale(ctx); vkd3d_string_buffer_cache_cleanup(&ctx->string_buffers); return false; } @@ -5092,10 +5182,16 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, struct vkd3d_shader_source_list if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, hlsl_strdup(ctx, "$Globals"), 0, NULL, NULL, &ctx->location))) + { + hlsl_ctx_cleanup_locale(ctx); return false; + } if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, hlsl_strdup(ctx, "$Params"), 0, NULL, NULL, &ctx->location))) + { + hlsl_ctx_cleanup_locale(ctx); return false; + } ctx->cur_buffer = ctx->globals_buffer; ctx->warn_implicit_truncation = true; @@ -5114,8 +5210,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, struct vkd3d_shader_source_list break; case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: - ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; - ctx->double_as_float_alias = option->value & VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS; + ctx->compatibility_flags = option->value; break; case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: @@ -5136,17 +5231,13 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, struct vkd3d_shader_source_list } if (!(ctx->error_instr = hlsl_new_error_expr(ctx))) + { + hlsl_ctx_cleanup_locale(ctx); return false; + } hlsl_block_add_instr(&ctx->static_initializers, 
ctx->error_instr); - ctx->domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; - ctx->output_control_point_count = UINT_MAX; - ctx->output_primitive = 0; - ctx->partitioning = 0; - ctx->input_control_point_count = UINT_MAX; - ctx->max_vertex_count = 0; - ctx->input_primitive_type = VKD3D_PT_UNDEFINED; - ctx->output_topology_type = VKD3D_PT_UNDEFINED; + hlsl_ctx_init_entry_function_attributes(ctx); return true; } @@ -5198,6 +5289,7 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) } vkd3d_free(ctx->constant_defs.regs); + hlsl_ctx_cleanup_locale(ctx); } static int hlsl_ctx_parse(struct hlsl_ctx *ctx, struct vkd3d_shader_source_list *source_list, @@ -5334,7 +5426,7 @@ int hlsl_parse(const struct vkd3d_shader_compile_info *compile_info, if (!vsir_program_init(program, compile_info, &version, 0, VSIR_CF_STRUCTURED, normalisation_level)) return VKD3D_ERROR_OUT_OF_MEMORY; - program->f32_denorm_mode = VSIR_DENORM_FLUSH_TO_ZERO; + program->f32_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; if ((ret = hlsl_ctx_parse(&ctx, &program->source_files, compile_info, profile, message_context)) < 0) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 87147985129..d996f635648 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -554,6 +554,10 @@ struct hlsl_ir_var * sampler and a texture for SM<4 backwards compatibility. */ bool is_combined_sampler; + /* Whether the initial value of the variable is a compile-time constant + * expression and therefore it could be moved to ctx->static_initializers. */ + bool is_compile_time_const; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -683,6 +687,9 @@ struct hlsl_ir_if struct hlsl_block then_block; struct hlsl_block else_block; enum hlsl_if_flatten_type flatten_type; + /* If this "if" was created inside a "for" loop to take care of breaking it + * when the conditional is false. 
*/ + bool is_loop_conditional; }; enum hlsl_loop_unroll_type @@ -709,6 +716,11 @@ struct hlsl_ir_loop unsigned int next_index; /* liveness index of the end of the loop */ struct hlsl_src unroll_limit; enum hlsl_loop_unroll_type unroll_type; + + /* Uniform variable that contains an int that defines a limit for the + * number of iterations, if any. */ + struct hlsl_ir_var *limiter; + unsigned int limiter_component; }; struct hlsl_ir_switch_case @@ -1071,11 +1083,6 @@ struct hlsl_profile_info bool software; }; -struct hlsl_vec4 -{ - float f[4]; -}; - enum hlsl_buffer_type { HLSL_BUFFER_CONSTANT, @@ -1134,6 +1141,8 @@ struct hlsl_ctx * scanner is declared as reentrant, which is the case. */ void *scanner; + vkd3d_locale c_locale; + /* Pointer to the current scope; changes as the parser reads the code. */ struct hlsl_scope *cur_scope; /* Scope of global variables. */ @@ -1191,21 +1200,25 @@ struct hlsl_ctx /* List of the instruction nodes for initializing static variables. */ struct hlsl_block static_initializers; - /* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. - * Only used for SM1 profiles. */ + /* Dynamic array of constant values that appear in the shader, associated + * to the 'c' and 'i' registers. Only used for SM1 profiles. */ struct hlsl_constant_defs { struct hlsl_constant_register { + bool is_int; uint32_t index; uint32_t allocated_mask; - struct hlsl_vec4 value; + union hlsl_constant_value_component value[4]; struct vkd3d_shader_location loc; } *regs; size_t count, size; } constant_defs; /* 'c' registers where the constants expected by SM2 sincos are stored. */ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; + /* 'i' register allocated by SM3 to keep the maximum number of iterations. */ + struct hlsl_reg d3d255intconst; + /* Number of allocated registers, used in translation to vsir. 
*/ unsigned int ssa_count, temp_count, indexable_temp_count; @@ -1263,11 +1276,10 @@ struct hlsl_ctx /* Whether the numthreads() attribute has been provided in the entry-point function. */ uint32_t found_numthreads : 1; - bool semantic_compat_mapping; + enum vkd3d_shader_compile_option_backward_compatibility compatibility_flags; bool child_effect; bool include_empty_buffers; bool warn_implicit_truncation; - bool double_as_float_alias; }; static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) @@ -1528,6 +1540,13 @@ static inline bool hlsl_is_numeric_type(const struct hlsl_type *type) return type->class <= HLSL_CLASS_LAST_NUMERIC; } +static inline struct hlsl_type *hlsl_change_base_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) +{ + VKD3D_ASSERT(hlsl_is_numeric_type(type)); + return hlsl_get_numeric_type(ctx, type->class, base_type, type->e.numeric.dimx, type->e.numeric.dimy); +} + static inline bool hlsl_is_vec1(const struct hlsl_type *type) { return type->class == HLSL_CLASS_SCALAR || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); @@ -1600,7 +1619,7 @@ struct hlsl_ir_node *hlsl_block_add_float_constant(struct hlsl_ctx *ctx, struct float f, const struct vkd3d_shader_location *loc); void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, - enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc); + enum hlsl_if_flatten_type flatten_type, bool is_loop_condition, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_index(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_int_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1666,6 +1685,8 @@ int 
hlsl_emit_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info struct vsir_program *program, struct vkd3d_shader_code *reflection_data); bool hlsl_init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len); +bool hlsl_init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_deref *deref, + const struct hlsl_deref *prefix, unsigned int index, const struct vkd3d_shader_location *loc); bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); @@ -1677,6 +1698,8 @@ bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const void hlsl_cleanup_ir_switch_cases(struct list *cases); void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); +void hlsl_ctx_init_entry_function_attributes(struct hlsl_ctx *ctx); + void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); void hlsl_free_attribute(struct hlsl_attribute *attr); @@ -1721,7 +1744,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, uint32_t struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, - struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc); + struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, bool is_loop_condition, + const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, enum hlsl_so_object_type so_type, struct hlsl_type *type); struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -1789,14 +1813,22 @@ 
struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, struct list *cases, const struct vkd3d_shader_location *loc); -void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); -void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); -void hlsl_warning(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); -void hlsl_note(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_log_level level, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); +#define hlsl_error(ctx, loc, error, ...) \ + hlsl_error_(ctx, loc, error, __FUNCTION__, __VA_ARGS__) +void hlsl_error_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(5, 6); +#define hlsl_fixme(ctx, loc, ...) \ + hlsl_fixme_(ctx, loc, __FUNCTION__, __VA_ARGS__) +void hlsl_fixme_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); +#define hlsl_warning(ctx, loc, error, ...) \ + hlsl_warning_(ctx, loc, error, __FUNCTION__, __VA_ARGS__) +void hlsl_warning_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(5, 6); +#define hlsl_note(ctx, loc, level, ...) \ + hlsl_note_(ctx, loc, level, __FUNCTION__, __VA_ARGS__) +void hlsl_note_(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_log_level level, const char *function, const char *fmt, ...) 
VKD3D_PRINTF_FUNC(5, 6); void hlsl_push_scope(struct hlsl_ctx *ctx); void hlsl_pop_scope(struct hlsl_ctx *ctx); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 24d08b314a4..85f0d89aa86 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -19,6 +19,10 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ +%top{ +#include "config.h" +} + %{ #define YY_NO_UNISTD_H #include "hlsl.h" @@ -238,15 +242,18 @@ while {return KW_WHILE; } } [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[h|H|f|F]? { - yylval->floatval = atof(yytext); + struct hlsl_ctx *ctx = yyget_extra(yyscanner); + yylval->floatval = vkd3d_parse_float(yytext, ctx->c_locale); return C_FLOAT; } [0-9]+\.([eE][+-]?[0-9]+)?[h|H|f|F]? { - yylval->floatval = atof(yytext); + struct hlsl_ctx *ctx = yyget_extra(yyscanner); + yylval->floatval = vkd3d_parse_float(yytext, ctx->c_locale); return C_FLOAT; } [0-9]+[eE][+-]?[0-9]+[h|H|f|F]? { - yylval->floatval = atof(yytext); + struct hlsl_ctx *ctx = yyget_extra(yyscanner); + yylval->floatval = vkd3d_parse_float(yytext, ctx->c_locale); return C_FLOAT; } 0x[0-9a-fA-F]+[lL]? 
{ diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index e74b9a6c4de..6c1f73df70e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -471,7 +471,7 @@ static void append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co hlsl_block_init(&then_block); hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &condition->loc); - hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &condition->loc); + hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, true, &condition->loc); } static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) @@ -544,9 +544,10 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); } -static bool is_static_expression(const struct hlsl_block *block) +static bool has_side_effects(const struct hlsl_block *block) { struct hlsl_ir_node *node; + struct hlsl_ir_var *var; LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) { @@ -558,13 +559,58 @@ static bool is_static_expression(const struct hlsl_block *block) case HLSL_IR_SAMPLER_STATE: case HLSL_IR_STRING_CONSTANT: case HLSL_IR_SWIZZLE: + case HLSL_IR_INDEX: case HLSL_IR_LOAD: + break; + case HLSL_IR_STORE: + var = hlsl_ir_store(node)->lhs.var; + if (var->is_synthetic) + break; + return true; + case HLSL_IR_CALL: + case HLSL_IR_IF: + case HLSL_IR_INTERLOCKED: + case HLSL_IR_LOOP: + case HLSL_IR_JUMP: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: + case HLSL_IR_STATEBLOCK_CONSTANT: + case HLSL_IR_SYNC: + return true; + } + } + + return false; +} + +static bool is_compile_time_const(const struct hlsl_block *block, bool default_values) +{ + struct hlsl_ir_node *node; + struct hlsl_ir_var 
*var; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + switch (node->type) + { + case HLSL_IR_COMPILE: + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_SAMPLER_STATE: + case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SWIZZLE: case HLSL_IR_INDEX: - continue; + break; + case HLSL_IR_LOAD: + var = hlsl_ir_load(node)->src.var; + if (var->is_synthetic || var->is_compile_time_const || (var->default_values && default_values)) + break; + return false; case HLSL_IR_STORE: - if (hlsl_ir_store(node)->lhs.var->is_synthetic) + var = hlsl_ir_store(node)->lhs.var; + if (var->is_synthetic) break; - /* fall-through */ + return false; case HLSL_IR_CALL: case HLSL_IR_IF: case HLSL_IR_INTERLOCKED: @@ -583,7 +629,8 @@ static bool is_static_expression(const struct hlsl_block *block) } static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx, - struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) + struct hlsl_block *block, struct hlsl_type *dst_type, bool default_values, + const struct vkd3d_shader_location *loc) { struct hlsl_default_value ret = {0}; struct hlsl_ir_node *node; @@ -593,7 +640,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx if (node_from_block(block)->data_type->class == HLSL_CLASS_ERROR) return ret; - if (!is_static_expression(block)) + if (!is_compile_time_const(block, default_values)) hlsl_error(ctx, &node_from_block(block)->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); @@ -644,7 +691,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str { struct hlsl_default_value res; - res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false, loc); VKD3D_ASSERT(!res.string); return res.number.u; } @@ -676,7 +723,7 @@ static struct hlsl_block 
*create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type { struct hlsl_block expr; - if (!is_static_expression(&attr->instrs)) + if (has_side_effects(&attr->instrs)) { hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll limit expressions cannot have side effects."); @@ -1681,8 +1728,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct if (arg->data_type->class == HLSL_CLASS_ERROR) return arg; - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); + bool_type = hlsl_change_base_type(ctx, arg->data_type, HLSL_TYPE_BOOL); args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc); return add_expr(ctx, block, op, args, bool_type, loc); } @@ -2000,205 +2046,27 @@ static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle, return true; } -static bool add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_index *lhs, struct hlsl_ir_index *resource_access, - struct hlsl_ir_node *rhs, unsigned int width, uint32_t writemask, bool matrix_writemask) +static struct hlsl_ir_node *resolve_assignment_lhs(struct hlsl_ctx *ctx, struct hlsl_block *block, + bool is_function_out_arg, struct hlsl_ir_node *lhs, struct hlsl_type **lhs_type, + struct hlsl_ir_node **rhs, unsigned int *writemask, bool *matrix_writemask) { - struct hlsl_ir_node *coords = resource_access->idx.node; - struct hlsl_type *resource_type, *resource_format; - struct vkd3d_shader_location loc = lhs->node.loc; - unsigned int dim_count, expected_width; - struct hlsl_deref resource_deref; - - if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, resource_access->val.node)) - return false; - - resource_type = hlsl_deref_get_type(ctx, &resource_deref); - resource_format = resource_type->e.resource.format; - expected_width = resource_format->e.numeric.dimx * resource_format->e.numeric.dimy; - 
VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); - - if (resource_type->class != HLSL_CLASS_UAV) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Read-only resources cannot be stored to."); - - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - - VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); - VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count); - - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - struct hlsl_ir_node *field_offset = hlsl_block_add_uint_constant(ctx, block, 0, &loc); - struct hlsl_type *val_type = lhs->node.data_type; - struct hlsl_ir_index *ptr = lhs, *prev = NULL; - - if (rhs->data_type->class > HLSL_CLASS_LAST_NUMERIC) - { - hlsl_fixme(ctx, &loc, "Composite type structured buffer stores."); - return false; - } - - while (ptr != resource_access) - { - struct hlsl_ir_index *next = hlsl_ir_index(ptr->val.node); - struct hlsl_type *type = ptr->val.node->data_type; - - if (hlsl_index_is_noncontiguous(next)) - { - type = next->val.node->data_type; - } - else if (hlsl_index_is_noncontiguous(ptr)) - { - if (prev) - { - type = prev->val.node->data_type; - } - else - { - val_type = ptr->val.node->data_type; - type = ptr->node.data_type; - } - } - - field_offset = hlsl_block_add_packed_index_offset_append(ctx, - block, field_offset, ptr->idx.node, type, &loc); - - prev = ptr; - ptr = next; - } - - if (matrix_writemask || rhs->data_type->class == HLSL_CLASS_MATRIX) - { - /* Type of the minor inner vector. column type for row major, - * row type for column major. 
*/ - struct hlsl_type *minor_type = hlsl_get_vector_type(ctx, - val_type->e.numeric.type, hlsl_type_minor_size(val_type)); - struct hlsl_block writes; - bool written = false; - - hlsl_block_init(&writes); - - for (unsigned int i = 0, k = 0; i < val_type->e.numeric.dimy; ++i) - { - bool row_major = hlsl_type_is_row_major(val_type); - struct hlsl_ir_node *mtx_offset; - struct hlsl_ir_node *row_c; - - row_c = hlsl_block_add_uint_constant(ctx, &writes, i, &loc); - mtx_offset = hlsl_block_add_packed_index_offset_append(ctx, &writes, - field_offset, row_c, row_major ? val_type : minor_type, &loc); - - for (unsigned int j = 0; j < val_type->e.numeric.dimx; ++j) - { - struct hlsl_ir_node *column_c, *load, *col_offset; - - if (matrix_writemask) - { - unsigned int idx = i * 4 + j; - - if (!(writemask & (1u << idx))) - continue; - } - - written = true; - column_c = hlsl_block_add_uint_constant(ctx, &writes, j, &loc); - col_offset = hlsl_block_add_packed_index_offset_append(ctx, &writes, - mtx_offset, column_c, row_major ? 
minor_type : val_type, &loc); - load = hlsl_add_load_component(ctx, &writes, rhs, k++, &loc); - hlsl_block_add_resource_store(ctx, &writes, HLSL_RESOURCE_STORE, - &resource_deref, col_offset, coords, load, VKD3DSP_WRITEMASK_0, &loc); - } - } - - if (written) - hlsl_block_add_block(block, &writes); - hlsl_block_cleanup(&writes); - } - else - { - unsigned int size = val_type->e.numeric.dimx; - - for (unsigned int i = 0, k = 0; i < size; ++i) - { - struct hlsl_ir_node *c, *load, *offset; - - if (!(writemask & (1u << i))) - continue; - - if (val_type->class == HLSL_CLASS_SCALAR) - { - VKD3D_ASSERT(i == 0); - VKD3D_ASSERT(size == 1); - - offset = field_offset; - } - else - { - c = hlsl_block_add_uint_constant(ctx, block, i, &loc); - offset = hlsl_block_add_packed_index_offset_append(ctx, block, field_offset, c, val_type, &loc); - } - - load = hlsl_add_load_component(ctx, block, rhs, k++, &loc); - hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, - &resource_deref, offset, coords, load, VKD3DSP_WRITEMASK_0, &loc); - } - } - } - else - { - if (width != expected_width) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, - "Resource store expressions must write to all components."); - - hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, - &resource_deref, NULL, coords, rhs, writemask, &loc); - } - - hlsl_cleanup_deref(&resource_deref); - - return true; -} - -static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs, bool is_function_out_arg) -{ - struct hlsl_type *lhs_type = lhs->data_type; - unsigned int writemask = 0, width = 0; - struct hlsl_ir_index *resource_access; - bool matrix_writemask = false; + unsigned int width = 0; bool first_cast = true; - if (lhs->data_type->class == HLSL_CLASS_ERROR || rhs->data_type->class == HLSL_CLASS_ERROR) - { - block->value = ctx->error_instr; - return true; - } - - if (assign_op == 
ASSIGN_OP_SUB) - { - if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) - return false; - assign_op = ASSIGN_OP_ADD; - } - if (assign_op != ASSIGN_OP_ASSIGN) + if (hlsl_is_numeric_type(lhs->data_type)) { - enum hlsl_ir_expr_op op = op_from_assignment(assign_op); + unsigned int size = hlsl_type_component_count(lhs->data_type); - VKD3D_ASSERT(op); - if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) - return false; + *writemask = (1 << size) - 1; + width = size; } - - if (hlsl_is_numeric_type(lhs_type)) + else { - unsigned int size = hlsl_type_component_count(lhs_type); - - writemask = (1 << size) - 1; - width = size; + *writemask = 0; } - rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc); + *lhs_type = lhs->data_type; + *matrix_writemask = false; while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) { @@ -2210,18 +2078,19 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc if (hlsl_type_component_count(lhs->data_type) != hlsl_type_component_count(cast->data_type)) { hlsl_fixme(ctx, &cast->loc, "Size change on the LHS."); - return false; + return NULL; } if (hlsl_version_ge(ctx, 4, 0) && (!is_function_out_arg || !first_cast)) { hlsl_error(ctx, &cast->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Base type casts are not allowed on the LHS for profiles >= 4."); - return false; + return NULL; } - lhs_type = lhs->data_type; - if (lhs_type->class == HLSL_CLASS_VECTOR || (lhs_type->class == HLSL_CLASS_MATRIX && matrix_writemask)) - lhs_type = hlsl_get_vector_type(ctx, lhs->data_type->e.numeric.type, width); + *lhs_type = lhs->data_type; + if ((*lhs_type)->class == HLSL_CLASS_VECTOR + || ((*lhs_type)->class == HLSL_CLASS_MATRIX && *matrix_writemask)) + *lhs_type = hlsl_get_vector_type(ctx, lhs->data_type->e.numeric.type, width); first_cast = false; } @@ -2230,7 +2099,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc struct 
hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); uint32_t s; - VKD3D_ASSERT(!matrix_writemask); + VKD3D_ASSERT(!*matrix_writemask); if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) { @@ -2239,48 +2108,80 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) { hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); - return false; + return NULL; } - if (!invert_swizzle_matrix(&ms, &s, &writemask, &width)) + if (!invert_swizzle_matrix(&ms, &s, writemask, &width)) { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); - return false; + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, + "Invalid writemask for matrix."); + return NULL; } - matrix_writemask = true; + *matrix_writemask = true; } else { s = swizzle->u.vector; - if (!invert_swizzle(&s, &writemask, &width)) + if (!invert_swizzle(&s, writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); - return false; + return NULL; } } - rhs = hlsl_block_add_swizzle(ctx, block, s, width, rhs, &swizzle->node.loc); + *rhs = hlsl_block_add_swizzle(ctx, block, s, width, *rhs, &swizzle->node.loc); lhs = swizzle->val.node; - lhs_type = hlsl_get_vector_type(ctx, lhs_type->e.numeric.type, width); + *lhs_type = hlsl_get_vector_type(ctx, (*lhs_type)->e.numeric.type, width); } else { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid lvalue."); - return false; + return NULL; } } + return lhs; +} + +static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, + enum parse_assign_op assign_op, struct hlsl_ir_node *rhs, bool is_function_out_arg) +{ + struct hlsl_type *lhs_type = lhs->data_type; + bool matrix_writemask = false; + unsigned int writemask = 0; + + if (lhs->data_type->class == 
HLSL_CLASS_ERROR || rhs->data_type->class == HLSL_CLASS_ERROR) + { + block->value = ctx->error_instr; + return true; + } + + if (assign_op == ASSIGN_OP_SUB) + { + if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) + return false; + assign_op = ASSIGN_OP_ADD; + } + if (assign_op != ASSIGN_OP_ASSIGN) + { + enum hlsl_ir_expr_op op = op_from_assignment(assign_op); + + VKD3D_ASSERT(op); + if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) + return false; + } + + rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc); + + if (!(lhs = resolve_assignment_lhs(ctx, block, is_function_out_arg, lhs, + &lhs_type, &rhs, &writemask, &matrix_writemask))) + return false; + /* lhs casts could have resulted in a discrepancy between the * rhs->data_type and the type of the variable that will be ulimately * stored to. This is corrected. */ rhs = add_cast(ctx, block, rhs, lhs_type, &rhs->loc); - if (lhs->type == HLSL_IR_INDEX && (resource_access = hlsl_index_chain_find_resource_access(hlsl_ir_index(lhs)))) - { - if (!add_resource_store(ctx, block, hlsl_ir_index(lhs), - resource_access, rhs, width, writemask, matrix_writemask)) - return false; - } - else if (matrix_writemask) + if (matrix_writemask) { struct hlsl_deref deref; unsigned int i, j, k = 0; @@ -2420,7 +2321,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i { if (!hlsl_clone_block(ctx, &block, instrs)) return; - default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); + default_value = evaluate_static_expression(ctx, &block, dst_comp_type, true, &src->loc); if (dst->default_values) dst->default_values[*store_index] = default_value; @@ -2691,6 +2592,14 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) return; } + if (ctx->cur_scope == ctx->globals + && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_GROUPSHARED))) + modifiers |= HLSL_STORAGE_UNIFORM; + + if (ctx->cur_scope == 
ctx->globals && (modifiers & HLSL_STORAGE_UNIFORM) + && (ctx->compatibility_flags & VKD3D_SHADER_COMPILE_OPTION_CONST_GLOBAL_UNIFORMS)) + type = hlsl_type_clone(ctx, type, 0, HLSL_MODIFIER_CONST); + if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) { hlsl_cleanup_semantic(&new_semantic); @@ -2717,12 +2626,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "packoffset() is only allowed inside constant buffer declarations."); } - else - { - if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, - "Const variable \"%s\" is missing an initializer.", var->name); - } + + if (!(modifiers & (HLSL_STORAGE_UNIFORM | HLSL_STORAGE_STATIC | HLSL_STORAGE_GROUPSHARED)) + && (type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, + "Const variable \"%s\" is missing an initializer.", var->name); if (ctx->cur_scope == ctx->globals) { @@ -2730,25 +2638,14 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); - if ((modifiers & HLSL_STORAGE_GROUPSHARED)) - { - /* d3dcompiler/fxc always validates global groupshared variables, - * regardless of whether the groupshared modifier is ignored. 
*/ - validate_groupshared_var(ctx, var); - - if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE) - { - modifiers &= ~HLSL_STORAGE_GROUPSHARED; - hlsl_warning(ctx, &var->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_MODIFIER, - "Ignoring the 'groupshared' modifier in a non-compute shader."); - } - } + if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_GROUPSHARED)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Variable '%s' is declared as both \"uniform\" and \"groupshared\".", var->name); - /* Mark it as uniform. We need to do this here since synthetic - * variables also get put in the global scope, but shouldn't be - * considered uniforms, and we have no way of telling otherwise. */ - if (!(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_GROUPSHARED))) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + /* d3dcompiler/fxc always validates global groupshared variables, + * regardless of whether the groupshared modifier is ignored. */ + if (modifiers & HLSL_STORAGE_GROUPSHARED) + validate_groupshared_var(ctx, var); if (stream_output) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT, @@ -2862,7 +2759,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var static_initialization = var->storage_modifiers & HLSL_STORAGE_STATIC || (var->data_type->modifiers & HLSL_MODIFIER_CONST - && is_static_expression(v->initializer.instrs)); + && is_compile_time_const(v->initializer.instrs, false)); if (is_default_values_initializer) { @@ -2895,6 +2792,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var else if (static_initialization) { hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); + var->is_compile_time_const = true; } else { @@ -2919,6 +2817,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var zero = hlsl_block_add_uint_constant(ctx, &ctx->static_initializers, 0, &var->loc); cast = add_cast(ctx, 
&ctx->static_initializers, zero, var->data_type, &var->loc); hlsl_block_add_simple_store(ctx, &ctx->static_initializers, var, cast); + var->is_compile_time_const = true; } free_parse_variable_def(v); } @@ -3276,7 +3175,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, if (!hlsl_type_is_integer(type)) return arg; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); + type = hlsl_change_base_type(ctx, type, HLSL_TYPE_FLOAT); return add_implicit_conversion(ctx, params->instrs, arg, type, loc); } @@ -3359,7 +3258,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; if (hlsl_type_is_integer(type)) - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); + type = hlsl_change_base_type(ctx, type, HLSL_TYPE_FLOAT); convert_args(ctx, params, type, loc); return true; @@ -3373,7 +3272,7 @@ static bool elementwise_intrinsic_int_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_INT, type->e.numeric.dimx, type->e.numeric.dimy); + type = hlsl_change_base_type(ctx, type, HLSL_TYPE_INT); convert_args(ctx, params, type, loc); return true; @@ -3387,7 +3286,7 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + type = hlsl_change_base_type(ctx, type, HLSL_TYPE_UINT); convert_args(ctx, params, type, loc); return true; @@ -3454,14 +3353,6 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, return write_acos_or_asin(ctx, params, loc, false); } -/* Find the type corresponding to the given 
source type, with the same - * dimensions but a different base type. */ -static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) -{ - return hlsl_get_numeric_type(ctx, type->class, base_type, type->e.numeric.dimx, type->e.numeric.dimy); -} - static void add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { @@ -3485,7 +3376,7 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, struct hlsl_ir_node *arg = params->args[0], *cast; struct hlsl_type *bool_type; - bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); + bool_type = hlsl_change_base_type(ctx, arg->data_type, HLSL_TYPE_BOOL); cast = add_cast(ctx, params->instrs, arg, bool_type, loc); add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); return true; @@ -3497,7 +3388,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer * struct hlsl_ir_node *arg = params->args[0], *cast; struct hlsl_type *bool_type; - bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); + bool_type = hlsl_change_base_type(ctx, arg->data_type, HLSL_TYPE_BOOL); cast = add_cast(ctx, params->instrs, arg, bool_type, loc); add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); return true; @@ -3618,7 +3509,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, string->buffer); hlsl_release_string_buffer(ctx, string); } - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); + data_type = hlsl_change_base_type(ctx, data_type, HLSL_TYPE_FLOAT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); @@ -3641,7 +3532,7 @@ static bool intrinsic_asint(struct hlsl_ctx *ctx, string->buffer); hlsl_release_string_buffer(ctx, string); } - data_type = convert_numeric_type(ctx, data_type, 
HLSL_TYPE_INT); + data_type = hlsl_change_base_type(ctx, data_type, HLSL_TYPE_INT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); @@ -3677,7 +3568,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, string->buffer); hlsl_release_string_buffer(ctx, string); } - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); + data_type = hlsl_change_base_type(ctx, data_type, HLSL_TYPE_UINT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); @@ -3785,7 +3676,7 @@ static bool intrinsic_countbits(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) return false; - type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); + type = hlsl_change_base_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_COUNTBITS, operands, type, loc); @@ -4116,7 +4007,7 @@ static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) return false; - type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_FLOAT); + type = hlsl_change_base_type(ctx, params->args[0]->data_type, HLSL_TYPE_FLOAT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); @@ -4131,7 +4022,7 @@ static bool intrinsic_f32tof16(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); + type = hlsl_change_base_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc); @@ -4158,7 +4049,7 @@ static bool intrinsic_firstbithigh(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_int_convert_args(ctx, params, 
loc)) return false; } - type = convert_numeric_type(ctx, type, HLSL_TYPE_UINT); + type = hlsl_change_base_type(ctx, type, HLSL_TYPE_UINT); operands[0] = params->args[0]; if (hlsl_version_lt(ctx, 5, 0)) @@ -4194,7 +4085,7 @@ static bool intrinsic_firstbitlow(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) return false; - type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); + type = hlsl_change_base_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_CTZ, operands, type, loc); @@ -4287,10 +4178,10 @@ static bool intrinsic_frexp(struct hlsl_ctx *ctx, hlsl_fixme(ctx, loc, "frexp() on doubles."); return false; } - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); - uint_dim_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); - int_dim_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_INT, type->e.numeric.dimx, type->e.numeric.dimy); - bool_dim_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); + type = hlsl_change_base_type(ctx, type, HLSL_TYPE_FLOAT); + uint_dim_type = hlsl_change_base_type(ctx, type, HLSL_TYPE_UINT); + int_dim_type = hlsl_change_base_type(ctx, type, HLSL_TYPE_INT); + bool_dim_type = hlsl_change_base_type(ctx, type, HLSL_TYPE_BOOL); if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, bool_dim_type->name, uint_dim_type->name, int_dim_type->name, type->name, type->name))) @@ -4333,12 +4224,9 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, static bool intrinsic_isinf(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_type *type = params->args[0]->data_type, *bool_type; + struct hlsl_type *bool_type = hlsl_change_base_type(ctx, 
params->args[0]->data_type, HLSL_TYPE_BOOL); struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - bool_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, - type->e.numeric.dimx, type->e.numeric.dimy); - args[0] = params->args[0]; return !!add_expr(ctx, params->instrs, HLSL_OP1_ISINF, args, bool_type, loc); } @@ -4816,8 +4704,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, struct hlsl_ir_node *lt, *neg, *op1, *op2, *zero, *arg = params->args[0]; static const struct hlsl_constant_value zero_value; - struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, - arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); + struct hlsl_type *int_type = hlsl_change_base_type(ctx, arg->data_type, HLSL_TYPE_INT); zero = hlsl_block_add_constant(ctx, params->instrs, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc); @@ -4992,10 +4879,11 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, } static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, - const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) + const struct vkd3d_shader_location *loc, const char *name, + enum hlsl_sampler_dim dim, enum hlsl_resource_load_type type) { + struct hlsl_resource_load_params load_params = {.type = type}; unsigned int sampler_dim = hlsl_sampler_dim_count(dim); - struct hlsl_resource_load_params load_params = { 0 }; const struct hlsl_type *sampler_type; struct hlsl_ir_node *coords; @@ -5019,17 +4907,10 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); } - if (!strcmp(name, "tex2Dbias") - || !strcmp(name, "tex2Dlod") - || !strcmp(name, "texCUBEbias")) + if (type == HLSL_RESOURCE_SAMPLE_LOD || type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) { struct hlsl_ir_node *lod, *c; - if (!strcmp(name, "tex2Dlod")) - load_params.type = HLSL_RESOURCE_SAMPLE_LOD; - else - load_params.type = 
HLSL_RESOURCE_SAMPLE_LOD_BIAS; - c = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc); coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); @@ -5038,9 +4919,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * load_params.lod = add_implicit_conversion(ctx, params->instrs, lod, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc); } - else if (!strcmp(name, "tex2Dproj") - || !strcmp(name, "tex3Dproj") - || !strcmp(name, "texCUBEproj")) + else if (type == HLSL_RESOURCE_SAMPLE_PROJ) { coords = add_implicit_conversion(ctx, params->instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), loc); @@ -5057,12 +4936,8 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * load_params.type = HLSL_RESOURCE_SAMPLE; } - else - { - load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; - } } - else if (params->args_count == 4) /* Gradient sampling. 
*/ + else if (type == HLSL_RESOURCE_SAMPLE_GRAD) { coords = add_implicit_conversion(ctx, params->instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); @@ -5070,11 +4945,9 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); - load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; } else { - load_params.type = HLSL_RESOURCE_SAMPLE; coords = add_implicit_conversion(ctx, params->instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); } @@ -5110,85 +4983,107 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * static bool intrinsic_tex1D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); + return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D, + params->args_count == 4 ? HLSL_RESOURCE_SAMPLE_GRAD : HLSL_RESOURCE_SAMPLE); } static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); + return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D, HLSL_RESOURCE_SAMPLE_GRAD); } static bool intrinsic_tex2D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); + return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D, + params->args_count == 4 ? 
HLSL_RESOURCE_SAMPLE_GRAD : HLSL_RESOURCE_SAMPLE); } static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); + return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D, HLSL_RESOURCE_SAMPLE_LOD_BIAS); } static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D); + return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D, HLSL_RESOURCE_SAMPLE_GRAD); } static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex2Dlod", HLSL_SAMPLER_DIM_2D); + return intrinsic_tex(ctx, params, loc, "tex2Dlod", HLSL_SAMPLER_DIM_2D, HLSL_RESOURCE_SAMPLE_LOD); } static bool intrinsic_tex2Dproj(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex2Dproj", HLSL_SAMPLER_DIM_2D); + return intrinsic_tex(ctx, params, loc, "tex2Dproj", HLSL_SAMPLER_DIM_2D, HLSL_RESOURCE_SAMPLE_PROJ); } static bool intrinsic_tex3D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); + return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D, + params->args_count == 4 ? 
HLSL_RESOURCE_SAMPLE_GRAD : HLSL_RESOURCE_SAMPLE); +} + +static bool intrinsic_tex3Dbias(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex3Dbias", HLSL_SAMPLER_DIM_3D, HLSL_RESOURCE_SAMPLE_LOD_BIAS); } static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); + return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D, HLSL_RESOURCE_SAMPLE_GRAD); +} + +static bool intrinsic_tex3Dlod(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex3Dlod", HLSL_SAMPLER_DIM_3D, HLSL_RESOURCE_SAMPLE_LOD); } static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "tex3Dproj", HLSL_SAMPLER_DIM_3D); + return intrinsic_tex(ctx, params, loc, "tex3Dproj", HLSL_SAMPLER_DIM_3D, HLSL_RESOURCE_SAMPLE_PROJ); } static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); + return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE, + params->args_count == 4 ? 
HLSL_RESOURCE_SAMPLE_GRAD : HLSL_RESOURCE_SAMPLE); } static bool intrinsic_texCUBEbias(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "texCUBEbias", HLSL_SAMPLER_DIM_CUBE); + return intrinsic_tex(ctx, params, loc, "texCUBEbias", HLSL_SAMPLER_DIM_CUBE, HLSL_RESOURCE_SAMPLE_LOD_BIAS); } static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); + return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE, HLSL_RESOURCE_SAMPLE_GRAD); +} + +static bool intrinsic_texCUBElod(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "texCUBElod", HLSL_SAMPLER_DIM_CUBE, HLSL_RESOURCE_SAMPLE_LOD); } static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - return intrinsic_tex(ctx, params, loc, "texCUBEproj", HLSL_SAMPLER_DIM_CUBE); + return intrinsic_tex(ctx, params, loc, "texCUBEproj", HLSL_SAMPLER_DIM_CUBE, HLSL_RESOURCE_SAMPLE_PROJ); } static bool intrinsic_transpose(struct hlsl_ctx *ctx, @@ -5301,10 +5196,12 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name) { - struct hlsl_ir_node *interlocked, *lhs, *coords, *val, *cmp_val = NULL, *orig_val = NULL; - struct hlsl_type *lhs_type, *val_type; + struct hlsl_ir_node *interlocked, *lhs, *val, *cmp_val = NULL, *orig_val = NULL; + struct hlsl_type *lhs_type, *val_type, *ret_type = NULL; struct vkd3d_string_buffer *string; + unsigned int writemask, component; struct hlsl_deref 
dst_deref; + bool matrix_writemask; if (hlsl_version_lt(ctx, 5, 0)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, @@ -5336,6 +5233,9 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op orig_val = params->args[2]; } + if (orig_val) + ret_type = lhs_type; + if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT && lhs_type->e.numeric.type != HLSL_TYPE_INT)) { @@ -5358,8 +5258,10 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op /* Floating values are always cast to signed integers. */ if (val_base_type == HLSL_TYPE_FLOAT || val_base_type == HLSL_TYPE_HALF || val_base_type == HLSL_TYPE_DOUBLE) val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); + else if (val_base_type != lhs_type->e.numeric.type) + val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); else - val_type = hlsl_get_scalar_type(ctx, lhs_type->e.numeric.type); + val_type = hlsl_get_scalar_type(ctx, val_base_type); } else { @@ -5371,47 +5273,50 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op if (!(val = add_implicit_conversion(ctx, params->instrs, val, val_type, loc))) return false; - /* TODO: groupshared variables */ - if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_find_resource_access(hlsl_ir_index(lhs))) - { - if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) - { - hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets."); - return false; - } - - if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node)) - return false; - coords = hlsl_ir_index(lhs)->idx.node; + if (!(lhs = resolve_assignment_lhs(ctx, params->instrs, false, lhs, + &lhs_type, &val, &writemask, &matrix_writemask))) + return false; - VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); - VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); + VKD3D_ASSERT(writemask); + /* The writemask should be single 
component. */ + VKD3D_ASSERT(!(writemask & (writemask - 1))); - if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Interlocked targets must be UAV or groupshared elements."); - return false; - } - } - else if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_tgsm_access(hlsl_ir_index(lhs))) + component = vkd3d_log2i(writemask); + if (matrix_writemask) { - hlsl_fixme(ctx, loc, "Interlocked operations on indexed groupshared elements."); - return false; + unsigned int i = component / 4, j = component % 4; + + component = i * lhs->data_type->e.numeric.dimx + j; } - else if (lhs->type == HLSL_IR_LOAD && (hlsl_ir_load(lhs)->src.var->storage_modifiers & HLSL_STORAGE_GROUPSHARED)) + + if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) { - hlsl_init_simple_deref_from_var(&dst_deref, hlsl_ir_load(lhs)->src.var); - coords = hlsl_block_add_uint_constant(ctx, params->instrs, 0, loc); + struct hlsl_ir_node *c, *cell; + + VKD3D_ASSERT(!matrix_writemask); + + c = hlsl_block_add_uint_constant(ctx, params->instrs, component, &lhs->loc); + cell = hlsl_block_add_index(ctx, params->instrs, lhs, c, &lhs->loc); + + if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, cell)) + return false; } else { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Interlocked targets must be UAV or groupshared elements."); - return false; + struct hlsl_block component_path_block; + struct hlsl_deref dst_deref_prefix; + + if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref_prefix, lhs)) + return false; + if (!hlsl_init_deref_from_component_index(ctx, &component_path_block, &dst_deref, + &dst_deref_prefix, component, &lhs->loc)) + return false; + + hlsl_block_add_block(params->instrs, &component_path_block); + hlsl_cleanup_deref(&dst_deref_prefix); } - interlocked = hlsl_new_interlocked(ctx, op, orig_val ? 
lhs_type : NULL, &dst_deref, coords, cmp_val, val, loc); + interlocked = hlsl_new_interlocked(ctx, op, ret_type, &dst_deref, NULL, cmp_val, val, loc); hlsl_cleanup_deref(&dst_deref); if (!interlocked) return false; @@ -5485,19 +5390,9 @@ static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx, return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor"); } -static void validate_group_barrier_profile(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) -{ - if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "Group barriers can only be used in compute shaders."); - } -} - static bool intrinsic_AllMemoryBarrier(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - validate_group_barrier_profile(ctx, loc); return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV | VKD3DSSF_GROUP_SHARED_MEMORY, loc); } @@ -5505,7 +5400,6 @@ static bool intrinsic_AllMemoryBarrier(struct hlsl_ctx *ctx, static bool intrinsic_AllMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - validate_group_barrier_profile(ctx, loc); return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV | VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc); } @@ -5627,19 +5521,12 @@ static bool intrinsic_ConstructGSWithSO(struct hlsl_ctx *ctx, static bool intrinsic_DeviceMemoryBarrier(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - if ((ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE && ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL) - || hlsl_version_lt(ctx, 4, 0)) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "DeviceMemoryBarrier() can only be used in compute and pixel shaders 4.0 or higher."); - } return !!hlsl_block_add_sync(ctx, params->instrs, 
VKD3DSSF_GLOBAL_UAV, loc); } static bool intrinsic_DeviceMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - validate_group_barrier_profile(ctx, loc); return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV | VKD3DSSF_THREAD_GROUP, loc); } @@ -5647,7 +5534,6 @@ static bool intrinsic_DeviceMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, static bool intrinsic_GroupMemoryBarrier(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - validate_group_barrier_profile(ctx, loc); return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GROUP_SHARED_MEMORY, loc); } @@ -5655,7 +5541,6 @@ static bool intrinsic_GroupMemoryBarrier(struct hlsl_ctx *ctx, static bool intrinsic_GroupMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - validate_group_barrier_profile(ctx, loc); return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc); } @@ -5769,11 +5654,14 @@ intrinsic_functions[] = {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, + {"tex3Dbias", 2, false, intrinsic_tex3Dbias}, {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, + {"tex3Dlod", 2, false, intrinsic_tex3Dlod}, {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, {"texCUBE", -1, false, intrinsic_texCUBE}, {"texCUBEbias", 2, false, intrinsic_texCUBEbias}, {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, + {"texCUBElod", 2, false, intrinsic_texCUBElod}, {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, @@ -5978,16 +5866,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, if (cond_type->e.numeric.dimx == 1 && cond_type->e.numeric.dimy == 1) { - cond_type = 
hlsl_get_numeric_type(ctx, common_type->class, - HLSL_TYPE_BOOL, common_type->e.numeric.dimx, common_type->e.numeric.dimy); + cond_type = hlsl_change_base_type(ctx, common_type, HLSL_TYPE_BOOL); cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); } else { if (common_type->e.numeric.dimx == 1 && common_type->e.numeric.dimy == 1) { - common_type = hlsl_get_numeric_type(ctx, cond_type->class, - common_type->e.numeric.type, cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); + common_type = hlsl_change_base_type(ctx, cond_type, common_type->e.numeric.type); } else if (cond_type->e.numeric.dimx != common_type->e.numeric.dimx || cond_type->e.numeric.dimy != common_type->e.numeric.dimy) @@ -6007,8 +5893,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_release_string_buffer(ctx, value_string); } - cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, - common_type->e.numeric.dimx, common_type->e.numeric.dimy); + cond_type = hlsl_change_base_type(ctx, common_type, HLSL_TYPE_BOOL); cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); } @@ -6031,8 +5916,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_release_string_buffer(ctx, second_string); } - cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, - cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); + cond_type = hlsl_change_base_type(ctx, cond_type, HLSL_TYPE_BOOL); cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); common_type = first->data_type; } @@ -9482,7 +9366,7 @@ selection_statement: check_condition_type(ctx, condition); condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4); - hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, flatten_type, &@2); + hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, flatten_type, false, &@2); destroy_block($6.then_block); 
destroy_block($6.else_block); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 6f61eb92a67..d10a4ff1ceb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -24,6 +24,8 @@ #include <stdio.h> #include <math.h> +VKD3D_DECLARE_DEBUG_CHANNEL(copyprop) + /* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ #define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 @@ -580,7 +582,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *semantic_vars, for (i = 0; i < hlsl_type_major_size(type); ++i) { - struct hlsl_ir_node *cast; + struct hlsl_ir_node *cast, *instr; struct hlsl_ir_var *input; struct hlsl_ir_load *load; @@ -606,6 +608,33 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *semantic_vars, if (!(load = hlsl_new_load_index(ctx, &prim_deref, idx, loc))) return; hlsl_block_add_instr(block, &load->node); + instr = &load->node; + } + else if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(ctx, 4, 0) + && (ctx->compatibility_flags & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES) + && !ascii_strcasecmp(semantic->name, "VFACE")) + { + /* VFACE on sm4 has sm3 semantics, returning 1.0 or -1.0. + * Also, the variable can be declared as float (which is not true + * of SV_IsFrontFace), but is converted to uint in the signature. + * Note that VPOS is not similarly affected, + * despite also having different semantics between sm3 and sm4. 
*/ + + struct hlsl_ir_node *one, *minusone; + + if (!(input = add_semantic_var(ctx, semantic_vars, var, + hlsl_change_base_type(ctx, vector_type_src, HLSL_TYPE_BOOL), + modifiers, semantic, 0, false, force_align, true, loc))) + return; + ++semantic->index; + + if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) + return; + hlsl_block_add_instr(block, &load->node); + + one = hlsl_block_add_float_constant(ctx, block, 1.0f, &var->loc); + minusone = hlsl_block_add_float_constant(ctx, block, -1.0f, &var->loc); + instr = hlsl_add_conditional(ctx, block, &load->node, one, minusone); } else { @@ -617,9 +646,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *semantic_vars, if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) return; hlsl_block_add_instr(block, &load->node); + instr = &load->node; } - cast = hlsl_block_add_cast(ctx, block, &load->node, vector_type_dst, &var->loc); + cast = hlsl_block_add_cast(ctx, block, instr, vector_type_dst, &var->loc); if (type->class == HLSL_CLASS_MATRIX) { @@ -1107,7 +1137,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc); - if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc))) + if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, false, &cf_instr->loc))) return; list_add_after(&load->node.entry, &iff->entry); } @@ -1292,7 +1322,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun load = hlsl_block_add_simple_load(ctx, block, func->early_return_var, &cf_instr->loc); not = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_LOGIC_NOT, load, &cf_instr->loc); - hlsl_block_add_if(ctx, block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc); + hlsl_block_add_if(ctx, block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, false, &cf_instr->loc); } return has_early_return; @@ -1814,34 +1844,280 
@@ static struct hlsl_ir_node *lower_tgsm_loads(struct hlsl_ctx *ctx, struct hlsl_i return hlsl_block_add_resource_load(ctx, block, &params, loc); } -/* Lowers stores to TGSMs to resource stores. */ -static struct hlsl_ir_node *lower_tgsm_stores(struct hlsl_ctx *ctx, +static enum vkd3d_result resource_access_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *deref, struct hlsl_deref *resource_deref, + struct hlsl_type **resource_type, struct hlsl_type **val_type, struct hlsl_ir_node **coords, + struct hlsl_ir_node **field_offset, const struct vkd3d_shader_location *loc) +{ + bool tgsm = deref->var->is_tgsm; + struct hlsl_type **deref_types; + unsigned int resource_idx; + + if (!(deref_types = vkd3d_malloc(sizeof(*deref_types) * (deref->path_len + 1)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + deref_types[0] = deref->var->data_type; + for (unsigned int i = 1; i <= deref->path_len; ++i) + deref_types[i] = hlsl_get_element_type_from_path_index(ctx, deref_types[i - 1], deref->path[i - 1].node); + + *resource_type = NULL; + *val_type = deref_types[deref->path_len]; + + if (tgsm) + { + *resource_type = deref_types[0]; + resource_idx = 0; + } + else + { + for (unsigned int i = 0; i <= deref->path_len; ++i) + { + if (deref_types[i]->class == HLSL_CLASS_UAV || deref_types[i]->class == HLSL_CLASS_TEXTURE) + { + *resource_type = deref_types[i]; + resource_idx = i; + break; + } + } + } + + if (!*resource_type) + { + vkd3d_free(deref_types); + return VKD3D_ERROR_NOT_FOUND; + } + + if (!hlsl_init_deref(ctx, resource_deref, deref->var, resource_idx)) + { + vkd3d_free(deref_types); + return VKD3D_ERROR; + } + + for (unsigned int i = 0; i < resource_idx; ++i) + hlsl_src_from_node(&resource_deref->path[i], deref->path[i].node); + + if (tgsm) + { + if ((*resource_type)->class == HLSL_CLASS_ARRAY) + { + VKD3D_ASSERT(deref->path_len); + + *coords = deref->path[0].node; + VKD3D_ASSERT(hlsl_is_vec1((*coords)->data_type)); +
VKD3D_ASSERT((*coords)->data_type->e.numeric.type == HLSL_TYPE_UINT); + } + else + { + *coords = hlsl_block_add_uint_constant(ctx, block, 0, loc); + } + } + else + { + *coords = deref->path[resource_idx].node; + VKD3D_ASSERT((*coords)->data_type->class == HLSL_CLASS_VECTOR); + VKD3D_ASSERT((*coords)->data_type->e.numeric.type == HLSL_TYPE_UINT); + VKD3D_ASSERT((*coords)->data_type->e.numeric.dimx == hlsl_sampler_dim_count((*resource_type)->sampler_dim)); + } + + if (tgsm || (*resource_type)->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + *field_offset = hlsl_block_add_uint_constant(ctx, block, 0, loc); + for (int i = deref->path_len - 1; i >= (int)resource_idx; --i) + { + /* The coords field is used to index resources and arrayed TGSM objects. */ + if (i == resource_idx && (!tgsm || (*resource_type)->class == HLSL_CLASS_ARRAY)) + break; + + *field_offset = hlsl_block_add_packed_index_offset_append(ctx, block, *field_offset, + deref->path[i].node, deref_types[i], loc); + } + } + else + { + *field_offset = NULL; + } + + vkd3d_free(deref_types); + return VKD3D_OK; +} + +/* Lowers stores to resources and TGSM objects to resource stores. 
*/ +static struct hlsl_ir_node *lower_resource_stores(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { + struct hlsl_ir_node *coords, *field_offset; + struct hlsl_type *resource_type, *val_type; + struct hlsl_deref resource_deref = {0}; + struct hlsl_ir_node *res = NULL; struct hlsl_ir_store *store; - struct hlsl_ir_node *coords; - struct hlsl_deref res_deref; struct hlsl_deref *deref; + bool tgsm; if (instr->type != HLSL_IR_STORE) return NULL; store = hlsl_ir_store(instr); deref = &store->lhs; + tgsm = deref->var->is_tgsm; - if (!deref->var->is_tgsm) + if (!tgsm && !(deref->var->is_uniform && deref->path_len)) return NULL; - if (deref->path_len) + if (resource_access_from_deref(ctx, block, deref, &resource_deref, &resource_type, + &val_type, &coords, &field_offset, &instr->loc) != VKD3D_OK) + return NULL; + + if (resource_type->class == HLSL_CLASS_TEXTURE) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Read-only resources cannot be stored to."); + return false; + } + + if (tgsm || resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + unsigned int dimx = val_type->e.numeric.dimx; + + VKD3D_ASSERT(val_type->class <= HLSL_CLASS_VECTOR); + + for (unsigned int i = 0, k = 0; i < dimx; ++i) + { + struct hlsl_ir_node *c, *load, *offset; + + if (!(store->writemask & (1u << i))) + continue; + + if (val_type->class == HLSL_CLASS_SCALAR) + { + VKD3D_ASSERT(i == 0); + VKD3D_ASSERT(dimx == 1); + + offset = field_offset; + } + else + { + c = hlsl_block_add_uint_constant(ctx, block, i, &instr->loc); + offset = hlsl_block_add_packed_index_offset_append(ctx, block, field_offset, c, val_type, &instr->loc); + } + + load = hlsl_add_load_component(ctx, block, store->rhs.node, k++, &instr->loc); + res = hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &resource_deref, + offset, coords, load, VKD3DSP_WRITEMASK_0, &instr->loc); + } + } + else + { + unsigned int expected_width = 
resource_type->e.resource.format->e.numeric.dimx + * resource_type->e.resource.format->e.numeric.dimy; + unsigned int width = vkd3d_log2i(store->writemask) + 1; + + if (width != expected_width) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, + "Resource store expressions must write to all components."); + + res = hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &resource_deref, + NULL, coords, store->rhs.node, store->writemask, &instr->loc); + } + + hlsl_cleanup_deref(&resource_deref); + return res; +} + +/* Generates the coords field of an interlocked operation from its dst deref, + * and points the dst deref to the UAV/TGSM object itself. */ +static struct hlsl_ir_node *generate_interlocked_coords(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_type *resource_type, *val_type; + struct hlsl_ir_node *coords, *field_offset; + struct hlsl_ir_interlocked *interlocked; + struct hlsl_deref resource_deref; + struct hlsl_ir_node *res = NULL; + struct hlsl_deref *deref; + enum vkd3d_result ret; + bool tgsm; + + if (instr->type != HLSL_IR_INTERLOCKED) + return NULL; + interlocked = hlsl_ir_interlocked(instr); + deref = &interlocked->dst; + tgsm = deref->var->is_tgsm; + + ret = resource_access_from_deref(ctx, block, deref, &resource_deref, &resource_type, + &val_type, &coords, &field_offset, &instr->loc); + if (ret != VKD3D_OK) { - hlsl_fixme(ctx, &instr->loc, "Store to indexed TGSM."); + if (ret == VKD3D_ERROR_NOT_FOUND) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Interlocked targets must be UAV or groupshared elements."); return NULL; } - hlsl_init_simple_deref_from_var(&res_deref, deref->var); - coords = hlsl_block_add_uint_constant(ctx, block, 0, &instr->loc); + if (resource_type->class != HLSL_CLASS_UAV && !tgsm) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Interlocked targets must be UAV or groupshared elements."); + goto 
done; + } + + VKD3D_ASSERT(!interlocked->coords.node); + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER + || (tgsm && resource_type->class == HLSL_CLASS_ARRAY)) + { + struct hlsl_deref structured_coords_deref; + struct hlsl_ir_var *structured_coords; + + if (!(structured_coords = hlsl_new_synthetic_var(ctx, "interlocked-coords", + hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, 2), &instr->loc))) + goto done; + + hlsl_init_simple_deref_from_var(&structured_coords_deref, structured_coords); + + hlsl_block_add_store_component(ctx, block, &structured_coords_deref, 0, coords); + hlsl_block_add_store_component(ctx, block, &structured_coords_deref, 1, field_offset); + + hlsl_cleanup_deref(&structured_coords_deref); + + coords = hlsl_block_add_simple_load(ctx, block, structured_coords, &instr->loc); + } + else + { + if (tgsm) + coords = field_offset; + else + { + VKD3D_ASSERT(!field_offset); + + if (resource_type->e.resource.format->class != HLSL_CLASS_SCALAR) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Non-structured UAV interlocked targets must have scalar type."); + goto done; + } + } + } + + if ((res = hlsl_clone_instr(ctx, instr))) + { + struct hlsl_ir_interlocked *new_interlocked = hlsl_ir_interlocked(res); + + hlsl_cleanup_deref(&new_interlocked->dst); + + if (!hlsl_copy_deref(ctx, &new_interlocked->dst, &resource_deref)) + { + hlsl_free_instr(res); + res = NULL; + goto done; + } + + hlsl_src_from_node(&new_interlocked->coords, coords); + + hlsl_block_add_instr(block, res); + } - return hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &res_deref, - NULL, coords, store->rhs.node, store->writemask, &instr->loc); +done: + hlsl_cleanup_deref(&resource_deref); + return res; } /* Allocate a unique, ordered index to each instruction, which will be used for @@ -2109,7 +2385,7 @@ static void copy_propagation_invalidate_variable(struct hlsl_ctx *ctx, struct co { unsigned i; - TRACE("Invalidate variable 
%s[%u]%s.\n", var_def->var->name, comp, debug_hlsl_writemask(writemask)); + TRACE_(copyprop)("Invalidate variable %s[%u]%s.\n", var_def->var->name, comp, debug_hlsl_writemask(writemask)); for (i = 0; i < 4; ++i) { @@ -2206,7 +2482,7 @@ static void copy_propagation_set_value(struct hlsl_ctx *ctx, struct copy_propaga { struct copy_propagation_component_trace *trace = &var_def->traces[comp + i]; - TRACE("Variable %s[%u] is written by instruction %p%s.\n", + TRACE_(copyprop)("Variable %s[%u] is written by instruction %p%s.\n", var_def->var->name, comp + i, instr, debug_hlsl_writemask(1u << i)); copy_propagation_trace_record_value(ctx, trace, instr, j++, time); @@ -2243,14 +2519,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, } else if (new_instr != value->node) { - TRACE("No single source for propagating load from %s[%u-%u]%s\n", + TRACE_(copyprop)("No single source for propagating load from %s[%u-%u]%s\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count)); return false; } hlsl_swizzle_set_component(&ret_swizzle, i, value->component); } - TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", + TRACE_(copyprop)("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); @@ -2298,7 +2574,7 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, return false; list_add_before(&instr->entry, &cons->entry); - TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n", + TRACE_(copyprop)("Load from %s[%u-%u]%s turned into a constant %p.\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), cons); hlsl_replace_node(instr, cons); @@ -2482,7 +2758,7 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, if (hlsl_version_lt(ctx, 4, 0) && x->is_uniform && ctx->profile->type 
!= VKD3D_SHADER_TYPE_VERTEX) { - TRACE("Skipping propagating non-constant deref to SM1 uniform %s.\n", var->name); + TRACE_(copyprop)("Skipping propagating non-constant deref to SM1 uniform %s.\n", var->name); goto done; } @@ -2582,7 +2858,7 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) new_instr = hlsl_block_add_swizzle(ctx, &block, ret_swizzle, instr_component_count, new_instr, &instr->loc); - if (TRACE_ON()) + if (TRACE_ON_(copyprop)) { struct vkd3d_string_buffer buffer; @@ -2608,7 +2884,7 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, vkd3d_string_buffer_printf(&buffer, "]%s (i = %p).\n", debug_hlsl_swizzle(ret_swizzle, instr_component_count), index); - vkd3d_string_buffer_trace(&buffer); + TRACE_TEXT_(copyprop, buffer.buffer, buffer.content_size); vkd3d_string_buffer_cleanup(&buffer); } @@ -2748,7 +3024,7 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, */ if (!load->src.var->is_uniform) { - TRACE("Ignoring load from non-uniform object variable %s\n", load->src.var->name); + TRACE_(copyprop)("Ignoring load from non-uniform object variable %s\n", load->src.var->name); return false; } @@ -3815,41 +4091,34 @@ static void split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, static void split_resource_load(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, struct hlsl_ir_resource_load *load, const unsigned int idx, struct hlsl_type *type) { - struct hlsl_ir_node *c, *idx_offset, *split_load; + struct hlsl_ir_resource_load *res_load; + struct hlsl_ir_node *c, *idx_offset; struct hlsl_block block; hlsl_block_init(&block); c = hlsl_block_add_uint_constant(ctx, &block, idx, &store->node.loc); - /* Structured (i.e. arrayed) TGSM load. 
*/ - if (load->resource.var->is_tgsm && !load->resource.path_len && load->node.data_type->class == HLSL_CLASS_ARRAY) - { - struct hlsl_resource_load_params params = {0}; + res_load = hlsl_ir_resource_load(hlsl_clone_instr(ctx, &load->node)); + res_load->node.data_type = type; - params.type = HLSL_RESOURCE_LOAD; - params.resource = &load->node; - params.coords = c; - params.format = load->node.data_type->e.array.type; - split_load = hlsl_block_add_resource_load(ctx, &block, &params, &load->node.loc); + if (load->resource.var->is_tgsm && !load->coords.node && !load->resource.path_len + && load->node.data_type->class == HLSL_CLASS_ARRAY) + { + /* Structured (i.e. arrayed) TGSM load. */ + hlsl_src_from_node(&res_load->coords, c); } else { - struct hlsl_ir_resource_load *res_load; - idx_offset = hlsl_block_add_packed_index_offset_append(ctx, &block, load->byte_offset.node, c, load->node.data_type, &store->node.loc); - res_load = hlsl_ir_resource_load(hlsl_clone_instr(ctx, &load->node)); hlsl_src_remove(&res_load->byte_offset); hlsl_src_from_node(&res_load->byte_offset, idx_offset); - res_load->node.data_type = type; - hlsl_block_add_instr(&block, &res_load->node); - - split_load = &res_load->node; } - hlsl_block_add_store_index(ctx, &block, &store->lhs, c, split_load, 0, &store->node.loc); + hlsl_block_add_instr(&block, &res_load->node); + hlsl_block_add_store_index(ctx, &block, &store->lhs, c, &res_load->node, 0, &store->node.loc); list_move_before(&store->node.entry, &block.instrs); } @@ -3986,10 +4255,11 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (rhs->type == HLSL_IR_RESOURCE_LOAD) { /* As we forbid non-scalar or vector types in non-structured resource - * loads, this is specific to structured buffer loads. */ + * loads, this is specific to structured buffer loads and TGSM loads.
*/ struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(rhs); - VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->resource)->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER); + VKD3D_ASSERT(load->resource.var->is_tgsm + || hlsl_deref_get_type(ctx, &load->resource)->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER); for (i = 0; i < hlsl_type_major_size(type); ++i) { @@ -4598,9 +4868,10 @@ static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *in return true; } -static struct hlsl_ir_node *lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, +static struct hlsl_ir_node *lower_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { + struct hlsl_ir_load *vector_load; struct hlsl_ir_node *idx; struct hlsl_deref *deref; struct hlsl_type *type; @@ -4619,20 +4890,32 @@ static struct hlsl_ir_node *lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx for (i = 0; i < deref->path_len - 1; ++i) type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + if (type->class != HLSL_CLASS_VECTOR) + return NULL; + idx = deref->path[deref->path_len - 1].node; - if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) + if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) + return NULL; + hlsl_block_add_instr(block, &vector_load->node); + + if (idx->type == HLSL_IR_CONSTANT) + { + uint32_t swizzle = 0; + + if (hlsl_ir_constant(idx)->value.u[0].u >= type->e.numeric.dimx) + return NULL; + + hlsl_swizzle_set_component(&swizzle, 0, hlsl_ir_constant(idx)->value.u[0].u); + return hlsl_block_add_swizzle(ctx, block, swizzle, 1, &vector_load->node, &instr->loc); + } + else { struct hlsl_ir_node *eq, *swizzle, *c, *operands[HLSL_MAX_OPERANDS] = {0}; unsigned int width = type->e.numeric.dimx; struct hlsl_constant_value value; - struct hlsl_ir_load *vector_load; enum hlsl_ir_expr_op op; - if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) - return NULL; - 
hlsl_block_add_instr(block, &vector_load->node); - swizzle = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc); value.u[0].u = 0; @@ -4792,18 +5075,26 @@ static struct hlsl_ir_node *lower_nonconstant_array_loads(struct hlsl_ctx *ctx, return hlsl_block_add_simple_load(ctx, block, var, &instr->loc); } -static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type) +static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, + struct hlsl_type *type, const struct vkd3d_shader_location *loc) { struct hlsl_type *sampler_type; if (type->class == HLSL_CLASS_ARRAY) { - if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type))) + if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type, loc))) return NULL; return hlsl_new_array_type(ctx, sampler_type, type->e.array.elements_count, HLSL_ARRAY_GENERIC); } + if (type->sampler_dim > HLSL_SAMPLER_DIM_LAST_SAMPLER) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot create a combined sampler from a %s.", debug_hlsl_type(ctx, type)); + return NULL; + } + return ctx->builtin_types.sampler[type->sampler_dim]; } @@ -4871,7 +5162,7 @@ static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in if (!(var = hlsl_get_var(ctx->globals, name->buffer))) { - if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type))) + if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type, &instr->loc))) { hlsl_release_string_buffer(ctx, name); return false; @@ -5325,44 +5616,6 @@ static struct hlsl_ir_node *lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_nod return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, sum, neg); } -/* Lower CEIL to FRC */ -static struct hlsl_ir_node *lower_ceil(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct 
hlsl_block *block) -{ - struct hlsl_ir_node *arg, *neg, *frc; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) - return NULL; - - expr = hlsl_ir_expr(instr); - arg = expr->operands[0].node; - if (expr->op != HLSL_OP1_CEIL) - return NULL; - - neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc); - frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, neg, &instr->loc); - return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, frc, arg); -} - -/* Lower FLOOR to FRC */ -static struct hlsl_ir_node *lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -{ - struct hlsl_ir_node *arg, *neg, *frc; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) - return NULL; - - expr = hlsl_ir_expr(instr); - arg = expr->operands[0].node; - if (expr->op != HLSL_OP1_FLOOR) - return NULL; - - frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, arg, &instr->loc); - neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, frc, &instr->loc); - return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, neg, arg); -} - /* Lower SIN/COS to SINCOS for SM1. */ static struct hlsl_ir_node *lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -5478,45 +5731,6 @@ static struct hlsl_ir_node *lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir return hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); } -/* Lower TERNARY to CMP for SM1. 
*/ -static struct hlsl_ir_node *lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -{ - struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_ir_expr *expr; - struct hlsl_type *type; - - if (instr->type != HLSL_IR_EXPR) - return NULL; - - expr = hlsl_ir_expr(instr); - if (expr->op != HLSL_OP3_TERNARY) - return NULL; - - cond = expr->operands[0].node; - first = expr->operands[1].node; - second = expr->operands[2].node; - - if (cond->data_type->class > HLSL_CLASS_VECTOR || instr->data_type->class > HLSL_CLASS_VECTOR) - { - hlsl_fixme(ctx, &instr->loc, "Lower ternary of type other than scalar or vector."); - return NULL; - } - - VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, - instr->data_type->e.numeric.dimx, instr->data_type->e.numeric.dimy); - float_cond = hlsl_block_add_cast(ctx, block, cond, type, &instr->loc); - neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, float_cond, &instr->loc); - - memset(operands, 0, sizeof(operands)); - operands[0] = neg; - operands[1] = second; - operands[2] = first; - return hlsl_block_add_expr(ctx, block, HLSL_OP3_CMP, operands, first->data_type, &instr->loc); -} - static struct hlsl_ir_node *lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -5769,8 +5983,7 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) { - cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, - cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); + cond_type = hlsl_change_base_type(ctx, cond_type, HLSL_TYPE_BOOL); condition = hlsl_block_add_cast(ctx, instrs, condition, cond_type, &condition->loc); } @@ -5800,7 +6013,7 @@ static struct hlsl_ir_node *lower_int_division_sm4(struct 
hlsl_ctx *ctx, return NULL; if (type->e.numeric.type != HLSL_TYPE_INT) return NULL; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + utype = hlsl_change_base_type(ctx, type, HLSL_TYPE_UINT); xor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_BIT_XOR, arg1, arg2); @@ -5838,7 +6051,7 @@ static struct hlsl_ir_node *lower_int_modulus_sm4(struct hlsl_ctx *ctx, return NULL; if (type->e.numeric.type != HLSL_TYPE_INT) return NULL; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + utype = hlsl_change_base_type(ctx, type, HLSL_TYPE_UINT); for (i = 0; i < type->e.numeric.dimx; ++i) high_bit_value.u[i].u = 0x80000000; @@ -5938,7 +6151,7 @@ static struct hlsl_ir_node *lower_float_modulus(struct hlsl_ctx *ctx, return NULL; if (type->e.numeric.type != HLSL_TYPE_FLOAT) return NULL; - btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); + btype = hlsl_change_base_type(ctx, type, HLSL_TYPE_BOOL); mul1 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, arg2, arg1); neg1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul1, &instr->loc); @@ -5981,8 +6194,7 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, operands[0] = jump->condition.node; operands[1] = zero; - cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, - arg_type->e.numeric.dimx, arg_type->e.numeric.dimy); + cmp_type = hlsl_change_base_type(ctx, arg_type, HLSL_TYPE_BOOL); cmp = hlsl_block_add_expr(ctx, &block, HLSL_OP2_LESS, operands, cmp_type, &instr->loc); bool_false = hlsl_block_add_constant(ctx, &block, @@ -6368,8 +6580,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = interlocked->dst.var; var->last_read = max(var->last_read, last_read); deref_mark_last_read(&interlocked->dst, last_read); - interlocked->coords.node->last_read = 
last_read; interlocked->value.node->last_read = last_read; + if (interlocked->coords.node) + interlocked->coords.node->last_read = last_read; if (interlocked->cmp_value.node) interlocked->cmp_value.node->last_read = last_read; break; @@ -6453,12 +6666,18 @@ struct register_allocator /* If an allocation is VIP, no new allocations can be made in the * register unless they are VIP as well. */ bool vip; + /* Clip/Cull allocations can't share registers. */ + bool clip_cull; } *allocations; size_t count, capacity; /* Total number of registers allocated so far. */ uint32_t reg_count; + /* Total number of registers used for Clip or Cull allocations. + * Used to limit Clip/Cull allocations to two registers. */ + uint32_t clip_cull_count; + /* Special flag so allocations that can share registers prioritize those * that will result in smaller writemasks. * For instance, a single-register allocation would prefer to share a register @@ -6468,7 +6687,7 @@ struct register_allocator }; static unsigned int get_available_writemask(const struct register_allocator *allocator, - uint32_t reg_idx, int mode, bool vip) + uint32_t reg_idx, int mode, bool vip, bool clip_cull) { unsigned int writemask = VKD3DSP_WRITEMASK_ALL; size_t i; @@ -6484,6 +6703,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all writemask = 0; if (allocation->vip && !vip) writemask = 0; + if (allocation->clip_cull != clip_cull) + writemask = 0; } if (!writemask) @@ -6493,8 +6714,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all return writemask; } -static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, - unsigned int writemask, int mode, bool vip) +static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, + uint32_t reg_idx, unsigned int writemask, int mode, bool vip, bool clip_cull) { struct allocation *allocation; @@ -6507,6 +6728,7 @@ static void 
record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a allocation->writemask = writemask; allocation->mode = mode; allocation->vip = vip; + allocation->clip_cull = clip_cull; allocator->reg_count = max(allocator->reg_count, reg_idx + 1); } @@ -6523,7 +6745,7 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a * 'vip' can be used so that no new allocations can be made in the given register * unless they are 'vip' as well. */ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int reg_size, unsigned int component_count, int mode, bool force_align, bool vip) + unsigned int reg_size, unsigned int component_count, int mode, bool force_align, bool vip, bool clip_cull) { struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; unsigned int required_size = force_align ? 4 : reg_size; @@ -6536,7 +6758,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a { for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) { - unsigned int available_writemask = get_available_writemask(allocator, reg_idx, mode, vip); + unsigned int available_writemask = get_available_writemask(allocator, reg_idx, mode, vip, clip_cull); if (vkd3d_popcount(available_writemask) >= pref) { @@ -6548,7 +6770,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count)); - record_allocation(ctx, allocator, reg_idx, writemask, mode, vip); + record_allocation(ctx, allocator, reg_idx, writemask, mode, vip, clip_cull); return ret; } } @@ -6558,7 +6780,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, - 
vkd3d_write_mask_from_component_count(reg_size), mode, vip); + vkd3d_write_mask_from_component_count(reg_size), mode, vip, clip_cull); + allocator->clip_cull_count += clip_cull; return ret; } @@ -6571,11 +6794,11 @@ static bool is_range_available(const struct register_allocator *allocator, for (i = 0; i < (reg_size / 4); ++i) { - writemask = get_available_writemask(allocator, reg_idx + i, mode, vip); + writemask = get_available_writemask(allocator, reg_idx + i, mode, vip, false); if (writemask != VKD3DSP_WRITEMASK_ALL) return false; } - writemask = get_available_writemask(allocator, reg_idx + (reg_size / 4), mode, vip); + writemask = get_available_writemask(allocator, reg_idx + (reg_size / 4), mode, vip, false); if ((writemask & last_reg_mask) != last_reg_mask) return false; return true; @@ -6595,9 +6818,9 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, } for (i = 0; i < reg_size / 4; ++i) - record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, mode, vip); + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, mode, vip, false); if (reg_size % 4) - record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, mode, vip); + record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, mode, vip, false); ret.type = allocator->type; ret.id = reg_idx; @@ -6614,7 +6837,7 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false); + return allocate_register(ctx, allocator, type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false, false); else return allocate_range(ctx, allocator, reg_size, 0, false); } @@ -6993,13 +7216,17 @@ static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int cou for (size_t i = 0; i < defs->count; ++i) { const struct hlsl_constant_register *reg = &defs->regs[i]; + const float *regf; + if (reg->is_int) + continue; + regf = (float[]){reg->value[0].f, reg->value[1].f, reg->value[2].f, reg->value[3].f}; for (size_t j = 0; j <= 4 - count; ++j) { unsigned int writemask = ((1u << count) - 1) << j; if ((reg->allocated_mask & writemask) == writemask - && !memcmp(f, &reg->value.f[j], count * sizeof(float))) + && !memcmp(f, &regf[j], count * sizeof(float))) { ret->type = VKD3DSPR_CONST; ret->id = reg->index; @@ -7014,7 +7241,8 @@ static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int cou return false; } -static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f, +static void record_constant_value(struct hlsl_ctx *ctx, unsigned int component_index, + bool is_int, union hlsl_constant_value_component v, const struct vkd3d_shader_location *loc) { struct hlsl_constant_defs *defs = &ctx->constant_defs; @@ -7024,9 +7252,9 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, for (i = 0; i < defs->count; ++i) { reg = &defs->regs[i]; - if (reg->index == (component_index / 4)) + if (reg->is_int == is_int && reg->index == (component_index / 4)) { - reg->value.f[component_index % 4] = f; + reg->value[component_index % 4] = v; reg->allocated_mask |= (1u << (component_index % 4)); return; } @@ -7036,12 +7264,31 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, return; reg = &defs->regs[defs->count++]; memset(reg, 0,
sizeof(*reg)); + reg->is_int = is_int; reg->index = component_index / 4; - reg->value.f[component_index % 4] = f; + reg->value[component_index % 4] = v; reg->allocated_mask = (1u << (component_index % 4)); reg->loc = *loc; } +static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, + float f, const struct vkd3d_shader_location *loc) +{ + union hlsl_constant_value_component v; + + v.f = f; + record_constant_value(ctx, component_index, false, v, loc); +} + +static void record_constint(struct hlsl_ctx *ctx, unsigned int component_index, + int i, const struct vkd3d_shader_location *loc) +{ + union hlsl_constant_value_component v; + + v.i = i; + record_constant_value(ctx, component_index, true, v, loc); +} + static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct register_allocator *allocator) { @@ -7178,6 +7425,34 @@ static void sort_uniforms_by_bind_count(struct hlsl_ctx *ctx, enum hlsl_regset r list_move_tail(&ctx->extern_vars, &sorted); } +static bool allocate_constint_registers_for_loops(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, void *context) +{ + struct register_allocator *allocator = (struct register_allocator *)context; + struct hlsl_ir_loop *loop; + + if (ctx->d3d255intconst.allocated) + return false; + + if (instr->type != HLSL_IR_LOOP) + return false; + loop = hlsl_ir_loop(instr); + + if (!loop->limiter) + { + ctx->d3d255intconst = allocate_register(ctx, allocator, 4, 4, 0, false, false, false); + TRACE("Allocated D3D255INTCONST to %s.\n", + debug_register(ctx->d3d255intconst, hlsl_get_vector_type(ctx, HLSL_TYPE_INT, 4))); + record_constint(ctx, ctx->d3d255intconst.id * 4 + 0, 255, &instr->loc); + record_constint(ctx, ctx->d3d255intconst.id * 4 + 1, 0, &instr->loc); + record_constint(ctx, ctx->d3d255intconst.id * 4 + 2, 0, &instr->loc); + record_constint(ctx, ctx->d3d255intconst.id * 4 + 3, 0, &instr->loc); + return true; + } + + return false; +} + /* In SM2, 'sincos' expects 
specific constants as src1 and src2 arguments. * These have to be referenced directly, i.e. as 'c' not 'r'. */ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -7218,6 +7493,7 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct register_allocator allocator = {.type = VKD3DSPR_CONST}, allocator_used = {.type = VKD3DSPR_CONST}; + struct register_allocator allocator_constint = {.type = VKD3DSPR_CONSTINT}; struct hlsl_ir_var *var; sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC); @@ -7240,14 +7516,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *bo { if (i < bind_count) { - if (get_available_writemask(&allocator_used, reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) + if (get_available_writemask(&allocator_used, reg_idx + i, 0, false, false) != VKD3DSP_WRITEMASK_ALL) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Overlapping register() reservations on 'c%u'.", reg_idx + i); } - record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 0, false); + record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 0, false, false); } - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 0, false); + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 0, false, false); } var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST; @@ -7281,6 +7557,9 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *bo allocate_sincos_const_registers(ctx, body, &allocator); + hlsl_transform_ir(ctx, allocate_constint_registers_for_loops, body, &allocator_constint); + + vkd3d_free(allocator_constint.allocations); vkd3d_free(allocator.allocations); } @@ -7378,6 +7657,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var struct 
vkd3d_shader_version version; bool special_interpolation = false; bool vip_allocation = false; + bool clip_cull = false; uint32_t reg; bool builtin; @@ -7415,7 +7695,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var enum vkd3d_shader_sysval_semantic semantic; bool has_idx; - if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, ctx->domain, + if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->compatibility_flags, ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, @@ -7437,6 +7717,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) vip_allocation = true; + else if (vsir_sysval_semantic_is_clip_cull(semantic)) + clip_cull = true; if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX || (version.type == VKD3D_SHADER_TYPE_DOMAIN && !output && !is_primitive) @@ -7459,8 +7741,16 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if (version.major >= 4 && !special_interpolation) mode = get_interpolation_mode(&version, var->data_type, var->storage_modifiers); - var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, - reg_size, component_count, mode, var->force_align, vip_allocation); + var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, reg_size, + component_count, mode, var->force_align, vip_allocation, clip_cull); + var->regs[HLSL_REGSET_NUMERIC].type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + + if (clip_cull && allocator->clip_cull_count > 2) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Too many SV_ClipDistance or SV_CullDistance parameters."); + return; + } TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); @@ -7709,6 +7999,10 @@ static void allocate_buffers(struct hlsl_ctx *ctx) var->buffer = ctx->params_buffer; } + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) + { + buffer->used_size = buffer->size = 0; + } hlsl_calculate_buffer_offsets(ctx); validate_buffer_offsets(ctx); @@ -9115,7 +9409,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog enum hlsl_base_type numeric_type; bool has_idx, ret; - ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, + ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->compatibility_flags, ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive); VKD3D_ASSERT(ret); if (sysval == ~0u) @@ -9165,12 +9459,32 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog break; } - if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) + if (sysval == VKD3D_SHADER_SV_STENCIL_REF) + { + if (hlsl_version_lt(ctx, 5, 0)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "Stencil export is only supported in shader model 5.0 or higher."); + if (var->semantic.index) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic index %u for semantic variable %s.", var->semantic.index, var->name); + if (!hlsl_is_vec1(var->data_type) || numeric_type != HLSL_TYPE_UINT) + { + if ((string = hlsl_type_to_string(ctx, var->data_type))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid data 
type %s for semantic variable %s.", string->buffer, var->name); + hlsl_release_string_buffer(ctx, string); + } + program->global_flags |= VKD3DSGF_ENABLE_STENCIL_REF; + } + + if (sysval == VKD3D_SHADER_SV_TARGET && ascii_strcasecmp(name, "SV_Target")) name = "SV_Target"; - else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) - name ="SV_Depth"; - else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) + else if (sysval == VKD3D_SHADER_SV_DEPTH && ascii_strcasecmp(name, "SV_Depth")) + name = "SV_Depth"; + else if (sysval == VKD3D_SHADER_SV_POSITION && ascii_strcasecmp(name, "SV_Position")) name = "SV_Position"; + else if (sysval == VKD3D_SHADER_SV_IS_FRONT_FACE && ascii_strcasecmp(name, "SV_IsFrontFace")) + name = "SV_IsFrontFace"; } else { @@ -9238,6 +9552,10 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog component_type = VKD3D_SHADER_COMPONENT_FLOAT; } + if (sysval == VKD3D_SHADER_SV_IS_FRONT_FACE && var->data_type->e.numeric.dimx > 1) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "%s input must have only 1 component.", var->semantic.name); + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, signature->element_count + 1, sizeof(*signature->elements))) { @@ -9385,27 +9703,44 @@ static void sm1_generate_vsir_constant_defs(struct hlsl_ctx *ctx, ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; return; } - if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, VSIR_OP_DEF, 1, 1)) + if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, + constant_reg->is_int ? 
VSIR_OP_DEFI : VSIR_OP_DEF, 1, 1)) { vsir_instruction_init(ins, &constant_reg->loc, VSIR_OP_NOP); ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; return; } - dst = &ins->dst[0]; - vsir_dst_operand_init(dst, VKD3DSPR_CONST, VSIR_DATA_F32, 1); - dst->reg.dimension = VSIR_DIMENSION_VEC4; - dst->reg.idx[0].offset = constant_reg->index; - dst->write_mask = VKD3DSP_WRITEMASK_ALL; + if (constant_reg->is_int) + { + dst = &ins->dst[0]; + vsir_operand_init(&dst->reg, VKD3DSPR_CONSTINT, VSIR_DATA_I32, 1); + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->reg.idx[0].offset = constant_reg->index; + dst->write_mask = VKD3DSP_WRITEMASK_ALL; - src = &ins->src[0]; - vsir_src_operand_init(src, VKD3DSPR_IMMCONST, VSIR_DATA_F32, 0); - src->reg.dimension = VSIR_DIMENSION_VEC4; - for (x = 0; x < 4; ++x) + src = &ins->src[0]; + vsir_src_operand_init(src, VKD3DSPR_IMMCONST, VSIR_DATA_I32, 0); + src->reg.dimension = VSIR_DIMENSION_VEC4; + for (x = 0; x < 4; ++x) + src->reg.u.immconst_u32[x] = constant_reg->value[x].u; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } + else { - src->reg.u.immconst_f32[x] = constant_reg->value.f[x]; + dst = &ins->dst[0]; + vsir_operand_init(&dst->reg, VKD3DSPR_CONST, VSIR_DATA_F32, 1); + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->reg.idx[0].offset = constant_reg->index; + dst->write_mask = VKD3DSP_WRITEMASK_ALL; + + src = &ins->src[0]; + vsir_src_operand_init(src, VKD3DSPR_IMMCONST, VSIR_DATA_F32, 0); + src->reg.dimension = VSIR_DIMENSION_VEC4; + for (x = 0; x < 4; ++x) + src->reg.u.immconst_f32[x] = constant_reg->value[x].f; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; } - src->swizzle = VKD3D_SHADER_NO_SWIZZLE; } } @@ -9413,10 +9748,10 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) { enum vkd3d_shader_resource_type resource_type; - struct vkd3d_shader_register_range *range; struct vkd3d_shader_semantic *semantic; struct vkd3d_shader_instruction *ins; enum hlsl_sampler_dim sampler_dim; + 
struct vsir_register_range *range; struct vsir_dst_operand *dst; struct hlsl_ir_var *var; unsigned int i, count; @@ -9469,7 +9804,7 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, semantic->resource_type = resource_type; dst = &semantic->resource.reg; - vsir_register_init(&dst->reg, VKD3DSPR_COMBINED_SAMPLER, VSIR_DATA_F32, 1); + vsir_operand_init(&dst->reg, VKD3DSPR_COMBINED_SAMPLER, VSIR_DATA_F32, 1); dst->reg.dimension = VSIR_DIMENSION_NONE; dst->reg.idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index + i; dst->write_mask = 0; @@ -9574,7 +9909,7 @@ static void vsir_src_from_hlsl_node(struct vsir_src_operand *src, struct hlsl_ct } else { - vsir_register_init(&src->reg, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + vsir_operand_init(&src->reg, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); src->reg.idx[0].offset = instr->reg.id; src->reg.dimension = VSIR_DIMENSION_VEC4; src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); @@ -9598,7 +9933,7 @@ static struct vsir_src_operand *sm4_generate_vsir_new_idx_src(struct hlsl_ctx *c } static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, - struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) + struct vsir_operand *reg, uint32_t *writemask, const struct hlsl_deref *deref) { const struct hlsl_ir_var *var = deref->var; unsigned int offset_const_deref; @@ -9635,7 +9970,7 @@ static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struc } static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, - struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) + struct vsir_operand *reg, uint32_t *writemask, const struct hlsl_deref *deref) { const struct vkd3d_shader_version *version = &program->shader_version; const struct hlsl_type *data_type = 
hlsl_deref_get_type(ctx, deref); @@ -10030,7 +10365,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_DOUBLE: - if (ctx->double_as_float_alias) + if (ctx->compatibility_flags & VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS) { generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_MOV, 0, 0, true); return true; @@ -10095,11 +10430,13 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr { struct hlsl_ir_node *instr = &expr->node; struct hlsl_type *type = instr->data_type; + struct vkd3d_shader_instruction *ins; if (!hlsl_is_numeric_type(type)) goto err; - if (type->e.numeric.type == HLSL_TYPE_DOUBLE && !ctx->double_as_float_alias) + if (type->e.numeric.type == HLSL_TYPE_DOUBLE + && !(ctx->compatibility_flags & VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS)) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "The 'double' type is not supported for the %s profile.", ctx->profile->name); @@ -10115,6 +10452,38 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr case HLSL_OP1_CAST: return sm1_generate_vsir_instr_expr_cast(ctx, program, expr); + case HLSL_OP1_CEIL: + if (!hlsl_type_is_floating_point(type)) + goto err; + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX || hlsl_version_ge(ctx, 2, 0)) + { + uint32_t ssa_frc = ctx->ssa_count++; + + /* FIXME: Native frc in vs 1.x has some weird constraints we're not respecting. 
*/ + + /* frc sr0, -SRC + * add DST, SRC, sr0 + */ + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_FRC, 1, 1))) + return false; + vsir_dst_operand_init_ssa_f32v4(&ins->dst[0], ssa_frc); + vsir_src_from_hlsl_node(&ins->src[0], ctx, expr->operands[0].node, ins->dst[0].write_mask); + ins->src[0].modifiers = VKD3DSPSM_NEG; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_ADD, 1, 2))) + return false; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, expr->operands[0].node, ins->dst[0].write_mask); + vsir_src_operand_init_ssa_f32v4(&ins->src[1], ssa_frc); + } + else + { + /* Not supported in native ps 1.x. */ + goto err; + } + break; + case HLSL_OP1_COS_REDUCED: VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0); if (!hlsl_type_is_floating_point(type)) @@ -10140,6 +10509,38 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VSIR_OP_EXP); break; + case HLSL_OP1_FLOOR: + if (!hlsl_type_is_floating_point(type)) + goto err; + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX || hlsl_version_ge(ctx, 2, 0)) + { + uint32_t ssa_frc = ctx->ssa_count++; + + /* FIXME: Native frc in vs 1.x has some weird constraints we're not respecting. 
*/ + + /* frc sr0, SRC + * add DST, SRC, -sr0 + */ + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_FRC, 1, 1))) + return false; + vsir_dst_operand_init_ssa_f32v4(&ins->dst[0], ssa_frc); + vsir_src_from_hlsl_node(&ins->src[0], ctx, expr->operands[0].node, ins->dst[0].write_mask); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_ADD, 1, 2))) + return false; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, expr->operands[0].node, ins->dst[0].write_mask); + vsir_src_operand_init_ssa_f32v4(&ins->src[1], ssa_frc); + ins->src[1].modifiers = VKD3DSPSM_NEG; + } + else + { + /* Not supported in native ps 1.x. */ + goto err; + } + break; + case HLSL_OP1_LOG2: if (!hlsl_type_is_floating_point(type)) goto err; @@ -10256,6 +10657,53 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_MAD, 0, 0, true); break; + case HLSL_OP3_TERNARY: + VKD3D_ASSERT(expr->operands[0].node->data_type->e.numeric.type == HLSL_TYPE_BOOL); + /* bool in sm1 is 1.0 (true) or 0.0 (false). */ + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + { + uint32_t ssa_add = ctx->ssa_count++; + + /* add sr0, SRC1, -SRC2 + * mad DST, SRC0, sr0, SRC2 + * + * That is, essentially lerp between the two. This is not + * IEEE 754 safe, but then again neither is sm1 in general, + * and this is what native emits. 
*/ + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_ADD, 1, 2))) + return false; + vsir_dst_operand_init_ssa_f32v4(&ins->dst[0], ssa_add); + vsir_src_from_hlsl_node(&ins->src[0], ctx, expr->operands[1].node, ins->dst[0].write_mask); + vsir_src_from_hlsl_node(&ins->src[1], ctx, expr->operands[2].node, ins->dst[0].write_mask); + ins->src[1].modifiers = VKD3DSPSM_NEG; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_MAD, 1, 3))) + return false; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, expr->operands[0].node, ins->dst[0].write_mask); + vsir_src_operand_init_ssa_f32v4(&ins->src[1], ssa_add); + vsir_src_from_hlsl_node(&ins->src[2], ctx, expr->operands[2].node, ins->dst[0].write_mask); + } + else if (hlsl_version_ge(ctx, 2, 0)) + { + /* cmp DST, -SRC0, SRC2, SRC1 */ + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_CMP, 1, 3))) + return false; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, expr->operands[0].node, ins->dst[0].write_mask); + ins->src[0].modifiers = VKD3DSPSM_NEG; + vsir_src_from_hlsl_node(&ins->src[1], ctx, expr->operands[2].node, ins->dst[0].write_mask); + vsir_src_from_hlsl_node(&ins->src[2], ctx, expr->operands[1].node, ins->dst[0].write_mask); + } + else + { + /* FIXME: We can use CMP in 1.2-1.4 but we can't use VKD3DSPSM_NEG. 
*/ + goto err; + } + break; + default: goto err; } @@ -10314,12 +10762,12 @@ static void sm1_generate_vsir_init_dst_operand_from_deref(struct hlsl_ctx *ctx, if (type == VKD3DSPR_DEPTHOUT) { - vsir_register_init(&dst->reg, type, VSIR_DATA_F32, 0); + vsir_operand_init(&dst->reg, type, VSIR_DATA_F32, 0); dst->reg.dimension = VSIR_DIMENSION_SCALAR; } else { - vsir_register_init(&dst->reg, type, VSIR_DATA_F32, 1); + vsir_operand_init(&dst->reg, type, VSIR_DATA_F32, 1); dst->reg.idx[0].offset = register_index; dst->reg.dimension = VSIR_DIMENSION_VEC4; } @@ -10342,7 +10790,7 @@ static void sm1_generate_vsir_instr_mova(struct hlsl_ctx *ctx, return; dst = &ins->dst[0]; - vsir_register_init(&dst->reg, VKD3DSPR_ADDR, VSIR_DATA_F32, 0); + vsir_operand_init(&dst->reg, VKD3DSPR_ADDR, VSIR_DATA_F32, 0); dst->write_mask = VKD3DSP_WRITEMASK_0; VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR); @@ -10361,7 +10809,7 @@ static struct vsir_src_operand *sm1_generate_vsir_new_address_src(struct hlsl_ct } memset(idx_src, 0, sizeof(*idx_src)); - vsir_register_init(&idx_src->reg, VKD3DSPR_ADDR, VSIR_DATA_F32, 0); + vsir_operand_init(&idx_src->reg, VKD3DSPR_ADDR, VSIR_DATA_F32, 0); idx_src->reg.dimension = VSIR_DIMENSION_VEC4; idx_src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); return idx_src; @@ -10586,6 +11034,11 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, vsir_src_from_hlsl_node(&ins->src[0], ctx, condition, VKD3DSP_WRITEMASK_ALL); } + else if (jump->type == HLSL_IR_JUMP_BREAK) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_BREAK, 0, 0))) + return; + } else { hlsl_fixme(ctx, &instr->loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); @@ -10629,6 +11082,33 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program return; } +static void sm1_generate_vsir_instr_loop(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_loop *loop) +{ + struct hlsl_ir_node *instr = 
&loop->node; + struct vkd3d_shader_instruction *ins; + struct vsir_src_operand *src; + + if (loop->limiter) + { + hlsl_fixme(ctx, &instr->loc, "Loops with user-defined limiter int uniform."); + return; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_REP, 0, 1))) + return; + src = &ins->src[0]; + vsir_src_operand_init(src, VKD3DSPR_CONSTINT, VSIR_DATA_I32, 1); + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; + src->reg.idx[0].offset = ctx->d3d255intconst.id; + + sm1_generate_vsir_block(ctx, &loop->body, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_ENDREP, 0, 0))) + return; +} + static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) { struct hlsl_ir_node *instr, *next; @@ -10681,6 +11161,10 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); break; + case HLSL_IR_LOOP: + sm1_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr)); + break; + default: hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); break; @@ -11119,7 +11603,7 @@ static void ctab_write_uniforms(struct ctab_ctx *ctab) switch (comp_type->e.numeric.type) { case HLSL_TYPE_DOUBLE: - if (ctx->double_as_float_alias) + if (ctx->compatibility_flags & VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS) uni.u = var->default_values[k].number.u; else uni.u = 0; @@ -11258,7 +11742,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs uint32_t write_mask; bool has_idx; - sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, ctx->domain, + sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->compatibility_flags, ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, 
is_primitive); if (semantic == ~0u) semantic = VKD3D_SHADER_SV_NONE; @@ -11350,18 +11834,18 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs if (is_primitive) { VKD3D_ASSERT(has_idx); - vsir_register_init(&dst->reg, type, VSIR_DATA_F32, 2); + vsir_operand_init(&dst->reg, type, VSIR_DATA_F32, 2); dst->reg.idx[0].offset = var->data_type->e.array.elements_count; dst->reg.idx[1].offset = idx; } else if (has_idx) { - vsir_register_init(&dst->reg, type, VSIR_DATA_F32, 1); + vsir_operand_init(&dst->reg, type, VSIR_DATA_F32, 1); dst->reg.idx[0].offset = idx; } else { - vsir_register_init(&dst->reg, type, VSIR_DATA_F32, 0); + vsir_operand_init(&dst->reg, type, VSIR_DATA_F32, 0); } if (shader_sm4_is_scalar_register(&dst->reg)) @@ -12171,8 +12655,9 @@ static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_progr static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_resource_store *store) { + struct hlsl_ir_node *coords = store->coords.node, *byte_offset = store->byte_offset.node; struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); - struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; + struct hlsl_ir_node *value = store->value.node; struct hlsl_ir_node *instr = &store->node; bool tgsm = store->resource.var->is_tgsm; struct vkd3d_shader_instruction *ins; @@ -12209,14 +12694,13 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, return false; } - if (tgsm && !hlsl_is_numeric_type(resource_type)) - { - hlsl_fixme(ctx, &store->node.loc, "Store to structured TGSM."); - return false; - } + structured = resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER + || (tgsm && resource_type->class == HLSL_CLASS_ARRAY); - if ((structured = resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)) + if (structured) { + VKD3D_ASSERT(byte_offset); + if (!(ins = 
generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_STORE_STRUCTURED, 1, 3))) return false; @@ -12224,7 +12708,7 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, &store->resource, &instr->loc, store->writemask)) return false; - vsir_src_from_hlsl_node(&ins->src[1], ctx, store->byte_offset.node, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[1], ctx, byte_offset, VKD3DSP_WRITEMASK_ALL); } else if (tgsm || resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) { @@ -12245,7 +12729,16 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, return false; } - vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + if (tgsm && !structured) + { + VKD3D_ASSERT(byte_offset); + vsir_src_from_hlsl_node(&ins->src[0], ctx, byte_offset, VKD3DSP_WRITEMASK_ALL); + } + else + { + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + } + vsir_src_from_hlsl_node(&ins->src[structured ? 2 : 1], ctx, value, VKD3DSP_WRITEMASK_ALL); return true; @@ -12707,6 +13200,13 @@ static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx, enum vkd3d_shader_opcode opcode; struct vsir_dst_operand *dst; + if (!interlocked->dst.var->is_tgsm && hlsl_deref_get_type(ctx, &interlocked->dst)->class != HLSL_CLASS_UAV) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Interlocked targets must be UAV or groupshared elements."); + return false; + } + opcode = is_imm ? 
imm_opcodes[interlocked->op] : opcodes[interlocked->op]; if (value->data_type->e.numeric.type == HLSL_TYPE_INT) @@ -12784,6 +13284,19 @@ static bool sm4_generate_vsir_instr_sync(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr = &sync->node; struct vkd3d_shader_instruction *ins; + if (sync->sync_flags == VKD3DSSF_GLOBAL_UAV) + { + if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE && ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "DeviceMemoryBarrier() can only be used in compute and pixel shaders."); + } + else + { + if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "Group barriers can only be used in compute shaders."); + } + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_SYNC, 0, 0))) return false; ins->flags = sync->sync_flags; @@ -13683,8 +14196,8 @@ static void generate_vsir_descriptors_for_var(struct hlsl_ctx *ctx, struct vsir_ for (unsigned int k = 0; k < component_count; ++k) { const struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); - struct vkd3d_shader_register_range range; struct vkd3d_shader_descriptor_info1 *d; + struct vsir_register_range range; unsigned int regset_offset; enum hlsl_regset regset; uint32_t id; @@ -13747,8 +14260,8 @@ static void generate_vsir_descriptors_for_var(struct hlsl_ctx *ctx, struct vsir_ static void generate_vsir_descriptors(struct hlsl_ctx *ctx, struct vsir_program *program) { - struct vkd3d_shader_register_range range; struct vkd3d_shader_descriptor_info1 *d; + struct vsir_register_range range; const struct hlsl_ir_var *var; if (program->shader_version.major < 4) @@ -13809,14 +14322,23 @@ static void generate_vsir_descriptors(struct hlsl_ctx *ctx, struct vsir_program program->normalisation_flags.has_descriptor_info = true; } -/* For some reason, for matrices, values from default 
value initializers end - * up in different components than from regular initializers. Default value - * initializers fill the matrix in vertical reading order - * (left-to-right top-to-bottom) instead of regular reading order - * (top-to-bottom left-to-right), so they have to be adjusted. An exception is - * that the order of matrix initializers for function parameters are row-major - * (top-to-bottom left-to-right). */ -static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) +enum reading_order +{ + /* For some reason, for matrices, values from default value initializers end + * up in different components than from regular initializers. Default value + * initializers fill the matrix in vertical reading order + * (left-to-right top-to-bottom) instead of regular reading order + * (top-to-bottom left-to-right), so they have to be adjusted. An exception is + * that the order of matrix initializers for function parameters are row-major + * (top-to-bottom left-to-right). */ + READING_ORDER_DEFAULT_VALUES, + /* Map the component index from column-major matrices as if they were + * row-major matrices. This is the order in which constint registers are + * assigned to matrix components in SM3. 
*/ + READING_ORDER_ALWAYS_ROW_MAJOR, +}; + +static unsigned int map_matrix_component_index(struct hlsl_type *type, unsigned int index, enum reading_order order) { unsigned int element_comp_count, element, x, y, i; unsigned int base = 0; @@ -13824,15 +14346,20 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl switch (type->class) { case HLSL_CLASS_MATRIX: - x = index / type->e.numeric.dimy; - y = index % type->e.numeric.dimy; - return y * type->e.numeric.dimx + x; + if ((order == READING_ORDER_ALWAYS_ROW_MAJOR && !hlsl_type_is_row_major(type)) + || order == READING_ORDER_DEFAULT_VALUES) + { + x = index / type->e.numeric.dimy; + y = index % type->e.numeric.dimy; + return y * type->e.numeric.dimx + x; + } + return index; case HLSL_CLASS_ARRAY: element_comp_count = hlsl_type_component_count(type->e.array.type); element = index / element_comp_count; base = element * element_comp_count; - return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); + return base + map_matrix_component_index(type->e.array.type, index - base, order); case HLSL_CLASS_STRUCT: for (i = 0; i < type->e.record.field_count; ++i) @@ -13841,7 +14368,7 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl element_comp_count = hlsl_type_component_count(field_type); if (index - base < element_comp_count) - return base + get_component_index_from_default_initializer_index(field_type, index - base); + return base + map_matrix_component_index(field_type, index - base, order); base += element_comp_count; } break; @@ -14402,7 +14929,7 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ou continue; } - comp_index = get_component_index_from_default_initializer_index(var->data_type, k); + comp_index = map_matrix_component_index(var->data_type, k, READING_ORDER_DEFAULT_VALUES); comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, &regset); if (regset == 
HLSL_REGSET_NUMERIC) { @@ -14541,7 +15068,7 @@ static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx load = hlsl_block_add_simple_load(ctx, dst, var, loc); cond = hlsl_block_add_unary_expr(ctx, dst, HLSL_OP1_LOGIC_NOT, load, loc); - if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, loc))) + if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, false, loc))) return NULL; hlsl_block_add_instr(dst, iff); @@ -14632,6 +15159,174 @@ static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *blo *index = current_index; } +static struct hlsl_ir_if *find_loop_conditional(struct hlsl_block *block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_IF && hlsl_ir_if(instr)->is_loop_conditional) + return hlsl_ir_if(instr); + } + + return NULL; +} + +static bool hlsl_version_has_sm1_loop_support(struct hlsl_ctx *ctx) +{ + if (hlsl_version_ge(ctx, 4, 0)) + return false; + if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + return hlsl_version_ge(ctx, 2, 0); + return hlsl_version_ge(ctx, 2, 1); +} + +static bool is_instr_constant_lt_var(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + unsigned int k, struct hlsl_ir_var **var, unsigned int *var_component, bool *negated) +{ + unsigned int component, component_count, swizzle_component = 0; + struct hlsl_ir_expr *expr; + struct hlsl_ir_node *arg1; + struct hlsl_ir_load *load; + bool floating = false; + bool neg = false; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (expr->op == HLSL_OP1_LOGIC_NOT) + { + if (expr->operands[0].node->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(expr->operands[0].node); + neg = true; + } + + if (expr->op == HLSL_OP2_GEQUAL) + neg = !neg; + else if (expr->op != HLSL_OP2_LESS) + return false; + + if (expr->operands[0].node->type != HLSL_IR_CONSTANT) + return 
false; + + arg1 = expr->operands[1].node; + if (arg1->type == HLSL_IR_EXPR && hlsl_ir_expr(arg1)->op == HLSL_OP1_CAST) + { + if (hlsl_type_is_floating_point(arg1->data_type)) + floating = true; + arg1 = hlsl_ir_expr(arg1)->operands[0].node; + } + if (arg1->type == HLSL_IR_SWIZZLE) + { + VKD3D_ASSERT(hlsl_ir_swizzle(arg1)->val.node->data_type->class == HLSL_CLASS_VECTOR); + + swizzle_component = hlsl_swizzle_get_component(hlsl_ir_swizzle(arg1)->u.vector, 0); + arg1 = hlsl_ir_swizzle(arg1)->val.node; + } + if (arg1->type != HLSL_IR_LOAD) + return false; + if (!floating && hlsl_ir_constant(expr->operands[0].node)->value.u[0].u != k) + return false; + if (floating && hlsl_ir_constant(expr->operands[0].node)->value.u[0].f != k) + return false; + + load = hlsl_ir_load(arg1); + if (load->node.data_type->e.numeric.type != HLSL_TYPE_INT + && load->node.data_type->e.numeric.type != HLSL_TYPE_UINT) + return false; + *var = load->src.var; + + if (!hlsl_component_index_range_from_deref(ctx, &load->src, &component, &component_count)) + return false; + component += swizzle_component; + component = map_matrix_component_index((*var)->data_type, component, READING_ORDER_ALWAYS_ROW_MAJOR); + + *var_component = component; + *negated = neg; + return true; +} + +/* In SM1, the native compiler will emit REP or LOOP instructions to represent + * loops. These instructions require an i# register containing the number of + * iterations. + * + * When the compiler is able to determine that the number of iterations depends + * on an int uniform, it will expect the user to provide its value on an i# + * register instead of defining it in the program using DEFI. We call this + * uniform the "limiter". + * + * To detect such cases, instead of simply pattern-matching loops like + * "for (i = 0; i < n; ++i)", native apparently tests that the condition + * ultimately matches 'k < n' for the k-th iteration of the loop, for all k + * from 0 to 255.
+ * + * This means that complex conditions like "for (i = 0; i % 255 < n; ++i)" and + * "for (i = 0; i / 2 < n; i += 2)" will use a user-provided i# uniform, so will + * conditions like "for (; i < n;)" if i is initialized to 0 before the loop + * and incremented by 1 inside it. + * Interestingly, something as simple as "for (i = 1; i <= n; ++i)" doesn't + * match, so it doesn't use a user-provided i# uniform. + * + * The choice of whether a uniform is placed in c# or i# (or both) is a + * relatively user-visible detail, that also affects the registers on which + * other uniforms are allocated, so it's important to try to replicate the + * same logic. + * + * For a block of instructions containing a singular, lowered, iteration of a + * loop, this function checks if the loop conditional on that iteration has the + * form 'k < n', where k is a constant. + * + * If we find that the condition matches, we track the uniform (n) as the + * loop's limiter. On the other hand, if the condition ever fails, or matches + * for another uniform (or another component of the same one), then we unset it, + * return NULL and fail. */ +static struct hlsl_ir_if *identify_limiter_uniform(struct hlsl_ctx *ctx, + struct hlsl_ir_loop *loop, struct hlsl_block *block, unsigned int k) +{ + struct hlsl_ir_node *condition; + struct hlsl_ir_if *iff = NULL; + unsigned int var_component; + struct hlsl_ir_var *var; + bool negated; + + VKD3D_ASSERT(loop->unroll_type == HLSL_LOOP_FORCE_LOOP); + VKD3D_ASSERT(loop->type != HLSL_LOOP_DO_WHILE); + + /* If we don't find the loop conditional, this means that either this + * whole iteration is guaranteed, so the loop is not limited by our + * current limiter candidate, or that the loop always ends in this + * iteration for conditions other than the limiter itself. In the latter + * case, fxc/d3dcompiler doesn't invalidate the limiter. 
*/ + if (!(iff = find_loop_conditional(block))) + return NULL; + + condition = iff->condition.node; + if (!is_instr_constant_lt_var(ctx, condition, k, &var, &var_component, &negated)) + goto fail; + if (!negated) + goto fail; + + if (!var->is_uniform) + goto fail; + + if (!loop->limiter) + { + loop->limiter = var; + loop->limiter_component = var_component; + } + + if (loop->limiter != var || loop->limiter_component != var_component) + goto fail; + + return iff; + +fail: + loop->limiter = NULL; + return NULL; +} + static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var) { struct copy_propagation_value *v; @@ -14643,8 +15338,8 @@ static bool loop_unrolling_check_val(struct copy_propagation_state *state, struc return hlsl_ir_constant(v->node)->value.u[0].u; } -static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop, - unsigned int unroll_limit) +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_loop *loop, unsigned int unroll_limit, bool identify_limiter) { struct hlsl_block draft, tmp_dst, loop_body; struct hlsl_ir_var *broken, *continued; @@ -14708,6 +15403,26 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block * loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index); + if (identify_limiter) + { + struct hlsl_ir_if *limiter_if; + + if (!(limiter_if = identify_limiter_uniform(ctx, loop, &target_if->then_block, i))) + { + loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index); + if (!loop_unrolling_check_val(&state, broken)) + loop->limiter = NULL; + goto fail; + } + + VKD3D_ASSERT(list_empty(&limiter_if->else_block.instrs)); + /* Remove the conditional so the next iteration can assume that this + * one was completed while lowering. 
*/ + list_remove(&limiter_if->node.entry); + hlsl_free_instr(&limiter_if->node); + loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index); + } + if (loop_unrolling_check_val(&state, broken)) break; @@ -14731,6 +15446,9 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block * goto fail; } + if (identify_limiter) + goto fail; + hlsl_block_cleanup(&loop_body); copy_propagation_state_destroy(&state); @@ -14779,12 +15497,30 @@ static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void * unroll_limit = c->value.u[0].u; } - if (!loop_unrolling_unroll_loop(ctx, program, loop, unroll_limit)) + if (!loop_unrolling_unroll_loop(ctx, program, loop, unroll_limit, false)) loop->unroll_type = HLSL_LOOP_FORCE_LOOP; return true; } +static bool identify_loop_limiters(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) +{ + struct hlsl_block *program = context; + struct hlsl_ir_loop *loop; + + if (node->type != HLSL_IR_LOOP) + return false; + + loop = hlsl_ir_loop(node); + + if (loop->type == HLSL_LOOP_DO_WHILE) + return false; + if (loop->unroll_type == HLSL_LOOP_FORCE_LOOP) + loop_unrolling_unroll_loop(ctx, program, loop, 255, true); + + return false; +} + /* We could handle this at parse time. However, loop unrolling often needs to * know the value of variables modified in the "iter" block. 
It is possible to * detect that all exit paths of a loop body modify such variables in the same @@ -14891,6 +15627,8 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc if (!hlsl_transform_ir_once(ctx, unroll_loops, block, block)) break; } + if (hlsl_version_has_sm1_loop_support(ctx)) + hlsl_transform_ir(ctx, identify_loop_limiters, block, block); resolve_continues(ctx, block, NULL); hlsl_transform_ir(ctx, resolve_loops, block, NULL); } @@ -15324,7 +16062,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v replace_ir(ctx, lower_tgsm_loads, body); hlsl_lower_index_loads(ctx, body); - replace_ir(ctx, lower_tgsm_stores, body); + split_copies(ctx, body); + replace_ir(ctx, lower_resource_stores, body); + replace_ir(ctx, generate_interlocked_coords, body); if (entry_func->return_var) { @@ -15497,7 +16237,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); - replace_ir(ctx, lower_nonconstant_vector_derefs, body); + replace_ir(ctx, lower_vector_derefs, body); replace_ir(ctx, lower_casts_to_bool, body); replace_ir(ctx, lower_int_dot, body); @@ -15544,7 +16284,6 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v hlsl_transform_ir(ctx, cast_discard_neg_conditions_to_vec4, body, NULL); - replace_ir(ctx, lower_ternary, body); replace_ir(ctx, lower_int_modulus_sm1, body); replace_ir(ctx, lower_division, body); /* Constants casted to float must be folded, and new casts to bool also need to be lowered. 
*/ @@ -15556,8 +16295,6 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v replace_ir(ctx, lower_sqrt, body); replace_ir(ctx, lower_dot, body); replace_ir(ctx, lower_round, body); - replace_ir(ctx, lower_ceil, body); - replace_ir(ctx, lower_floor, body); replace_ir(ctx, lower_trig, body); replace_ir(ctx, lower_comparison_operators, body); replace_ir(ctx, lower_logic_not, body); @@ -15576,14 +16313,6 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v hlsl_run_folding_passes(ctx, body); - if (profile->major_version < 4) - { - /* Ternary operations can be potentially introduced by hlsl_run_folding_passes(). */ - replace_ir(ctx, lower_ternary, body); - if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL) - replace_ir(ctx, lower_cmp, body); - } - do compute_liveness(ctx, body); while (hlsl_transform_ir(ctx, dce, body, NULL)); @@ -15620,6 +16349,7 @@ int hlsl_emit_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info struct list semantic_vars, patch_semantic_vars; struct hlsl_ir_var *var; + hlsl_ctx_init_entry_function_attributes(ctx); parse_entry_function_attributes(ctx, entry_func); if (ctx->result) return ctx->result; @@ -15653,8 +16383,16 @@ int hlsl_emit_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info } else if (var->storage_modifiers & HLSL_STORAGE_GROUPSHARED) { - var->is_tgsm = 1; - list_add_tail(&ctx->extern_vars, &var->extern_entry); + if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) + { + var->is_tgsm = 1; + list_add_tail(&ctx->extern_vars, &var->extern_entry); + } + else + { + hlsl_warning(ctx, &var->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_MODIFIER, + "Ignoring the 'groupshared' modifier in a non-compute shader."); + } } } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index e6edbe598bc..e1bbcac7589 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -18,9 +18,8 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ -#include <math.h> - #include "hlsl.h" +#include <math.h> static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) @@ -1286,6 +1285,7 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; + float x, y; VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); @@ -1294,6 +1294,22 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { switch (type) { + /* Explicitly disable floating-point contraction on Clang to + * prevent it from fusing the multiplication and the + * addition/subtraction below. Current versions of GCC + * unfortunately do not support the FP_CONTRACT pragma, but don't + * contract the expression either. */ +#ifdef __clang__ +#pragma STDC FP_CONTRACT OFF +#endif + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + x = src1->value.u[k].f; + y = src2->value.u[k].f; + /* Explicit cast to float to avoid x87 excess precision.
*/ + dst->u[k].f = x - (float)(truncf(x / y) * y); + break; + case HLSL_TYPE_INT: if (src2->value.u[k].i == 0) { diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 3c5167c6266..3ab3dbd4efe 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -19,6 +19,7 @@ #include "vkd3d_shader_private.h" #include "vkd3d_types.h" +#include "wine/rbtree.h" struct vsir_transformation_context { @@ -76,13 +77,13 @@ const char *vsir_data_type_get_name(enum vsir_data_type t, const char *error) return error; } -const char *vsir_denorm_mode_get_name(enum vsir_denorm_mode m, const char *error) +const char *vsir_denorm_mode_get_name(enum vkd3d_shader_denormal_mode m, const char *error) { static const char * const names[] = { - [VSIR_DENORM_ANY] = "any", - [VSIR_DENORM_PRESERVE] = "preserve", - [VSIR_DENORM_FLUSH_TO_ZERO] = "ftz", + [VKD3D_SHADER_DENORMAL_MODE_ANY] = "any", + [VKD3D_SHADER_DENORMAL_MODE_PRESERVE] = "preserve", + [VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO] = "ftz", }; if ((size_t)m < ARRAY_SIZE(names)) @@ -91,6 +92,72 @@ const char *vsir_denorm_mode_get_name(enum vsir_denorm_mode m, const char *error return error; } +const char *vsir_register_type_get_name(enum vkd3d_shader_register_type type, const char *error) +{ + static const char * const names[] = + { + [VKD3DSPR_ADDR ] = "a", + [VKD3DSPR_ATTROUT ] = "oD", + [VKD3DSPR_COLOROUT ] = "oC", + [VKD3DSPR_COMBINED_SAMPLER ] = "s", + [VKD3DSPR_CONST ] = "c", + [VKD3DSPR_CONSTBOOL ] = "b", + [VKD3DSPR_CONSTBUFFER ] = "cb", + [VKD3DSPR_CONSTINT ] = "i", + [VKD3DSPR_COVERAGE ] = "vCoverage", + [VKD3DSPR_DEPTHOUT ] = "oDepth", + [VKD3DSPR_DEPTHOUTGE ] = "oDepthGE", + [VKD3DSPR_DEPTHOUTLE ] = "oDepthLE", + [VKD3DSPR_FORKINSTID ] = "vForkInstanceId", + [VKD3DSPR_FUNCTIONBODY ] = "fb", + [VKD3DSPR_FUNCTIONPOINTER ] = "fp", + [VKD3DSPR_GROUPSHAREDMEM ] = "g", + [VKD3DSPR_GSINSTID ] = "vGSInstanceID", + [VKD3DSPR_IDXTEMP ] = "x", + [VKD3DSPR_IMMCONST ] = "l", + 
[VKD3DSPR_IMMCONST64 ] = "d", + [VKD3DSPR_IMMCONSTBUFFER ] = "icb", + [VKD3DSPR_INCONTROLPOINT ] = "vicp", + [VKD3DSPR_INPUT ] = "v", + [VKD3DSPR_JOININSTID ] = "vJoinInstanceId", + [VKD3DSPR_LABEL ] = "l", + [VKD3DSPR_LOCALTHREADID ] = "vThreadIDInGroup", + [VKD3DSPR_LOCALTHREADINDEX ] = "vThreadIDInGroupFlattened", + [VKD3DSPR_LOOP ] = "aL", + [VKD3DSPR_NULL ] = "null", + [VKD3DSPR_OUTCONTROLPOINT ] = "vocp", + [VKD3DSPR_OUTPOINTID ] = "vOutputControlPointID", + [VKD3DSPR_OUTPUT ] = "o", + [VKD3DSPR_OUTSTENCILREF ] = "oStencilRef", + [VKD3DSPR_PARAMETER ] = "parameter", + [VKD3DSPR_PATCHCONST ] = "vpc", + [VKD3DSPR_POINT_COORD ] = "vPointCoord", + [VKD3DSPR_PREDICATE ] = "p", + [VKD3DSPR_PRIMID ] = "primID", + [VKD3DSPR_RASTERIZER ] = "rasterizer", + [VKD3DSPR_RESOURCE ] = "t", + [VKD3DSPR_SAMPLEMASK ] = "oMask", + [VKD3DSPR_SAMPLER ] = "s", + [VKD3DSPR_SSA ] = "sr", + [VKD3DSPR_STREAM ] = "m", + [VKD3DSPR_TEMP ] = "r", + [VKD3DSPR_TESSCOORD ] = "vDomainLocation", + [VKD3DSPR_TEXCRDOUT ] = "oT", + [VKD3DSPR_TEXTURE ] = "t", + [VKD3DSPR_THREADGROUPID ] = "vThreadGroupID", + [VKD3DSPR_THREADID ] = "vThreadID", + [VKD3DSPR_UAV ] = "u", + [VKD3DSPR_UNDEF ] = "undef", + [VKD3DSPR_WAVELANECOUNT ] = "vWaveLaneCount", + [VKD3DSPR_WAVELANEINDEX ] = "vWaveLaneIndex", + }; + + if ((uint32_t)type < ARRAY_SIZE(names) && names[type]) + return names[type]; + + return error; +} + const char *vsir_opcode_get_name(enum vkd3d_shader_opcode op, const char *error) { static const char * const names[] = @@ -560,7 +627,7 @@ bool vsir_program_add_icb(struct vsir_program *program, struct vkd3d_shader_imme static struct vsir_src_operand *vsir_program_clone_src_operands( struct vsir_program *program, const struct vsir_src_operand *operands, size_t count); -static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, struct vsir_program *program) +static bool vsir_operand_clone_indirect_indices(struct vsir_operand *reg, struct vsir_program *program) { size_t i; @@ 
-588,7 +655,7 @@ static struct vsir_dst_operand *vsir_program_clone_dst_operands( memcpy(ret, operands, count * sizeof(*operands)); for (i = 0; i < count; ++i) { - if (!shader_register_clone_relative_addresses(&ret[i].reg, program)) + if (!vsir_operand_clone_indirect_indices(&ret[i].reg, program)) return NULL; } @@ -607,7 +674,7 @@ static struct vsir_src_operand *vsir_program_clone_src_operands( memcpy(ret, operands, count * sizeof(*operands)); for (i = 0; i < count; ++i) { - if (!shader_register_clone_relative_addresses(&ret[i].reg, program)) + if (!vsir_operand_clone_indirect_indices(&ret[i].reg, program)) return NULL; } @@ -798,8 +865,7 @@ unsigned int vsir_signature_next_location(const struct shader_signature *signatu } struct vkd3d_shader_descriptor_info1 *vsir_program_add_descriptor(struct vsir_program *program, - enum vkd3d_shader_descriptor_type type, unsigned int register_id, - const struct vkd3d_shader_register_range *range, + enum vkd3d_shader_descriptor_type type, unsigned int register_id, const struct vsir_register_range *range, enum vkd3d_shader_resource_type resource_type, enum vsir_data_type resource_data_type) { struct vkd3d_shader_scan_descriptor_info1 *info = &program->descriptors; @@ -876,7 +942,7 @@ static uint32_t vsir_combine_swizzles(uint32_t first, uint32_t second) return ret; } -void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, +void vsir_operand_init(struct vsir_operand *reg, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int idx_count) { reg->type = reg_type; @@ -897,7 +963,7 @@ void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_reg reg->alignment = 0; } -static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) +static bool vsir_operand_is_phase_instance_id(const struct vsir_operand *reg) { return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; } @@ -905,9 +971,10 @@ 
static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade void vsir_src_operand_init(struct vsir_src_operand *src, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int idx_count) { - vsir_register_init(&src->reg, reg_type, data_type, idx_count); + vsir_operand_init(&src->reg, reg_type, data_type, idx_count); src->swizzle = 0; src->modifiers = VKD3DSPSM_NONE; + src->owner_index = 0; } static void vsir_src_operand_init_const_f32(struct vsir_src_operand *src, float value) @@ -997,7 +1064,7 @@ static void vsir_src_operand_init_ssa_f32(struct vsir_src_operand *src, unsigned vsir_src_operand_init_ssa_scalar(src, idx, VSIR_DATA_F32); } -static void vsir_src_operand_init_ssa_f32v4(struct vsir_src_operand *src, unsigned int idx) +void vsir_src_operand_init_ssa_f32v4(struct vsir_src_operand *src, unsigned int idx) { vsir_src_operand_init_ssa(src, idx, VSIR_DATA_F32, VSIR_DIMENSION_VEC4); } @@ -1031,7 +1098,7 @@ static void vsir_src_operand_init_temp_u32(struct vsir_src_operand *src, unsigne void vsir_dst_operand_init(struct vsir_dst_operand *dst, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int idx_count) { - vsir_register_init(&dst->reg, reg_type, data_type, idx_count); + vsir_operand_init(&dst->reg, reg_type, data_type, idx_count); dst->write_mask = VKD3DSP_WRITEMASK_0; dst->modifiers = VKD3DSPDM_NONE; dst->shift = 0; @@ -1081,7 +1148,7 @@ static void vsir_dst_operand_init_ssa_f32(struct vsir_dst_operand *dst, unsigned vsir_dst_operand_init_ssa_scalar(dst, idx, VSIR_DATA_F32); } -static void vsir_dst_operand_init_ssa_f32v4(struct vsir_dst_operand *dst, unsigned int idx) +void vsir_dst_operand_init_ssa_f32v4(struct vsir_dst_operand *dst, unsigned int idx) { vsir_dst_operand_init_ssa(dst, idx, VSIR_DATA_F32, VSIR_DIMENSION_VEC4); } @@ -1252,7 +1319,7 @@ static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *progra tmp_idx = program->temp_count++; ins->opcode = 
VSIR_OP_FTOU; - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->dst[0].reg.idx[0].offset = tmp_idx; ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; } @@ -1266,7 +1333,7 @@ static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *progra ins = vsir_program_iterator_current(&it); ins->opcode = VSIR_OP_ROUND_NE; - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_F32, 1); + vsir_operand_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_F32, 1); ins->dst[0].reg.idx[0].offset = tmp_idx; ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; @@ -1274,13 +1341,13 @@ static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *progra if (!vsir_instruction_init_with_params(program, ins2, &ins->location, VSIR_OP_FTOU, 1, 1)) return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_register_init(&ins2->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins2->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins2->dst[0].reg.idx[0].offset = tmp_idx; ins2->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins2->dst[0].write_mask = ins->dst[0].write_mask; ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; - vsir_register_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_F32, 1); + vsir_operand_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_F32, 1); ins2->src[0].reg.idx[0].offset = tmp_idx; ins2->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ins2->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; @@ -1299,7 +1366,7 @@ static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *progra if (tmp_idx == ~0u) tmp_idx = program->temp_count++; - vsir_register_init(&rel->reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&rel->reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); rel->reg.idx[0].offset = tmp_idx; rel->reg.dimension = VSIR_DIMENSION_VEC4; } @@ -1376,7 +1443,7 @@ static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, if 
(!vsir_instruction_init_with_params(program, ins, &ifc->location, opcode, 1, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->dst[0].reg.idx[0].offset = *tmp_idx; ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; @@ -1390,7 +1457,7 @@ static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, return VKD3D_ERROR_OUT_OF_MEMORY; ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; - vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[0].reg.idx[0].offset = *tmp_idx; ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); @@ -1440,6 +1507,9 @@ static enum vkd3d_result vsir_program_lower_lrp(struct vsir_program *program, st ins->src[0] = src[0]; ins->src[1] = src[1]; vsir_src_operand_init_ssa(&ins->src[2], mad_id, src[2].reg.data_type, src[2].reg.dimension); + /* Copy rel_addrs in second use of SRC0, to avoid multiple owners. */ + if (!vsir_operand_clone_indirect_indices(&ins->src[0].reg, program)) + goto fail; return VKD3D_OK; @@ -1449,6 +1519,84 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } +static enum vkd3d_result vsir_program_lower_mnxn(struct vsir_program *program, struct vsir_program_iterator *mnxn) +{ + struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(mnxn); + const struct vkd3d_shader_location location = ins->location; + const struct vsir_src_operand *src = ins->src; + const struct vsir_dst_operand *dst = ins->dst; + struct vsir_program_iterator it; + enum vkd3d_shader_opcode opcode; + unsigned int reg_count; + + /* mNxM DST, SRC0, SRC1 + * -> + * dpN DST.x, SRC0, SRC1 + * ... 
+ * dpN DST.(M-1), SRC0, SRC1+(M-1) */ + + switch (ins->opcode) + { + case VSIR_OP_M3x2: + reg_count = 2; + opcode = VSIR_OP_DP3; + break; + case VSIR_OP_M3x3: + reg_count = 3; + opcode = VSIR_OP_DP3; + break; + case VSIR_OP_M3x4: + reg_count = 4; + opcode = VSIR_OP_DP3; + break; + case VSIR_OP_M4x3: + reg_count = 3; + opcode = VSIR_OP_DP4; + break; + case VSIR_OP_M4x4: + reg_count = 4; + opcode = VSIR_OP_DP4; + break; + default: + vkd3d_unreachable(); + } + + VKD3D_ASSERT(dst[0].write_mask == ((1u << reg_count) - 1)); + VKD3D_ASSERT(src[1].reg.idx_count == 1); + + if (!(ins = vsir_program_iterator_insert_before(mnxn, &it, reg_count - 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (unsigned int i = 0; i < reg_count; ++i) + { + if (!vsir_instruction_init_with_params(program, ins, &location, opcode, 1, 2)) + goto fail; + ins->dst[0] = dst[0]; + ins->dst[0].write_mask = (VKD3DSP_WRITEMASK_0 << i); + + ins->src[0] = src[0]; + ins->src[1] = src[1]; + ins->src[1].reg.idx[0].offset += i; + + /* Copy rel_addrs to avoid multiple owners. 
*/ + if (!vsir_operand_clone_indirect_indices(&ins->dst[0].reg, program)) + goto fail; + if (!vsir_operand_clone_indirect_indices(&ins->src[0].reg, program)) + goto fail; + if (!vsir_operand_clone_indirect_indices(&ins->src[1].reg, program)) + goto fail; + + ins = vsir_program_iterator_next(&it); + } + + return VKD3D_OK; + +fail: + vsir_program_iterator_nop_range(&it, mnxn, &location); + + return VKD3D_ERROR_OUT_OF_MEMORY; +} + static enum vkd3d_result vsir_program_lower_nrm(struct vsir_program *program, struct vsir_program_iterator *nrm) { struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(nrm); @@ -1562,6 +1710,9 @@ static enum vkd3d_result vsir_program_lower_pow(struct vsir_program *program, st vsir_dst_operand_init_ssa(&ins->dst[0], mul_id, src[0].reg.data_type, VSIR_DIMENSION_SCALAR); vsir_src_operand_init_ssa(&ins->src[0], movc_id, src[0].reg.data_type, VSIR_DIMENSION_SCALAR); ins->src[1] = src[1]; + /* Copy rel_addrs in second use of SRC1, to avoid multiple owners. 
*/ + if (!vsir_operand_clone_indirect_indices(&ins->src[1].reg, program)) + goto fail; ins = vsir_program_iterator_next(&it); if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_EXP, 1, 1)) @@ -1597,14 +1748,14 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VSIR_OP_LTO, 1, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->dst[0].reg.idx[0].offset = *tmp_idx; ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; ins->src[0].reg = texkill->src[0].reg; ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; - vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VSIR_DATA_F32, 0); + vsir_operand_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VSIR_DATA_F32, 0); ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[1].reg.u.immconst_f32[0] = 0.0f; ins->src[1].reg.u.immconst_f32[1] = 0.0f; @@ -1622,16 +1773,16 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VSIR_OP_OR, 1, 2))) return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->dst[0].reg.idx[0].offset = *tmp_idx; ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[0].reg.idx[0].offset = *tmp_idx; ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + 
vsir_operand_init(&ins->src[1].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[1].reg.idx[0].offset = *tmp_idx; ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); @@ -1644,7 +1795,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program return VKD3D_ERROR_OUT_OF_MEMORY; ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; - vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); + vsir_operand_init(&ins->src[0].reg, VKD3DSPR_TEMP, VSIR_DATA_U32, 1); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[0].reg.idx[0].offset = *tmp_idx; ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); @@ -1765,7 +1916,7 @@ static enum vkd3d_result vsir_program_lower_udiv(struct vsir_program *program, vsir_src_operand_init_ssa(&mov->src[0], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); vsir_src_operand_init_ssa(&mov->src[1], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); - vsir_register_init(&mov->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); + vsir_operand_init(&mov->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); mov->src[2].reg.dimension = udiv->src[1].reg.dimension; mov->src[2].reg.u.immconst_u32[0] = 1; mov->src[2].reg.u.immconst_u32[1] = 1; @@ -1800,7 +1951,7 @@ static enum vkd3d_result vsir_program_lower_udiv(struct vsir_program *program, vsir_src_operand_init_ssa(&ins->src[0], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); vsir_src_operand_init_ssa(&ins->src[1], program->ssa_count, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); - vsir_register_init(&ins->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); + vsir_operand_init(&ins->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); ins->src[2].reg.dimension = udiv->src[1].reg.dimension; ins->src[2].reg.u.immconst_u32[0] = UINT_MAX; ins->src[2].reg.u.immconst_u32[1] = UINT_MAX; @@ -1834,7 +1985,7 @@ static enum vkd3d_result 
vsir_program_lower_udiv(struct vsir_program *program, vsir_src_operand_init_ssa(&ins->src[0], src1_id, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); vsir_src_operand_init_ssa(&ins->src[1], program->ssa_count, udiv->src[1].reg.data_type, udiv->src[1].reg.dimension); - vsir_register_init(&ins->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); + vsir_operand_init(&ins->src[2].reg, VKD3DSPR_IMMCONST, VSIR_DATA_U32, 0); ins->src[2].reg.dimension = udiv->src[1].reg.dimension; ins->src[2].reg.u.immconst_u32[0] = UINT_MAX; ins->src[2].reg.u.immconst_u32[1] = UINT_MAX; @@ -2069,17 +2220,68 @@ static enum vkd3d_result vsir_program_lower_texcrd(struct vsir_program *program, struct vkd3d_shader_instruction *ins, struct vkd3d_shader_message_context *message_context) { /* texcrd DST, t# -> mov DST, t# */ + ins->opcode = VSIR_OP_MOV; + return VKD3D_OK; +} - if (ins->src[0].modifiers) - { - vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: texcrd source modifier."); - return VKD3D_ERROR_NOT_IMPLEMENTED; - } +static enum vkd3d_result vsir_program_lower_texdepth(struct vsir_program *program, + struct vsir_program_iterator *texdepth, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(texdepth); + const struct vkd3d_shader_location location = ins->location; + uint32_t ssa_eq, ssa_div, ssa_sat; + struct vsir_program_iterator it; - ins->opcode = VSIR_OP_MOV; + /* texdepth r5 + * -> + * eq srEQ, r5.y, 0.0 + * div srDIV, r5.x, r5.y + * saturate srSAT, srDIV + * movc oDepth, srEQ, 1.0, srSAT */ + + if (!(ins = vsir_program_iterator_insert_before(texdepth, &it, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_EQO, 1, 2)) + goto fail; + ssa_eq = program->ssa_count++; + vsir_dst_operand_init_ssa_bool(&ins->dst[0], ssa_eq); + 
vsir_src_operand_init_temp_f32v4(&ins->src[0], 5); + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y); + vsir_src_operand_init_const_f32(&ins->src[1], 0.0f); + + ins = vsir_program_iterator_next(&it); + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_DIV, 1, 2)) + goto fail; + ssa_div = program->ssa_count++; + vsir_dst_operand_init_ssa_f32(&ins->dst[0], ssa_div); + vsir_src_operand_init_temp_f32v4(&ins->src[0], 5); + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + vsir_src_operand_init_temp_f32v4(&ins->src[1], 5); + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y); + + ins = vsir_program_iterator_next(&it); + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_SATURATE, 1, 1)) + goto fail; + ssa_sat = program->ssa_count++; + vsir_dst_operand_init_ssa_f32(&ins->dst[0], ssa_sat); + vsir_src_operand_init_ssa_f32(&ins->src[0], ssa_div); + + ins = vsir_program_iterator_next(&it); + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_MOVC, 1, 3)) + goto fail; + vsir_dst_operand_init(&ins->dst[0], VKD3DSPR_DEPTHOUT, VSIR_DATA_F32, 0); + ins->dst[0].reg.dimension = VSIR_DIMENSION_SCALAR; + vsir_src_operand_init_ssa_bool(&ins->src[0], ssa_eq); + vsir_src_operand_init_const_f32(&ins->src[1], 1.0f); + vsir_src_operand_init_ssa_f32(&ins->src[2], ssa_sat); + bitmap_set(program->io_dcls, VKD3DSPR_DEPTHOUT); return VKD3D_OK; + +fail: + vsir_program_iterator_nop_range(&it, texdepth, &location); + + return VKD3D_ERROR_OUT_OF_MEMORY; } static enum vkd3d_result vsir_program_lower_texld_sm1(struct vsir_program *program, @@ -2091,13 +2293,6 @@ static enum vkd3d_result vsir_program_lower_texld_sm1(struct vsir_program *progr /* texld DST, t# -> sample DST, t#, resource#, sampler# */ - if (ins->src[0].modifiers) - { - vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: texld source modifier."); - return 
VKD3D_ERROR_NOT_IMPLEMENTED; - } - if (!(srcs = vsir_program_get_src_operands(program, 4))) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -2323,12 +2518,44 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, { struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(it); const struct vkd3d_shader_location location = ins->location; + unsigned int idxN = ins->dst[0].reg.idx[0].offset, idxM; const struct vkd3d_shader_descriptor_info1 *sampler; - unsigned int idx = ins->dst[0].reg.idx[0].offset; struct vsir_src_operand *srcs; + uint32_t coords_swizzle; - /* tex t# -> sample t#, t#, resource#, sampler# - * Note that the t# destination will subsequently be turned into a temp. */ + /* tex tN -> sample tN, tN, resourceN, samplerN + * texreg2ar tN, tM -> sample tN, tM.wx, resourceN, samplerN + * texreg2gb tN, tM -> sample tN, tM.yz, resourceN, samplerN + * texreg2rgb tN, tM -> sample tN, tM.xyz, resourceN, samplerN + * + * Note that the tN destination will subsequently be turned into a temp. + */ + + switch (ins->opcode) + { + case VSIR_OP_TEX: + idxM = idxN; + coords_swizzle = VKD3D_SHADER_SWIZZLE(X, Y, Z, W); + break; + + case VSIR_OP_TEXREG2AR: + idxM = ins->src[0].reg.idx[0].offset; + coords_swizzle = VKD3D_SHADER_SWIZZLE(W, X, X, X); + break; + + case VSIR_OP_TEXREG2GB: + idxM = ins->src[0].reg.idx[0].offset; + coords_swizzle = VKD3D_SHADER_SWIZZLE(Y, Z, X, X); + break; + + case VSIR_OP_TEXREG2RGB: + idxM = ins->src[0].reg.idx[0].offset; + coords_swizzle = VKD3D_SHADER_SWIZZLE(X, Y, Z, X); + break; + + default: + vkd3d_unreachable(); + } /* We run before I/O normalization. 
*/ VKD3D_ASSERT(program->normalisation_level < VSIR_NORMALISED_SM6); @@ -2336,12 +2563,12 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, if (!(srcs = vsir_program_get_src_operands(program, 4))) return VKD3D_ERROR_OUT_OF_MEMORY; - if (is_texture_projected(program, message_context, idx)) + if (ins->opcode == VSIR_OP_TEX && is_texture_projected(program, message_context, idxN)) { struct vsir_dst_operand *dst = ins->dst; uint32_t coords = program->ssa_count++; - /* div sr0, t#, t#.w */ + /* div sr0, tN, tN.w */ if (!vsir_program_iterator_insert_after(it, 1)) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -2351,7 +2578,7 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, return VKD3D_ERROR_OUT_OF_MEMORY; vsir_dst_operand_init_ssa_f32v4(&ins->dst[0], coords); vsir_src_operand_init(&ins->src[0], VKD3DSPR_TEXTURE, VSIR_DATA_F32, 1); - ins->src[0].reg.idx[0].offset = idx; + ins->src[0].reg.idx[0].offset = idxM; ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ins->src[1] = ins->src[0]; @@ -2366,16 +2593,16 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, else { vsir_src_operand_init(&srcs[0], VKD3DSPR_TEXTURE, VSIR_DATA_F32, 1); - srcs[0].reg.idx[0].offset = idx; + srcs[0].reg.idx[0].offset = idxM; srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; - srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + srcs[0].swizzle = coords_swizzle; } - vsir_src_operand_init_resource(&srcs[1], idx, idx); - vsir_src_operand_init_sampler(&srcs[2], idx, idx); + vsir_src_operand_init_resource(&srcs[1], idxN, idxN); + vsir_src_operand_init_sampler(&srcs[2], idxN, idxN); - sampler = vkd3d_shader_find_descriptor(&program->descriptors, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, idx); - if (sampler->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE) + sampler = vkd3d_shader_find_descriptor(&program->descriptors, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, idxN); + if 
(ins->opcode == VSIR_OP_TEX && sampler->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE) { ins->opcode = VSIR_OP_SAMPLE_C; ins->src = srcs; @@ -2448,6 +2675,9 @@ static struct vkd3d_shader_instruction *generate_bump_coords(struct vsir_program ins->src[0].swizzle = vsir_combine_swizzles(perturbation->swizzle, VKD3D_SHADER_SWIZZLE(X, X, X, X)); vsir_src_operand_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_BUMP_MATRIX_0 + idx, VSIR_DATA_F32); ins->src[2] = *coords; + /* Copy rel_addrs in copy of *perturbation, to avoid multiple owners. */ + if (!vsir_operand_clone_indirect_indices(&ins->src[0].reg, program)) + return false; ins = vsir_program_iterator_next(it); if (!vsir_instruction_init_with_params(program, ins, loc, VSIR_OP_MAD, 1, 3)) @@ -2460,6 +2690,9 @@ static struct vkd3d_shader_instruction *generate_bump_coords(struct vsir_program ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(Z, W, W, W); vsir_src_operand_init_ssa_f32v4(&ins->src[2], ssa_temp); ins->src[2].swizzle = VKD3D_SHADER_SWIZZLE(X, Y, Y, Y); + /* Copy rel_addrs in copy of *perturbation, to avoid multiple owners. 
*/ + if (!vsir_operand_clone_indirect_indices(&ins->src[0].reg, program)) + return false; return ins; } @@ -2534,8 +2767,8 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -static bool vsir_register_replace_loop_register_with_temp(struct vsir_program *program, - struct vkd3d_shader_register *reg, unsigned int idxtemp_idx, unsigned int temp_id) +static bool vsir_operand_replace_loop_register_with_temp(struct vsir_operand *reg, + struct vsir_program *program, unsigned int idxtemp_idx, unsigned int temp_id) { if (reg->type == VKD3DSPR_LOOP) { @@ -2548,7 +2781,7 @@ static bool vsir_register_replace_loop_register_with_temp(struct vsir_program *p if (reg->idx[0].rel_addr && reg->idx[0].rel_addr->reg.type == VKD3DSPR_LOOP) { - if (!shader_register_clone_relative_addresses(reg, program)) + if (!vsir_operand_clone_indirect_indices(reg, program)) return false; if (reg->type == VKD3DSPR_INPUT) { @@ -2671,13 +2904,13 @@ static enum vkd3d_result vsir_program_lower_loop(struct vsir_program *program, for (i = 0; i < ins->dst_count; ++i) { - if (!vsir_register_replace_loop_register_with_temp(program, &ins->dst[i].reg, idxtmp_idx, val_id)) + if (!vsir_operand_replace_loop_register_with_temp(&ins->dst[i].reg, program, idxtmp_idx, val_id)) return VKD3D_ERROR_OUT_OF_MEMORY; } for (i = 0; i < ins->src_count; ++i) { - if (!vsir_register_replace_loop_register_with_temp(program, &ins->src[i].reg, idxtmp_idx, val_id)) + if (!vsir_operand_replace_loop_register_with_temp(&ins->src[i].reg, program, idxtmp_idx, val_id)) return VKD3D_ERROR_OUT_OF_MEMORY; } } @@ -2803,6 +3036,9 @@ static enum vkd3d_result vsir_program_lower_texbem(struct vsir_program *program, VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_SCALE_0 + idx, VSIR_DATA_F32); vsir_src_operand_init_parameter(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_BUMP_LUMINANCE_OFFSET_0 + idx, VSIR_DATA_F32); + /* Copy rel_addrs in additional use of SRC0, to avoid multiple owners. 
*/ + if (!vsir_operand_clone_indirect_indices(&ins->src[0].reg, program)) + return VKD3D_ERROR_OUT_OF_MEMORY; ins = vsir_program_iterator_next(it); if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_MUL, 1, 2)) @@ -2927,10 +3163,23 @@ static enum vkd3d_result vsir_program_lower_d3dbc_instructions(struct vsir_progr ret = vsir_program_lower_lrp(program, &it); break; + case VSIR_OP_M3x2: + case VSIR_OP_M3x3: + case VSIR_OP_M3x4: + case VSIR_OP_M4x3: + case VSIR_OP_M4x4: + ret = vsir_program_lower_mnxn(program, &it); + break; + case VSIR_OP_NRM: ret = vsir_program_lower_nrm(program, &it); break; + case VSIR_OP_PHASE: + vkd3d_shader_instruction_make_nop(ins); + ret = VKD3D_OK; + break; + case VSIR_OP_POW: ret = vsir_program_lower_pow(program, &it); break; @@ -2953,11 +3202,18 @@ static enum vkd3d_result vsir_program_lower_d3dbc_instructions(struct vsir_progr ret = vsir_program_lower_texcrd(program, ins, message_context); break; + case VSIR_OP_TEXDEPTH: + ret = vsir_program_lower_texdepth(program, &it, message_context); + break; + case VSIR_OP_TEXKILL: ret = vsir_program_lower_texkill(program, &it, &tmp_idx); break; case VSIR_OP_TEX: + case VSIR_OP_TEXREG2AR: + case VSIR_OP_TEXREG2GB: + case VSIR_OP_TEXREG2RGB: ret = vsir_program_lower_tex(program, &it, message_context); break; @@ -2978,7 +3234,6 @@ static enum vkd3d_result vsir_program_lower_d3dbc_instructions(struct vsir_progr ret = vsir_program_lower_texldl(program, ins); break; - case VSIR_OP_TEXDEPTH: case VSIR_OP_TEXDP3: case VSIR_OP_TEXDP3TEX: case VSIR_OP_TEXM3x2PAD: @@ -2988,9 +3243,6 @@ static enum vkd3d_result vsir_program_lower_d3dbc_instructions(struct vsir_progr case VSIR_OP_TEXM3x3SPEC: case VSIR_OP_TEXM3x3TEX: case VSIR_OP_TEXM3x3VSPEC: - case VSIR_OP_TEXREG2AR: - case VSIR_OP_TEXREG2GB: - case VSIR_OP_TEXREG2RGB: vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to unimplemented feature: Combined sampler instruction \"%s\" 
(%#x).", vsir_opcode_get_name(ins->opcode, "<unknown>"), ins->opcode); @@ -3116,7 +3368,7 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -static bool is_input_register_with_loop_relative_addressing(struct vkd3d_shader_register *reg) +static bool is_input_register_with_loop_relative_addressing(struct vsir_operand *reg) { if (reg->type != VKD3DSPR_INPUT) return false; @@ -3202,6 +3454,38 @@ static enum vkd3d_result vsir_program_lower_modifiers(struct vsir_program *progr new_opcodes[1] = VSIR_OP_NEG; break; + case VKD3DSPSM_DW: + case VKD3DSPSM_DZ: + if (!(new_ins = vsir_program_iterator_insert_before(&it, &new_it, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = vsir_program_iterator_current(&it); + + if (!vsir_instruction_init_with_params(program, new_ins, &ins->location, VSIR_OP_DIV, 1, 2)) + { + vkd3d_shader_instruction_make_nop(new_ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + vsir_dst_operand_init_ssa_f32v4(&new_ins->dst[0], program->ssa_count); + new_ins->src[0] = *src; + new_ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + new_ins->src[0].modifiers = VKD3DSPSM_NONE; + new_ins->src[1] = new_ins->src[0]; + if (!vsir_operand_clone_indirect_indices(&new_ins->src[1].reg, program)) + { + vkd3d_shader_instruction_make_nop(new_ins); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if (src->modifiers == VKD3DSPSM_DW) + new_ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); + else + new_ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); + + vsir_src_operand_init_ssa_f32v4(src, program->ssa_count); + + ++program->ssa_count; + continue; + default: vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to unimplemented feature: Source modifier %#x.", src->modifiers); @@ -3481,6 +3765,69 @@ static enum vkd3d_result vsir_program_normalise_ps1_output(struct vsir_program * return VKD3D_OK; } +/* In vs_3_0, the point size output is a normal output register declared with dcl_psize. 
+ * Remap writes to that register to the point size system value register. */ +static enum vkd3d_result vsir_program_normalise_vs3_point_size_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + const struct signature_element *e; + + if (!program->has_point_size) + return VKD3D_OK; + + e = vsir_signature_find_element_by_name(&program->output_signature, "PSIZE", 0); + VKD3D_ASSERT(e); + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + if (vsir_instruction_is_dcl(ins)) + continue; + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vsir_operand *reg = &ins->dst[j].reg; + + /* Remap the PSIZE output register to the point size sysval register. */ + if (reg->type == VKD3DSPR_OUTPUT && reg->idx[0].offset == e->register_index) + { + reg->type = VKD3DSPR_RASTOUT; + reg->idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + reg->idx_count = 1; + } + } + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_clamp_vs2_color_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) + return VKD3D_OK; + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + if (vsir_instruction_is_dcl(ins)) + continue; + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vsir_operand *reg = &ins->dst[j].reg; + + if (reg->type == VKD3DSPR_ATTROUT) + ins->dst[j].modifiers |= VKD3DSPDM_SATURATE; + } + } + + return VKD3D_OK; +} + static struct signature_element *add_signature_element(struct shader_signature *signature, const char *semantic_name, uint32_t semantic_index, uint32_t mask, uint32_t register_index, enum 
vkd3d_shader_interpolation_mode interpolation_mode) @@ -3756,6 +4103,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program map->input_register_index, map->input_mask); e = &signature->elements[signature->element_count++]; memset(e, 0, sizeof(*e)); + if (!(e->semantic_name = vkd3d_strdup("$unused"))) + return VKD3D_ERROR_OUT_OF_MEMORY; e->sysval_semantic = VKD3D_SHADER_SV_NONE; e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; e->register_count = 1; @@ -3819,11 +4168,11 @@ struct hull_flattener }; static void flattener_fixup_ssa_register(struct hull_flattener *normaliser, - struct vkd3d_shader_register *reg, unsigned int instance_id) + struct vsir_operand *reg, unsigned int instance_id) { unsigned int id; - if (!register_is_ssa(reg)) + if (!vsir_operand_is_ssa(reg)) return; /* No need to alter the first copy, they are already not conflicting. */ @@ -3838,7 +4187,7 @@ static void flattener_fixup_ssa_register(struct hull_flattener *normaliser, } static void flattener_fixup_register_indices(struct hull_flattener *normaliser, - struct vkd3d_shader_register *reg, unsigned int instance_id) + struct vsir_operand *reg, unsigned int instance_id) { unsigned int i; @@ -3849,7 +4198,7 @@ static void flattener_fixup_register_indices(struct hull_flattener *normaliser, if (reg->idx[i].rel_addr) { flattener_fixup_ssa_register(normaliser, &reg->idx[i].rel_addr->reg, instance_id); - if (shader_register_is_phase_instance_id(&reg->idx[i].rel_addr->reg)) + if (vsir_operand_is_phase_instance_id(&reg->idx[i].rel_addr->reg)) { reg->idx[i].rel_addr = NULL; reg->idx[i].offset += instance_id; @@ -3861,15 +4210,15 @@ static void flattener_fixup_registers(struct hull_flattener *normaliser, struct vkd3d_shader_instruction *ins, unsigned int instance_id) { - struct vkd3d_shader_register *reg; + struct vsir_operand *reg; unsigned int i; for (i = 0; i < ins->src_count; ++i) { reg = 
&ins->src[i].reg; - if (shader_register_is_phase_instance_id(reg)) + if (vsir_operand_is_phase_instance_id(reg)) { - vsir_register_init(reg, VKD3DSPR_IMMCONST, reg->data_type, 0); + vsir_operand_init(reg, VKD3DSPR_IMMCONST, reg->data_type, 0); reg->u.immconst_u32[0] = instance_id; continue; } @@ -4025,7 +4374,6 @@ struct control_point_normaliser { struct vsir_program *program; enum vkd3d_shader_opcode phase; - struct vsir_src_operand *outpointid_param; }; struct vsir_src_operand *vsir_program_create_outpointid_param(struct vsir_program *program) @@ -4033,19 +4381,22 @@ struct vsir_src_operand *vsir_program_create_outpointid_param(struct vsir_progra struct vsir_src_operand *rel_addr; if (!(rel_addr = vsir_program_get_src_operands(program, 1))) + { + ERR("Failed to allocate source operand.\n"); return NULL; + } - vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VSIR_DATA_U32, 0); + vsir_operand_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VSIR_DATA_U32, 0); rel_addr->swizzle = 0; rel_addr->modifiers = 0; return rel_addr; } -static void vsir_dst_operand_normalise_outpointid(struct vsir_dst_operand *dst, +static bool vsir_dst_operand_normalise_outpointid(struct vsir_dst_operand *dst, struct control_point_normaliser *normaliser) { - struct vkd3d_shader_register *reg = &dst->reg; + struct vsir_operand *reg = &dst->reg; if (vsir_opcode_is_control_point_phase(normaliser->phase) && reg->type == VKD3DSPR_OUTPUT) { @@ -4054,9 +4405,11 @@ static void vsir_dst_operand_normalise_outpointid(struct vsir_dst_operand *dst, reg->idx[1] = reg->idx[0]; /* The control point id param is implicit here. Avoid later complications by inserting it. 
*/ reg->idx[0].offset = 0; - reg->idx[0].rel_addr = normaliser->outpointid_param; + if (!(reg->idx[0].rel_addr = vsir_program_create_outpointid_param(normaliser->program))) + return false; ++reg->idx_count; } + return true; } static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, @@ -4094,18 +4447,18 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p return VKD3D_ERROR_OUT_OF_MEMORY; } - VKD3D_ASSERT(normaliser->outpointid_param); - vsir_dst_operand_init_io(&ins->dst[0], VKD3DSPR_OUTPUT, e, 2); ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->dst[0].reg.idx[0].offset = 0; - ins->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + if (!(ins->dst[0].reg.idx[0].rel_addr = vsir_program_create_outpointid_param(normaliser->program))) + return VKD3D_ERROR_OUT_OF_MEMORY; ins->dst[0].reg.idx[1].offset = e->register_index; vsir_src_operand_init_io(&ins->src[0], VKD3DSPR_INPUT, e, 2); ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ins->src[0].reg.idx[0].offset = 0; - ins->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + if (!(ins->src[0].reg.idx[0].rel_addr = vsir_program_create_outpointid_param(normaliser->program))) + return VKD3D_ERROR_OUT_OF_MEMORY; ins->src[0].reg.idx[1].offset = e->register_index; ins = vsir_program_iterator_next(dst_it); @@ -4135,11 +4488,6 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i return VKD3D_OK; } - if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program))) - { - ERR("Failed to allocate src param.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } normaliser.program = program; it = vsir_program_iterator(&normaliser.program->instructions); normaliser.phase = VSIR_OP_INVALID; @@ -4158,7 +4506,8 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i break; for (j = 0; j < ins->dst_count; ++j) { - vsir_dst_operand_normalise_outpointid(&ins->dst[j], 
&normaliser); + if (!vsir_dst_operand_normalise_outpointid(&ins->dst[j], &normaliser)) + return VKD3D_ERROR_OUT_OF_MEMORY; } break; } @@ -4224,6 +4573,7 @@ struct io_normaliser struct io_normaliser_register_data input_range_map[MAX_REG_OUTPUT]; struct io_normaliser_register_data output_range_map[MAX_REG_OUTPUT]; struct io_normaliser_register_data pc_range_map[MAX_REG_OUTPUT]; + uint32_t vpos_ssa, vface_ssa; bool use_vocp; }; @@ -4281,7 +4631,6 @@ static enum vkd3d_result range_map_set_register_range(struct io_normaliser *norm { if (range_map[register_idx].component[component_idx].register_count == UINT8_MAX) { - WARN("Conflicting index ranges.\n"); vkd3d_shader_error(normaliser->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "Conflicting index ranges."); return VKD3D_ERROR_INVALID_SHADER; @@ -4309,7 +4658,6 @@ static enum vkd3d_result range_map_set_register_range(struct io_normaliser *norm * The latter is validated in the TPF reader. */ if (range_map[r].component[c].register_count && is_dcl_indexrange) { - WARN("Conflicting index ranges.\n"); vkd3d_shader_error(normaliser->message_context, NULL, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "Conflicting index ranges."); return VKD3D_ERROR_INVALID_SHADER; @@ -4327,7 +4675,7 @@ static enum vkd3d_result io_normaliser_add_index_range(struct io_normaliser *nor const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; - const struct vkd3d_shader_register *reg = &range->dst.reg; + const struct vsir_operand *reg = &range->dst.reg; struct io_normaliser_register_data *range_map; const struct shader_signature *signature; uint32_t mask, used_mask; @@ -4597,7 +4945,7 @@ out: return ret; } -static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, +static unsigned int vsir_operand_normalise_arrayed_addressing(struct vsir_operand *reg, unsigned int id_idx, unsigned int register_index) { VKD3D_ASSERT(id_idx < 
ARRAY_SIZE(reg->idx) - 1); @@ -4621,8 +4969,8 @@ static bool vsir_dst_operand_io_normalise(struct vsir_dst_operand *dst, struct io_normaliser *normaliser, struct vkd3d_shader_instruction *ins) { unsigned int id_idx, reg_idx, write_mask, element_idx; - struct vkd3d_shader_register *reg = &dst->reg; const struct shader_signature *signature; + struct vsir_operand *reg = &dst->reg; const struct signature_element *e; write_mask = dst->write_mask; @@ -4701,7 +5049,7 @@ static bool vsir_dst_operand_io_normalise(struct vsir_dst_operand *dst, e = &signature->elements[element_idx]; if (vsir_signature_element_is_array(e, normaliser->normalisation_flags)) - id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + id_idx = vsir_operand_normalise_arrayed_addressing(reg, id_idx, e->register_index); /* Replace the register index with the signature element index */ reg->idx[id_idx].offset = element_idx; @@ -4714,8 +5062,8 @@ static void vsir_src_operand_io_normalise(struct vsir_src_operand *src, struct io_normaliser *normaliser, struct vkd3d_shader_instruction *ins) { unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx; - struct vkd3d_shader_register *reg = &src->reg; const struct shader_signature *signature; + struct vsir_operand *reg = &src->reg; const struct signature_element *e; /* Input/output registers from one phase can be used as inputs in @@ -4767,6 +5115,19 @@ static void vsir_src_operand_io_normalise(struct vsir_src_operand *src, signature = normaliser->input_signature; break; + case VKD3DSPR_MISCTYPE: + if (reg->idx[0].offset == 0) + { + reg->type = VKD3DSPR_SSA; + reg->idx[0].offset = normaliser->vpos_ssa; + } + else if (reg->idx[0].offset == 1) + { + reg->type = VKD3DSPR_SSA; + reg->idx[0].offset = normaliser->vface_ssa; + } + return; + default: return; } @@ -4784,7 +5145,7 @@ static void vsir_src_operand_io_normalise(struct vsir_src_operand *src, e = &signature->elements[element_idx]; if 
(vsir_signature_element_is_array(e, normaliser->normalisation_flags)) - id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + id_idx = vsir_operand_normalise_arrayed_addressing(reg, id_idx, e->register_index); reg->idx[id_idx].offset = element_idx; reg->idx_count = id_idx + 1; @@ -4798,6 +5159,66 @@ static void vsir_src_operand_io_normalise(struct vsir_src_operand *src, } } +static enum vkd3d_result io_normaliser_init_misctype(struct vsir_program_iterator *it, + struct vsir_program *program, struct io_normaliser *normaliser) +{ + struct vkd3d_shader_location location = vsir_program_iterator_current(it)->location; + const struct signature_element *element; + struct vkd3d_shader_instruction *ins; + unsigned int signature_idx; + + if (normaliser->shader_type != VKD3D_SHADER_TYPE_PIXEL || normaliser->major != 3) + return VKD3D_OK; + + if (vsir_signature_find_sysval(normaliser->input_signature, VKD3D_SHADER_SV_POSITION, 0, &signature_idx)) + { + element = &normaliser->input_signature->elements[signature_idx]; + + /* sm4 (and therefore vsir) SV_Position returns the pixel center, + * i.e. coordinates ending in .5, but vPos should return integer + * coordinates. Take the floor. 
*/ + + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + normaliser->vpos_ssa = program->ssa_count++; + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_ROUND_Z, 1, 1)) + { + vsir_instruction_init(ins, &location, VSIR_OP_NOP); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + vsir_dst_operand_init_ssa_f32v4(&ins->dst[0], normaliser->vpos_ssa); + vsir_src_operand_init_io(&ins->src[0], VKD3DSPR_INPUT, element, 1); + ins->src[0].reg.idx[0].offset = signature_idx; + vsir_program_iterator_next(it); + } + + if (vsir_signature_find_sysval(normaliser->input_signature, VKD3D_SHADER_SV_IS_FRONT_FACE, 0, &signature_idx)) + { + element = &normaliser->input_signature->elements[signature_idx]; + + /* sm4 (and therefore vsir) SV_IsFrontFace returns ~0u for front + * and 0 for back. vFace returns 1.0 for front and -1.0 for back. */ + + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + normaliser->vface_ssa = program->ssa_count++; + if (!vsir_instruction_init_with_params(program, ins, &location, VSIR_OP_MOVC, 1, 3)) + { + vsir_instruction_init(ins, &location, VSIR_OP_NOP); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + vsir_dst_operand_init_ssa_f32v4(&ins->dst[0], normaliser->vface_ssa); + vsir_src_operand_init_io(&ins->src[0], VKD3DSPR_INPUT, element, 1); + ins->src[0].reg.idx[0].offset = signature_idx; + ins->src[0].reg.data_type = VSIR_DATA_U32; + vsir_src_operand_init_const_f32(&ins->src[1], 1.0f); + vsir_src_operand_init_const_f32(&ins->src[2], -1.0f); + vsir_program_iterator_next(it); + } + + return VKD3D_OK; +} + static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, struct io_normaliser *normaliser) { @@ -4834,6 +5255,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); struct io_normaliser normaliser = 
{ctx->message_context, VKD3D_OK}; struct vkd3d_shader_instruction *ins; + bool misctype_initialized = false; enum vkd3d_result ret; VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO); @@ -4845,6 +5267,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.output_signature = &program->output_signature; normaliser.patch_constant_signature = &program->patch_constant_signature; normaliser.normalisation_flags = &program->normalisation_flags; + normaliser.vpos_ssa = normaliser.vface_ssa = ~0u; for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { @@ -4861,6 +5284,13 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.phase = ins->opcode; break; default: + if (!misctype_initialized && ins->opcode != VSIR_OP_NOP) + { + if ((ret = io_normaliser_init_misctype(&it, program, &normaliser))) + return ret; + misctype_initialized = true; + ins = vsir_program_iterator_current(&it); + } break; } } @@ -4896,7 +5326,7 @@ struct flat_constants_normaliser size_t def_count, defs_capacity; }; -static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, +static bool get_flat_constant_register_type(const struct vsir_operand *reg, enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index, struct vsir_src_operand **rel_addr) { static const struct @@ -5626,7 +6056,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) { - WARN("Unexpected src swizzle %#x.\n", src->swizzle); vkd3d_shader_error(message_context, &instruction->location, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "The swizzle for a switch case value is not scalar X."); @@ -5975,8 +6404,8 @@ static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned /* This is idempotent: it can be safely applied more than once on the * same register. 
*/ -static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, - struct vkd3d_shader_register *reg) +static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, + struct ssas_to_temps_alloc *alloc, struct vsir_operand *reg) { unsigned int i; @@ -6596,7 +7025,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) break; case VSIR_OP_BRANCH: - shape = vsir_register_is_label(&end->src[0].reg) ? "ellipse" : "box"; + shape = vsir_operand_is_label(&end->src[0].reg) ? "ellipse" : "box"; break; default: @@ -6798,7 +7227,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program break; case VSIR_OP_BRANCH: - if (vsir_register_is_label(&end->src[0].reg)) + if (vsir_operand_is_label(&end->src[0].reg)) { if ((ret = vsir_cfg_add_edge(cfg, block, &end->src[0])) < 0) goto fail; @@ -6994,7 +7423,6 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) { struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&header->begin); - FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Block %u is header to more than one loop, this is not implemented.", header->label); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -7079,7 +7507,6 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) { struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&block->begin); - WARN("Unexpected entry point %u.\n", block->label); vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Block %u is unreachable from the entry point.", block->label); ret = VKD3D_ERROR_INVALID_SHADER; @@ -7091,7 +7518,6 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) { struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&cfg->entry->begin); - 
WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); ret = VKD3D_ERROR_INVALID_SHADER; @@ -7197,7 +7623,6 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) struct vkd3d_shader_instruction *begin = vsir_program_iterator_current(&cfg->entry->begin); /* There is a cycle of forward edges. */ - WARN("The control flow graph is not reducible.\n"); vkd3d_shader_error(cfg->message_context, &begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "The control flow graph is not reducible."); ret = VKD3D_ERROR_INVALID_SHADER; @@ -7491,7 +7916,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) struct vsir_cfg_edge_action action_true, action_false; bool invert_condition = false; - if (vsir_register_is_label(&end->src[0].reg)) + if (vsir_operand_is_label(&end->src[0].reg)) { unsigned int target = label_from_src_operand(&end->src[0]); struct vsir_block *successor = &cfg->blocks[target - 1]; @@ -8405,12 +8830,12 @@ fail: return ret; } -static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, +static void register_map_undominated_use(struct vsir_operand *reg, struct ssas_to_temps_alloc *alloc, struct vsir_block *block, struct vsir_block **origin_blocks) { unsigned int i; - if (register_is_ssa(reg)) + if (vsir_operand_is_ssa(reg)) { i = reg->idx[0].offset; if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) @@ -8461,7 +8886,7 @@ static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct v { for (j = 0; j < ins->dst_count; ++j) { - if (register_is_ssa(&ins->dst[j].reg)) + if (vsir_operand_is_ssa(&ins->dst[j].reg)) origin_blocks[ins->dst[j].reg.idx[0].offset] = block; } } @@ -9083,7 +9508,6 @@ 
static enum vkd3d_result sysval_array_normaliser_add_output_copy( struct sysval_array_normaliser *normaliser, struct vsir_program_iterator *it) { struct vsir_program *program = normaliser->ctx->program; - struct vsir_src_operand *outpointid_param = NULL; unsigned int output_component_count = 0; struct vkd3d_shader_instruction *mov; struct signature_element *element; @@ -9099,13 +9523,6 @@ static enum vkd3d_result sysval_array_normaliser_add_output_copy( element = &program->output_signature.elements[normaliser->element_idx]; loc = vsir_program_iterator_current(it)->location; - if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL - && !(outpointid_param = vsir_program_create_outpointid_param(program))) - { - ERR("Failed to allocate outpointid param.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - for (unsigned int q = 0; q < normaliser->reg_count; ++q) { for (unsigned int k = 0; k < VKD3D_VEC4_SIZE; ++k) @@ -9128,11 +9545,12 @@ static enum vkd3d_result sysval_array_normaliser_add_output_copy( dst->reg.idx[1].offset = normaliser->element_idx; dst->reg.dimension = VSIR_DIMENSION_VEC4; dst->write_mask = VKD3DSP_WRITEMASK_0; - if (outpointid_param) + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) { dst->reg.idx_count = 3; dst->reg.idx[2] = dst->reg.idx[1]; - dst->reg.idx[1].rel_addr = outpointid_param; + if (!(dst->reg.idx[1].rel_addr = vsir_program_create_outpointid_param(program))) + return VKD3D_ERROR_OUT_OF_MEMORY; dst->reg.idx[1].offset = 0; } @@ -9275,9 +9693,9 @@ static enum vkd3d_result sysval_array_normaliser_dcl_indexable_temp( return VKD3D_OK; } -static bool vsir_program_validate_outpointid_control_point_index(const struct vkd3d_shader_register *reg) +static bool vsir_program_validate_outpointid_control_point_index(const struct vsir_operand *reg) { - const struct vkd3d_shader_register_index *index; + const struct vsir_register_index *index; if (reg->idx_count < 2) return false; @@ -9337,10 +9755,10 @@ static bool 
vsir_program_validate_outpointid_control_point_index(const struct vk * relevant signature element 'e'. */ static enum vkd3d_result sysval_array_normaliser_map_register(struct sysval_array_normaliser *normaliser, - struct vsir_program_iterator *it, struct vkd3d_shader_register *reg, unsigned int *src_swizzle) + struct vsir_program_iterator *it, struct vsir_operand *reg, unsigned int *src_swizzle) { - struct vkd3d_shader_register_index i_idx = {0}, p_idx = {0}; struct vsir_program *program = normaliser->ctx->program; + struct vsir_register_index i_idx = {0}, p_idx = {0}; unsigned int element_index, control_point_count; struct vkd3d_shader_instruction *ssa_ins; struct shader_signature *signature; @@ -9415,7 +9833,7 @@ static enum vkd3d_result sysval_array_normaliser_map_register(struct sysval_arra if (!vsir_instruction_init_with_params(program, ssa_ins, &loc, VSIR_OP_IMUL_LOW, 1, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_register_init(&ssa_ins->dst[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + vsir_operand_init(&ssa_ins->dst[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); ssa_ins->dst[0].reg.idx[0].offset = program->ssa_count++; ssa_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ssa_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; @@ -9428,11 +9846,11 @@ static enum vkd3d_result sysval_array_normaliser_map_register(struct sysval_arra if (!vsir_instruction_init_with_params(program, ssa_ins, &loc, VSIR_OP_ADD, 1, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_register_init(&ssa_ins->dst[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + vsir_operand_init(&ssa_ins->dst[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); ssa_ins->dst[0].reg.idx[0].offset = program->ssa_count++; ssa_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ssa_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - vsir_register_init(&ssa_ins->src[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + vsir_operand_init(&ssa_ins->src[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); ssa_ins->src[0].reg.idx[0].offset = program->ssa_count - 2; 
ssa_ins->src[1] = *i_idx.rel_addr; } @@ -9442,7 +9860,7 @@ static enum vkd3d_result sysval_array_normaliser_map_register(struct sysval_arra reg->idx[1].offset = normaliser->reg_count * p_idx.offset + i_idx.offset + q; if (!(reg->idx[1].rel_addr = vsir_program_get_src_operands(program, 1))) return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_register_init(&reg->idx[1].rel_addr->reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + vsir_operand_init(&reg->idx[1].rel_addr->reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); reg->idx[1].rel_addr->reg.idx[0].offset = program->ssa_count - 1; reg->idx[1].rel_addr->reg.dimension = VSIR_DIMENSION_VEC4; reg->idx[1].rel_addr->swizzle = VKD3D_SHADER_SWIZZLE_X; @@ -9451,10 +9869,16 @@ static enum vkd3d_result sysval_array_normaliser_map_register(struct sysval_arra else { reg->idx[1].offset = normaliser->reg_count * p_idx.offset + i_idx.offset + q; - reg->idx[1].rel_addr = i_idx.rel_addr; - } - - if (src_swizzle) + reg->idx[1].rel_addr = NULL; + if (i_idx.rel_addr) + { + if (!(reg->idx[1].rel_addr = vsir_program_get_src_operands(program, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + *reg->idx[1].rel_addr = *i_idx.rel_addr; + } + } + + if (src_swizzle) *src_swizzle = vsir_combine_swizzles(vsir_swizzle_from_writemask(element->mask), *src_swizzle); return VKD3D_OK; @@ -9490,7 +9914,7 @@ static enum vkd3d_result sysval_array_normaliser_map_instruction( return VKD3D_OK; } -static void shader_register_remove_signature_element(struct vkd3d_shader_register *reg, +static void vsir_operand_remove_signature_element(struct vsir_operand *reg, enum vkd3d_shader_register_type type, unsigned int index) { unsigned int current_idx; @@ -9498,7 +9922,7 @@ static void shader_register_remove_signature_element(struct vkd3d_shader_registe for (unsigned int i = 0; i < reg->idx_count; ++i) { if (reg->idx[i].rel_addr) - shader_register_remove_signature_element(&reg->idx[i].rel_addr->reg, type, index); + vsir_operand_remove_signature_element(&reg->idx[i].rel_addr->reg, type, index); } if (reg->type != type) 
@@ -9538,9 +9962,9 @@ static void vsir_program_remove_signature_element(struct vsir_program *program, if (vsir_instruction_is_dcl(ins)) continue; for (unsigned int i = 0; i < ins->dst_count; ++i) - shader_register_remove_signature_element(&ins->dst[i].reg, type, index); + vsir_operand_remove_signature_element(&ins->dst[i].reg, type, index); for (unsigned int i = 0; i < ins->src_count; ++i) - shader_register_remove_signature_element(&ins->src[i].reg, type, index); + vsir_operand_remove_signature_element(&ins->src[i].reg, type, index); } vsir_signature_element_cleanup(&signature->elements[index]); @@ -10848,12 +11272,15 @@ struct liveness_tracker bool written; bool fixed_mask; uint8_t mask; + unsigned int interior_loop_start, interior_loop_depth; unsigned int first_write, last_access, last_read; } *ssa_regs, *temp_regs; }; static void liveness_track_src(struct liveness_tracker *tracker, struct vsir_src_operand *src, unsigned int index) { + struct liveness_tracker_reg *reg; + for (unsigned int k = 0; k < src->reg.idx_count; ++k) { if (src->reg.idx[k].rel_addr) @@ -10861,14 +11288,21 @@ static void liveness_track_src(struct liveness_tracker *tracker, struct vsir_src } if (src->reg.type == VKD3DSPR_SSA) - { - tracker->ssa_regs[src->reg.idx[0].offset].last_read = index; - tracker->ssa_regs[src->reg.idx[0].offset].last_access = index; - } + reg = &tracker->ssa_regs[src->reg.idx[0].offset]; else if (src->reg.type == VKD3DSPR_TEMP) + reg = &tracker->temp_regs[src->reg.idx[0].offset]; + else + return; + + reg->last_read = index; + reg->last_access = index; + if (!reg->written) { - tracker->temp_regs[src->reg.idx[0].offset].last_read = index; - tracker->temp_regs[src->reg.idx[0].offset].last_access = index; + WARN("Register %s%u used uninitialised in instruction %u. 
Recording implicit write.\n", + vsir_register_type_get_name(src->reg.type, NULL), src->reg.idx[0].offset, index); + reg->first_write = index; + reg->last_access = index; + reg->written = true; } } @@ -10993,49 +11427,70 @@ static enum vkd3d_result track_liveness(struct vsir_program *program, struct liv { if (!loop_depth++) loop_start = i; + + for (unsigned int j = 0; j < program->ssa_count; ++j) + { + struct liveness_tracker_reg *reg = &tracker->ssa_regs[j]; + + /* Only track the topmost loop started after this SSA was + * written. We need to extend the liveness to its end + * if it is in fact read at any point during that loop. */ + if (reg->written && !reg->interior_loop_depth) + { + reg->interior_loop_start = i; + reg->interior_loop_depth = loop_depth; + } + } } else if (ins->opcode == VSIR_OP_ENDLOOP || ins->opcode == VSIR_OP_ENDREP) { - if (!--loop_depth) + --loop_depth; + + /* SSA registers should always be written before they are read. + * Moreover, if they are written in a loop, all reads must be + * inside the same loop. However, reads can be inside a loop + * that follows the write. + * + * In this case, the register must not be scratched between + * iterations of the loop, so we need to extend the liveness + * to the end of the loop. We track the beginning of each + * immediate child loop that follows the write, and if the last + * access was after that loop began, extend it to the loop end. + * + * For temps, we don't have these restrictions. A temp can be + * written at the end of the loop and read earlier in the same + * loop, and the value has to be preserved, so we need to extend + * both the first write and last access. It may be possible to + * be more sophisticated, but many of the cases that matter are + * affected by other optimizations such as copy propagation + * anyway. 
+ */ + for (unsigned int j = 0; j < program->ssa_count; ++j) { - /* Go through the allocator, find anything that was touched - * during the loop, and extend its liveness to the whole range - * of the loop. - * This isn't very sophisticated (e.g. we could try to avoid - * this for registers first written inside a loop body and only - * ever read inside one), but many of the cases that matter are - * affected by other optimizations such as copy propagation - * anyway. - * - * This is overkill for SSA registers. If an SSA register is - * written in loop L and last read in L, we don't need to touch - * its liveness. If it's last read in an inferior loop of L, we - * only need to extend its last-read to the end of L. (And it - * should be illegal for an SSA value to be read in a block - * containing L.) - * We don't try to perform this optimization yet, in the name of - * maximal simplicity. */ - for (unsigned int j = 0; j < program->ssa_count; ++j) - { - struct liveness_tracker_reg *reg = &tracker->ssa_regs[j]; + struct liveness_tracker_reg *reg = &tracker->ssa_regs[j]; - if (reg->first_write > loop_start) - reg->first_write = loop_start; - if (reg->last_access < i) + if (reg->written && reg->interior_loop_depth == loop_depth + 1) + { + if (reg->last_access > reg->interior_loop_start) reg->last_access = i; - if (reg->last_read < i) + if (reg->last_read > reg->interior_loop_start) reg->last_read = i; + + reg->interior_loop_depth = 0; } + } + if (!loop_depth) + { for (unsigned int j = 0; j < program->temp_count; ++j) { struct liveness_tracker_reg *reg = &tracker->temp_regs[j]; if (reg->first_write > loop_start) reg->first_write = loop_start; - if (reg->last_access < i) + if (reg->last_access > loop_start && reg->last_access < i) reg->last_access = i; - if (reg->last_read < i) + if (reg->last_read > loop_start && reg->last_read < i) reg->last_read = i; } } @@ -11288,8 +11743,10 @@ static void temp_allocator_open_register(struct temp_allocator *allocator, struc * We 
currently only handle cases where the mask is zero-based and * contiguous, so we need to fill in the missing components to * ensure this. */ - uint8_t mask = (1u << (vkd3d_log2i(liveness_reg->mask) + 1)) - 1; + uint8_t mask = 0; + if (liveness_reg->mask) + mask = (1u << (vkd3d_log2i(liveness_reg->mask) + 1)) - 1; if (vkd3d_popcount(available_mask) >= vkd3d_popcount(mask)) { if (mask != liveness_reg->mask) @@ -11672,8 +12129,10 @@ struct validation_context } input_signature_data, output_signature_data, patch_constant_signature_data; }; -static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, - enum vkd3d_shader_error error, const char *format, ...) +#define validator_error(ctx, error, ...) \ + validator_error_(ctx, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) validator_error_(struct validation_context *ctx, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { struct vkd3d_string_buffer buf; va_list args; @@ -11686,12 +12145,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c if (ctx->invalid_instruction_idx) { - vkd3d_shader_error(ctx->message_context, &ctx->location, error, "%s", buf.buffer); + vkd3d_shader_error_(ctx->message_context, &ctx->location, error, function, "%s", buf.buffer); WARN("VSIR validation error: %s\n", buf.buffer); } else { - vkd3d_shader_error(ctx->message_context, &ctx->location, error, + vkd3d_shader_error_(ctx->message_context, &ctx->location, error, function, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); } @@ -11702,8 +12161,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c ctx->status = VKD3D_ERROR_INVALID_SHADER; } -static void vsir_validate_register_without_indices(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void 
vsir_validate_register_without_indices(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->idx_count != 0) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, @@ -11919,7 +12377,7 @@ static const bool vsir_get_io_register_data(struct validation_context *ctx, } } -static void vsir_validate_io_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) +static void vsir_validate_io_register(struct validation_context *ctx, const struct vsir_operand *reg) { unsigned int control_point_index, control_point_count; const struct shader_signature *signature; @@ -12019,7 +12477,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, const stru reg->idx[control_point_index].offset, control_point_count, reg->type); } -static void vsir_validate_texture_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) +static void vsir_validate_texture_register(struct validation_context *ctx, const struct vsir_operand *reg) { const struct vkd3d_shader_version *version = &ctx->program->shader_version; uint32_t idx; @@ -12050,8 +12508,7 @@ static void vsir_validate_texture_register(struct validation_context *ctx, const "Register t%u exceeds limits for version %u.%u.", idx, version->major, version->minor); } -static void vsir_validate_temp_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_temp_register(struct validation_context *ctx, const struct vsir_operand *reg) { struct validation_context_temp_data *data; @@ -12101,8 +12558,7 @@ static void vsir_validate_temp_register(struct validation_context *ctx, } } -static void vsir_validate_rastout_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_rastout_register(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->idx_count != 1) { @@ -12121,8 +12577,7 @@ static void vsir_validate_rastout_register(struct 
validation_context *ctx, "Invalid offset for a RASTOUT register."); } -static void vsir_validate_misctype_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_misctype_register(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->idx_count != 1) { @@ -12141,8 +12596,7 @@ static void vsir_validate_misctype_register(struct validation_context *ctx, "Invalid offset for a MISCTYPE register."); } -static void vsir_validate_label_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_label_register(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, @@ -12178,7 +12632,7 @@ static void vsir_validate_label_register(struct validation_context *ctx, } static void vsir_validate_descriptor_indices(struct validation_context *ctx, - const struct vkd3d_shader_register *reg, enum vkd3d_shader_descriptor_type type, const char *name) + const struct vsir_operand *reg, enum vkd3d_shader_descriptor_type type, const char *name) { const struct vkd3d_shader_descriptor_info1 *descriptor; @@ -12204,8 +12658,7 @@ static void vsir_validate_descriptor_indices(struct validation_context *ctx, descriptor->register_index + descriptor->count - 1, name, reg->idx[0].offset); } -static void vsir_validate_constbuffer_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_constbuffer_register(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->dimension != VSIR_DIMENSION_VEC4) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, @@ -12221,8 +12674,7 @@ static void vsir_validate_constbuffer_register(struct validation_context *ctx, vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, "cb"); } -static void 
vsir_validate_sampler_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_sampler_register(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, @@ -12248,8 +12700,7 @@ static void vsir_validate_sampler_register(struct validation_context *ctx, vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, "s"); } -static void vsir_validate_resource_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_resource_register(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, @@ -12274,8 +12725,7 @@ static void vsir_validate_resource_register(struct validation_context *ctx, vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, "t"); } -static void vsir_validate_uav_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_uav_register(struct validation_context *ctx, const struct vsir_operand *reg) { if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, @@ -12304,8 +12754,7 @@ static void vsir_validate_uav_register(struct validation_context *ctx, vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, "u"); } -static void vsir_validate_ssa_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static void vsir_validate_ssa_register(struct validation_context *ctx, const struct vsir_operand *reg) { struct validation_context_ssa_data *data; @@ -12366,8 +12815,23 @@ static void vsir_validate_ssa_register(struct validation_context *ctx, static void 
vsir_validate_src_operand(struct validation_context *ctx, const struct vsir_src_operand *src); -static void vsir_validate_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static bool vsir_rel_addr_reg_type_is_valid(enum vkd3d_shader_register_type type) +{ + switch (type) + { + case VKD3DSPR_TEMP: + case VKD3DSPR_SSA: + case VKD3DSPR_ADDR: + case VKD3DSPR_LOOP: + case VKD3DSPR_OUTPOINTID: + return true; + + default: + return false; + } +} + +static void vsir_validate_register(struct validation_context *ctx, const struct vsir_operand *reg) { static const struct register_validation_data { @@ -12423,21 +12887,10 @@ static void vsir_validate_register(struct validation_context *ctx, { vsir_validate_src_operand(ctx, src); - switch (src->reg.type) - { - case VKD3DSPR_TEMP: - case VKD3DSPR_SSA: - case VKD3DSPR_ADDR: - case VKD3DSPR_LOOP: - case VKD3DSPR_OUTPOINTID: - break; - - default: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid register type %#x for a relative address parameter.", - src->reg.type); - break; - } + if (!vsir_rel_addr_reg_type_is_valid(src->reg.type)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x for a relative address parameter.", + src->reg.type); } } @@ -12922,28 +13375,42 @@ static const char * const signature_type_names[] = #define DS_BIT (1u << VKD3D_SHADER_TYPE_DOMAIN) #define CS_BIT (1u << VKD3D_SHADER_TYPE_COMPUTE) +#define FLOAT_BIT (1u << VKD3D_SHADER_COMPONENT_FLOAT) +#define INT_BIT (1u << VKD3D_SHADER_COMPONENT_INT) +#define UINT_BIT (1u << VKD3D_SHADER_COMPONENT_UINT) +#define FLOAT16_BIT (1u << VKD3D_SHADER_COMPONENT_FLOAT16) +#define INT16_BIT (1u << VKD3D_SHADER_COMPONENT_INT16) +#define UINT16_BIT (1u << VKD3D_SHADER_COMPONENT_UINT16) + static const struct sysval_validation_data_element { unsigned int input; unsigned int output; unsigned int patch_constant; - enum vkd3d_shader_component_type data_type; + unsigned 
int data_mask; unsigned int component_count; } sysval_validation_data[] = { - [VKD3D_SHADER_SV_POSITION] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, - VKD3D_SHADER_COMPONENT_FLOAT, 4}, - [VKD3D_SHADER_SV_CLIP_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, - VKD3D_SHADER_COMPONENT_FLOAT, 4}, - [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, - VKD3D_SHADER_COMPONENT_FLOAT, 4}, - [VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, - [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, - [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, - [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, - [VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, - [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_POSITION] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, + VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, FLOAT_BIT, 4}, + [VKD3D_SHADER_SV_CLIP_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, + PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, FLOAT_BIT, 4}, + [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, + PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, FLOAT_BIT, 4}, + [VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX] = {HS_BIT | DS_BIT | GS_BIT | PS_BIT, + VS_BIT | HS_BIT | DS_BIT | GS_BIT, 0, UINT_BIT, 1}, + [VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX] = {HS_BIT | DS_BIT | GS_BIT | PS_BIT, + VS_BIT | HS_BIT | DS_BIT | GS_BIT, 0, UINT_BIT, 1}, + [VKD3D_SHADER_SV_VERTEX_ID] = {VS_BIT, 0, 0, UINT_BIT, 1}, + [VKD3D_SHADER_SV_TARGET] = {0, PS_BIT, 0, + FLOAT_BIT | UINT_BIT | INT_BIT | FLOAT16_BIT | UINT16_BIT | INT16_BIT, 4}, + 
[VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, FLOAT_BIT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, FLOAT_BIT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, FLOAT_BIT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, FLOAT_BIT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, FLOAT_BIT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, FLOAT_BIT, 1}, }; static void vsir_validate_signature_element(struct validation_context *ctx, @@ -13135,7 +13602,7 @@ static void vsir_validate_signature_element(struct validation_context *ctx, if (data->component_count != 0) { - if (element->component_type != data->data_type) + if (!((1u << element->component_type) & data->data_mask)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid data type %#x for system value semantic %#x.", idx, signature_type_name, element->component_type, element->sysval_semantic); @@ -13750,7 +14217,7 @@ static void vsir_validate_branch(struct validation_context *ctx, const struct vk if (!vsir_validate_src_min_count(ctx, instruction, 1)) return; - if (vsir_register_is_label(&instruction->src[0].reg)) + if (vsir_operand_is_label(&instruction->src[0].reg)) { /* Unconditional branch: parameters are jump label, * optional merge label, optional continue label. 
*/ @@ -13758,7 +14225,7 @@ static void vsir_validate_branch(struct validation_context *ctx, const struct vk for (i = 0; i < instruction->src_count; ++i) { - if (!vsir_register_is_label(&instruction->src[i].reg)) + if (!vsir_operand_is_label(&instruction->src[i].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", instruction->src[i].reg.type); @@ -13774,7 +14241,7 @@ static void vsir_validate_branch(struct validation_context *ctx, const struct vk for (i = 1; i < instruction->src_count; ++i) { - if (!vsir_register_is_label(&instruction->src[i].reg)) + if (!vsir_operand_is_label(&instruction->src[i].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", instruction->src[i].reg.type); @@ -14358,7 +14825,7 @@ static void vsir_validate_itoi(struct validation_context *ctx, const struct vkd3 static void vsir_validate_label(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); - if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) + if (instruction->src_count >= 1 && !vsir_operand_is_label(&instruction->src[0].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register of type %#x in a LABEL instruction, expected LABEL.", instruction->src[0].reg.type); @@ -14399,8 +14866,8 @@ static void vsir_validate_phi(struct validation_context *ctx, const struct vkd3d unsigned int value_idx = 2 * i; unsigned int label_idx = 2 * i + 1; - if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) - && !register_is_ssa(&instruction->src[value_idx].reg)) + if (!vsir_operand_is_constant_or_undef(&instruction->src[value_idx].reg) + && !vsir_operand_is_ssa(&instruction->src[value_idx].reg)) validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid value register for incoming %u of type %#x in PHI instruction, " "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); @@ -14410,7 +14877,7 @@ static void vsir_validate_phi(struct validation_context *ctx, const struct vkd3d "Invalid value dimension %#x for incoming %u in PHI instruction, expected scalar.", instruction->src[value_idx].reg.dimension, i); - if (!vsir_register_is_label(&instruction->src[label_idx].reg)) + if (!vsir_operand_is_label(&instruction->src[label_idx].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid label register for case %u of type %#x in PHI instruction, " "expected LABEL.", i, instruction->src[value_idx].reg.type); @@ -14419,7 +14886,7 @@ static void vsir_validate_phi(struct validation_context *ctx, const struct vkd3d if (instruction->dst_count < 1) return; - if (!register_is_ssa(&instruction->dst[0].reg)) + if (!vsir_operand_is_ssa(&instruction->dst[0].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid destination of type %#x in PHI instruction, expected SSA.", instruction->dst[0].reg.type); @@ -14524,12 +14991,12 @@ static void vsir_validate_switch_monolithic(struct validation_context *ctx, "Invalid source count %zu for a monolithic SWITCH instruction, it must be an odd number.", instruction->src_count); - if (!vsir_register_is_label(&instruction->src[1].reg)) + if (!vsir_operand_is_label(&instruction->src[1].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", instruction->src[1].reg.type); - if (!vsir_register_is_label(&instruction->src[2].reg)) + if (!vsir_operand_is_label(&instruction->src[2].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", 
instruction->src[2].reg.type); @@ -14541,12 +15008,12 @@ static void vsir_validate_switch_monolithic(struct validation_context *ctx, unsigned int value_idx = 3 + 2 * i; unsigned int label_idx = 3 + 2 * i + 1; - if (!register_is_constant(&instruction->src[value_idx].reg)) + if (!vsir_operand_is_constant(&instruction->src[value_idx].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid value register for case %u of type %#x in monolithic SWITCH instruction, " "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - if (!vsir_register_is_label(&instruction->src[label_idx].reg)) + if (!vsir_operand_is_label(&instruction->src[label_idx].reg)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid label register for case %u of type %#x in monolithic SWITCH instruction, " "expected LABEL.", i, instruction->src[value_idx].reg.type); @@ -14555,6 +15022,35 @@ static void vsir_validate_switch_monolithic(struct validation_context *ctx, ctx->inside_block = false; } +static void vsir_validate_texdepth(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "TEXDEPTH cannot be used in shader type %#x.", version->type); + + if (version->major != 1 || version->minor != 4) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_OPCODE, + "TEXDEPTH cannot be used in version %u.%u.", version->major, version->minor); + + if (instruction->dst[0].write_mask != VKD3DSP_WRITEMASK_ALL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid TEXDEPTH write mask %#x.", instruction->dst[0].write_mask); + + if (instruction->dst[0].reg.type != VKD3DSPR_TEMP) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid TEXDEPTH register type %#x.", 
instruction->dst[0].reg.type); + + if (instruction->dst[0].reg.idx[0].offset != 5) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid TEXDEPTH register index %u.", instruction->dst[0].reg.idx[0].offset); + + /* Strictly not an elementwise operation, but we expect the dest to be float. */ + vsir_validate_float_elementwise_operation(ctx, instruction); +} + static void vsir_validate_ushr(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -14690,6 +15186,7 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VSIR_OP_SATURATE] = {1, 1, vsir_validate_float_or_double_elementwise_operation}, [VSIR_OP_SWITCH] = {0, 1, vsir_validate_switch}, [VSIR_OP_SWITCH_MONOLITHIC] = {0, ~0u, vsir_validate_switch_monolithic}, + [VSIR_OP_TEXDEPTH] = {1, 0, vsir_validate_texdepth}, [VSIR_OP_USHR] = {1, 2, vsir_validate_ushr}, }; @@ -14773,6 +15270,73 @@ static void vsir_validate_instruction(struct validation_context *ctx, } } +static void vsir_program_validate_update_src_operand_owner(struct validation_context *ctx, + struct vsir_src_operand *src, bool reset, size_t owner_index) +{ + if (!src) + return; + + if (reset) + { + src->owner_index = 0; + return; + } + + if (src->owner_index) + { + if (src->owner_index == owner_index) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_MULTIPLE_SRC_OPERAND_OWNERS, + "Source operand is used more than once by this instruction."); + else + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_MULTIPLE_SRC_OPERAND_OWNERS, + "Source operand is used by more than one instruction, first one is %zu.", src->owner_index); + return; + } + src->owner_index = owner_index; +} + +static enum vkd3d_result vsir_program_validate_src_operands_owners( + struct validation_context *ctx, struct vsir_program *program) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + struct vsir_src_operand *src; + struct 
vsir_dst_operand *dst; + + for (unsigned int t = 0; t < 2; ++t) + { + ctx->invalid_instruction_idx = false; + ctx->instruction_idx = 0; + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + ctx->location = ins->location; + for (unsigned int i = 0; i < ins->src_count; ++i) + { + src = &ins->src[i]; + vsir_program_validate_update_src_operand_owner(ctx, src, !t, ctx->instruction_idx + 1); + for (unsigned int k = 0; k < src->reg.idx_count; ++k) + { + vsir_program_validate_update_src_operand_owner(ctx, + src->reg.idx[k].rel_addr, !t, ctx->instruction_idx + 1); + } + } + for (unsigned int i = 0; i < ins->dst_count; ++i) + { + dst = &ins->dst[i]; + for (unsigned int k = 0; k < dst->reg.idx_count; ++k) + { + vsir_program_validate_update_src_operand_owner(ctx, + dst->reg.idx[k].rel_addr, !t, ctx->instruction_idx + 1); + } + } + ++ctx->instruction_idx; + } + } + ctx->invalid_instruction_idx = true; + + return ctx->status; +} + enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, const char *source_name, struct vkd3d_shader_message_context *message_context) { @@ -14899,8 +15463,9 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) goto fail; - ctx.invalid_instruction_idx = false; + vsir_program_validate_src_operands_owners(&ctx, program); + ctx.invalid_instruction_idx = false; ctx.instruction_idx = 0; for (ins = vsir_program_iterator_head(&it); ins && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ins = vsir_program_iterator_next(&it)) @@ -15426,7 +15991,6 @@ static bool is_read_only(const struct vsir_program *program, enum vkd3d_shader_r /* Not applicable since they're not numeric or can't be sources. 
*/ case VKD3DSPR_ATTROUT: case VKD3DSPR_COLOROUT: - case VKD3DSPR_COMBINED_SAMPLER: case VKD3DSPR_COUNT: case VKD3DSPR_DEPTHOUT: case VKD3DSPR_DEPTHOUTGE: @@ -15442,13 +16006,12 @@ static bool is_read_only(const struct vsir_program *program, enum vkd3d_shader_r case VKD3DSPR_PREDICATE: case VKD3DSPR_RASTERIZER: case VKD3DSPR_RASTOUT: - case VKD3DSPR_RESOURCE: - case VKD3DSPR_SAMPLER: case VKD3DSPR_STREAM: case VKD3DSPR_TEXCRDOUT: case VKD3DSPR_UAV: return false; + case VKD3DSPR_COMBINED_SAMPLER: case VKD3DSPR_CONST: case VKD3DSPR_CONSTBOOL: case VKD3DSPR_CONSTBUFFER: @@ -15471,7 +16034,9 @@ static bool is_read_only(const struct vsir_program *program, enum vkd3d_shader_r case VKD3DSPR_PATCHCONST: case VKD3DSPR_POINT_COORD: case VKD3DSPR_PRIMID: + case VKD3DSPR_RESOURCE: case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_SAMPLER: case VKD3DSPR_SSA: case VKD3DSPR_TESSCOORD: case VKD3DSPR_THREADGROUPID: @@ -15529,6 +16094,31 @@ static bool can_propagate_ssa_source(const struct vsir_program *program, const s return true; } +static bool vsir_program_copy_propagate_relative_addresses(struct vsir_copy_propagation_state *state, + struct vsir_operand *reg) +{ + bool progress = false; + + for (unsigned int i = 0; i < reg->idx_count; ++i) + { + const struct vsir_src_operand *rel_addr = reg->idx[i].rel_addr; + const struct vkd3d_shader_instruction *mov; + + if (!rel_addr || rel_addr->reg.type != VKD3DSPR_SSA) + continue; + + if ((mov = state->ssa_sources[rel_addr->reg.idx[0].offset]) + && vsir_rel_addr_reg_type_is_valid(mov->src[0].reg.type)) + { + *reg->idx[i].rel_addr = mov->src[0]; + TRACE("Propagating write to sr%u into relative address.\n", mov->src[0].reg.idx[0].offset); + progress = true; + } + } + + return progress; +} + static enum vkd3d_result vsir_program_copy_propagation(struct vsir_program *program, struct vsir_transformation_context *ctx) { @@ -15549,6 +16139,8 @@ static enum vkd3d_result vsir_program_copy_propagation(struct vsir_program *prog enum vsir_data_type 
data_type; uint32_t new_swizzle = 0; + ctx->progress |= vsir_program_copy_propagate_relative_addresses(&state, &src->reg); + if (src->reg.type != VKD3DSPR_SSA) continue; if (data_type_is_64_bit(src->reg.data_type)) @@ -15558,10 +16150,13 @@ static enum vkd3d_result vsir_program_copy_propagation(struct vsir_program *prog mov_src = &mov->src[0]; data_type = src->reg.data_type; + TRACE("Propagating write to sr%u into %s instruction.\n", + src->reg.idx[0].offset, vsir_opcode_get_name(ins->opcode, "<unknown>")); + src->reg = mov_src->reg; src->reg.data_type = data_type; - if (!shader_register_clone_relative_addresses(&src->reg, program)) + if (!vsir_operand_clone_indirect_indices(&src->reg, program)) { vkd3d_free(state.ssa_sources); return VKD3D_ERROR_OUT_OF_MEMORY; @@ -15596,6 +16191,9 @@ static enum vkd3d_result vsir_program_copy_propagation(struct vsir_program *prog ctx->progress = true; } + for (unsigned int j = 0; j < ins->dst_count; ++j) + ctx->progress |= vsir_program_copy_propagate_relative_addresses(&state, &ins->dst[j].reg); + if (can_propagate_ssa_source(program, ins)) state.ssa_sources[ins->dst[0].reg.idx[0].offset] = ins; } @@ -15604,46 +16202,711 @@ static enum vkd3d_result vsir_program_copy_propagation(struct vsir_program *prog return VKD3D_OK; } -static void vsir_optimize(struct vsir_transformation_context *ctx) +/* + * This pass is similar to vsir_program_copy_propagation(), but operating in + * "reverse", hoisting output MOVs earlier by combining them with the + * instruction where they're written. + * + * We attempt to combine two instructions, not necessarily consecutive, + * of the form + * + * XXX aaa.bbb, ... + * mov ccc.ddd, aaa + * + * into + * + * XXX ccc.ddd, ... + * + * Note that ddd does not need to be a subset of bbb; + * this pass should support temp registers written in pieces. + * Currently only SSA is supported, so for now ddd will be a subset of bbb. 
+ * + * The constraints we operate under are a bit different, and stricter: + * + * - aaa.bbb cannot be written between the two instructions. + * + * - aaa.bbb must only be read once. This is not a correctness constraint, + * but rather we prefer emitting a mov to duplicating any instruction that + * does real work. We will get rid of the write to aaa.bbb entirely. + * + * - Only write-only output registers are supported. While this pass may be + * legal on temps, it's not clear that it will provide any utility there; + * temp copies should already be deleted by regular HLSL or vsir copy prop. + * + * - The two instructions must exist in the same control flow. We must write + * an output in exactly the same conditions. + */ +struct vsir_output_copy_hoisting_state +{ + struct vsir_output_copy_hoisting_reg + { + struct vkd3d_shader_instruction *source; + unsigned int source_dst_index; /* Which dst of 'source' writes this variable. */ + struct vkd3d_shader_instruction *mov; + /* Two instructions A and B are in the same control flow if: + * (1) they have the same depth; + * (2) the CF depth never decreases below A's depth between A and B. + * The depth of A (source) is tracked in 'source_depth'. + * Condition (2) should be impossible for SSAs. */ + uint32_t source_depth; + bool invalid; + bool read_once; + } *ssas; + + unsigned int depth; +}; + +static bool is_write_only(enum vkd3d_shader_register_type type) { - do + switch (type) { - ctx->progress = false; - vsir_transform(ctx, vsir_program_copy_propagation); - vsir_transform(ctx, vsir_program_dce); + case VKD3DSPR_ADDR: + case VKD3DSPR_IDXTEMP: + case VKD3DSPR_LOOP: + case VKD3DSPR_TEMP: + case VKD3DSPR_TEMPFLOAT16: + case VKD3DSPR_TEXTURE: + return false; + + /* Not applicable since they're not numeric or can't be destinations. 
*/ + case VKD3DSPR_COMBINED_SAMPLER: + case VKD3DSPR_CONST: + case VKD3DSPR_CONSTBOOL: + case VKD3DSPR_CONSTBUFFER: + case VKD3DSPR_CONSTINT: + case VKD3DSPR_COUNT: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_FUNCTIONBODY: + case VKD3DSPR_FUNCTIONPOINTER: + case VKD3DSPR_GROUPSHAREDMEM: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_IMMCONST: + case VKD3DSPR_IMMCONST64: + case VKD3DSPR_IMMCONSTBUFFER: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_INPUT: + case VKD3DSPR_INVALID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_LABEL: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_MISCTYPE: + case VKD3DSPR_NULL: + case VKD3DSPR_OUTCONTROLPOINT: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_PARAMETER: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_POINT_COORD: + case VKD3DSPR_PREDICATE: + case VKD3DSPR_PRIMID: + case VKD3DSPR_RASTERIZER: + case VKD3DSPR_RESOURCE: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_SAMPLER: + case VKD3DSPR_SSA: + case VKD3DSPR_STREAM: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_THREADID: + case VKD3DSPR_UAV: + case VKD3DSPR_UNDEF: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + return false; + + case VKD3DSPR_ATTROUT: + case VKD3DSPR_COLOROUT: + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_OUTSTENCILREF: + case VKD3DSPR_RASTOUT: + case VKD3DSPR_TEXCRDOUT: + return true; } - while (ctx->progress); + + vkd3d_unreachable(); } -enum vkd3d_result vsir_program_optimize(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +static bool is_identity_swizzle(uint32_t swizzle, uint32_t write_mask) { - struct vsir_transformation_context ctx; + for (unsigned int i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (bitmap_is_set(&write_mask, i) && vsir_swizzle_get_component(swizzle, i) != i) + 
return false; + } - vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); - vsir_optimize(&ctx); + return true; +} - if (TRACE_ON() && ctx.result >= 0) - vsir_program_trace(program); +static void vsir_program_output_copy_hoisting_record_read( + const struct vsir_output_copy_hoisting_state *state, const struct vsir_src_operand *src) +{ + struct vsir_output_copy_hoisting_reg *reg; - return ctx.result; + if (src->reg.type != VKD3DSPR_SSA) + return; + reg = &state->ssas[src->reg.idx[0].offset]; + if (!reg->read_once) + reg->read_once = true; + else + reg->invalid = true; } -/* Transformations which should happen at parse time, i.e. before scan - * information is returned to the user. - * - * In particular, some passes need to modify the signature, and - * vkd3d_shader_scan() should report the modified signature for the given - * target. */ -enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +static void vsir_program_output_copy_hoisting_record_mov( + const struct vsir_output_copy_hoisting_state *state, struct vkd3d_shader_instruction *ins) { - struct vsir_transformation_context ctx; + const struct vsir_src_operand *src = &ins->src[0]; + const struct vsir_dst_operand *dst = &ins->dst[0]; + struct vsir_output_copy_hoisting_reg *reg; - vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); - - /* For vsir_program_ensure_diffuse(). 
*/ - if (program->shader_version.major <= 2) + if (src->reg.type != VKD3DSPR_SSA) + return; + if (!is_write_only(dst->reg.type)) + return; + if (src->modifiers || (dst->modifiers & ~VKD3DSPDM_SATURATE) + || dst->shift || data_type_is_64_bit(src->reg.data_type)) + return; + for (unsigned int i = 0; i < dst->reg.idx_count; ++i) + { + if (dst->reg.idx[i].rel_addr) + return; + } + /* We can sometimes mess with the swizzle in 'source', but for now don't. */ + if (dst->reg.dimension == VSIR_DIMENSION_VEC4 && !is_identity_swizzle(src->swizzle, dst->write_mask)) + return; + else if (dst->reg.dimension == VSIR_DIMENSION_SCALAR + && vsir_swizzle_get_component(src->swizzle, 0) != VKD3D_SHADER_SWIZZLE_X) + return; + + reg = &state->ssas[src->reg.idx[0].offset]; + if (reg->invalid) + return; + if (reg->mov || reg->source_depth != state->depth) + { + reg->invalid = true; + return; + } + reg->mov = ins; +} + +static enum vkd3d_result vsir_program_output_copy_hoisting(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vsir_output_copy_hoisting_state state = {0}; + struct vkd3d_shader_instruction *ins; + + if (!(state.ssas = vkd3d_calloc(program->ssa_count, sizeof(*state.ssas)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + for (unsigned int j = 0; j < ins->src_count; ++j) + { + vsir_program_output_copy_hoisting_record_read(&state, &ins->src[j]); + + for (unsigned int k = 0; k < ins->src[j].reg.idx_count; ++k) + { + if (ins->src[j].reg.idx[k].rel_addr) + vsir_program_output_copy_hoisting_record_read(&state, ins->src[j].reg.idx[k].rel_addr); + } + } + + for (unsigned int j = 0; j < ins->dst_count; ++j) + { + for (unsigned int k = 0; k < ins->dst[j].reg.idx_count; ++k) + { + if (ins->dst[j].reg.idx[k].rel_addr) + vsir_program_output_copy_hoisting_record_read(&state, 
ins->dst[j].reg.idx[k].rel_addr); + } + } + + switch (ins->opcode) + { + case VSIR_OP_IF: + case VSIR_OP_IFC: + case VSIR_OP_LOOP: + case VSIR_OP_SWITCH: + ++state.depth; + break; + + case VSIR_OP_ENDIF: + case VSIR_OP_ENDLOOP: + case VSIR_OP_ENDSWITCH: + --state.depth; + break; + + case VSIR_OP_MOV: + vsir_program_output_copy_hoisting_record_mov(&state, ins); + break; + + default: + break; + } + + for (unsigned int j = 0; j < ins->dst_count; ++j) + { + const struct vsir_dst_operand *dst = &ins->dst[j]; + struct vsir_output_copy_hoisting_reg *reg; + + if (dst->reg.type != VKD3DSPR_SSA) + continue; + reg = &state.ssas[dst->reg.idx[0].offset]; + + /* This is an SSA; it should not have been written already. */ + VKD3D_ASSERT(!reg->source); + reg->source = ins; + reg->source_dst_index = j; + reg->source_depth = state.depth; + } + } + + for (unsigned int i = 0; i < program->ssa_count; ++i) + { + struct vsir_output_copy_hoisting_reg *reg = &state.ssas[i]; + struct vsir_dst_operand *source_dst; + enum vsir_data_type orig_data_type; + + if (reg->invalid || !reg->mov) + continue; + + TRACE("Hoisting 'mov %s%u, sr%u' to %s instruction.\n", + vsir_register_type_get_name(reg->mov->dst[0].reg.type, "<unknown>"), + reg->mov->dst[0].reg.idx[0].offset, + reg->mov->src[0].reg.idx[0].offset, + vsir_opcode_get_name(reg->source->opcode, "<unknown>")); + + source_dst = ®->source->dst[reg->source_dst_index]; + orig_data_type = source_dst->reg.data_type; + source_dst->reg = reg->mov->dst[0].reg; + source_dst->reg.data_type = orig_data_type; + source_dst->write_mask = reg->mov->dst[0].write_mask; + /* We validate that the modifiers must only be SATURATE, which is idempotent. 
*/ + source_dst->modifiers |= reg->mov->dst[0].modifiers; + vkd3d_shader_instruction_make_nop(reg->mov); + ctx->progress = true; + } + + vkd3d_free(state.ssas); + return VKD3D_OK; +} + +static bool vsir_operand_has_rel_addr(const struct vsir_operand *reg) +{ + for (unsigned int i = 0; i < reg->idx_count; ++i) + { + if (reg->idx[i].rel_addr) + return true; + } + return false; +} + +static bool vsir_instruction_is_pure(const struct vsir_program *program, const struct vkd3d_shader_instruction *ins) +{ + if (vsir_instruction_has_side_effects(ins)) + return false; + + for (unsigned int i = 0; i < ins->src_count; ++i) + { + const struct vsir_operand *reg = &ins->src[i].reg; + + if (!is_read_only(program, reg->type)) + return false; + if (vsir_operand_has_rel_addr(reg)) + return false; + + } + + for (unsigned int i = 0; i < ins->dst_count; ++i) + { + if (vsir_operand_has_rel_addr(&ins->dst[i].reg)) + return false; + } + + return true; +} + +static bool vsir_op_is_commutative(enum vkd3d_shader_opcode opcode) +{ + switch (opcode) + { + case VSIR_OP_AND: + case VSIR_OP_EQO: + case VSIR_OP_EQU: + case VSIR_OP_IADD: + case VSIR_OP_IEQ: + case VSIR_OP_IMAX: + case VSIR_OP_IMIN: + case VSIR_OP_IMUL: + case VSIR_OP_INE: + case VSIR_OP_MAX: + case VSIR_OP_MIN: + case VSIR_OP_NEO: + case VSIR_OP_NEU: + case VSIR_OP_OR: + case VSIR_OP_UMAX: + case VSIR_OP_UMIN: + case VSIR_OP_XOR: + return true; + + default: + return false; + } +} + +struct vsir_cse_expr +{ + struct rb_entry entry; + + struct vkd3d_shader_instruction ins; + unsigned int ins_index; +}; + +static int vsir_register_index_compare(const struct vsir_register_index *a, const struct vsir_register_index *b) +{ + int ret; + + if ((ret = vkd3d_ptr_compare(a->rel_addr, b->rel_addr))) + return ret; + if ((ret = vkd3d_u32_compare(a->offset, b->offset))) + return ret; + return vkd3d_u32_compare(!!a->is_in_bounds, !!b->is_in_bounds); +} + +static int vsir_operand_compare(const struct vsir_operand *a, const struct vsir_operand *b) 
+{ + int ret; + + if ((ret = vkd3d_u32_compare(a->type, b->type))) + return ret; + if ((ret = vkd3d_u32_compare(a->precision, b->precision))) + return ret; + if ((ret = vkd3d_u32_compare(!!a->non_uniform, !!b->non_uniform))) + return ret; + if ((ret = vkd3d_u32_compare(a->data_type, b->data_type))) + return ret; + if ((ret = vkd3d_u32_compare(a->idx_count, b->idx_count))) + return ret; + for (unsigned int i = 0; i < a->idx_count; ++i) + { + if ((ret = vsir_register_index_compare(&a->idx[i], &b->idx[i]))) + return ret; + } + if ((ret = vkd3d_u32_compare(a->dimension, b->dimension))) + return ret; + if ((ret = vkd3d_u32_compare(a->alignment, b->alignment))) + return ret; + + if (a->type == VKD3DSPR_IMMCONST) + { + unsigned int count; + + VKD3D_ASSERT(a->dimension == VSIR_DIMENSION_SCALAR || a->dimension == VSIR_DIMENSION_VEC4); + count = a->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1; + + return memcmp(a->u.immconst_u32, b->u.immconst_u32, sizeof(a->u.immconst_u32[0]) * count); + } + else if (a->type == VKD3DSPR_IMMCONST64) + { + unsigned int count; + + VKD3D_ASSERT(a->dimension == VSIR_DIMENSION_SCALAR || a->dimension == VSIR_DIMENSION_VEC4); + count = a->dimension == VSIR_DIMENSION_VEC4 ? 
2 : 1; + + return memcmp(a->u.immconst_u64, b->u.immconst_u64, sizeof(a->u.immconst_u64[0]) * count); + } + + return 0; +} + +static int vsir_src_operand_compare(const struct vsir_src_operand *a, const struct vsir_src_operand *b) +{ + int ret; + + if ((ret = vkd3d_u32_compare(a->swizzle, b->swizzle))) + return ret; + if ((ret = vkd3d_u32_compare(a->modifiers, b->modifiers))) + return ret; + return vsir_operand_compare(&a->reg, &b->reg); +} + +static int vsir_cse_expr_key_compare(const void *key, const struct rb_entry *e) +{ + const struct vkd3d_shader_instruction *a = &((struct vsir_cse_expr *)key)->ins; + const struct vkd3d_shader_instruction *b = &RB_ENTRY_VALUE(e, struct vsir_cse_expr, entry)->ins; + int ret; + + if ((ret = vkd3d_u32_compare(a->opcode, b->opcode))) + return ret; + if ((ret = vkd3d_u32_compare(a->flags, b->flags))) + return ret; + if ((ret = vkd3d_u32_compare(a->dst->write_mask, b->dst->write_mask))) + return ret; + if ((ret = vkd3d_u32_compare(a->dst->modifiers, b->dst->modifiers))) + return ret; + if ((ret = vkd3d_u32_compare(a->dst->shift, b->dst->shift))) + return ret; + + VKD3D_ASSERT(a->src_count == b->src_count); + for (unsigned int i = 0; i < a->src_count; ++i) + { + if ((ret = vsir_src_operand_compare(&a->src[i], &b->src[i]))) + return ret; + } + + if ((ret = vkd3d_u32_compare(a->texel_offset.u, b->texel_offset.u))) + return ret; + if ((ret = vkd3d_u32_compare(a->texel_offset.v, b->texel_offset.v))) + return ret; + if ((ret = vkd3d_u32_compare(a->texel_offset.w, b->texel_offset.w))) + return ret; + if ((ret = vkd3d_u32_compare(a->resource_type, b->resource_type))) + return ret; + if ((ret = vkd3d_u32_compare(a->resource_stride, b->resource_stride))) + return ret; + return memcmp(a->resource_data_type, b->resource_data_type, sizeof(a->resource_data_type)); +} + +static void vsir_cse_expr_destroy(struct vsir_cse_expr *expr) +{ + vkd3d_free(expr->ins.src); + vkd3d_free(expr); +} + +static struct vsir_cse_expr *vsir_cse_make_expr(const 
struct vkd3d_shader_instruction *ins, unsigned int ins_index) +{ + struct vsir_cse_expr *expr; + + if (!(expr = vkd3d_calloc(sizeof(*expr), 1))) + return NULL; + + expr->ins.opcode = ins->opcode; + expr->ins.flags = ins->flags; + + VKD3D_ASSERT(ins->dst_count == 1); + expr->ins.dst_count = 1; + expr->ins.dst = ins->dst; + + expr->ins.src_count = ins->src_count; + + if (!(expr->ins.src = vkd3d_malloc(sizeof(*expr->ins.src) * ins->src_count))) + { + vkd3d_free(expr); + return NULL; + } + + if (vsir_op_is_commutative(ins->opcode)) + { + /* Normalize operand order for commutative operations. */ + if (vsir_src_operand_compare(&ins->src[0], &ins->src[1]) <= 0) + { + expr->ins.src[0] = ins->src[0]; + expr->ins.src[1] = ins->src[1]; + } + else + { + expr->ins.src[0] = ins->src[1]; + expr->ins.src[1] = ins->src[0]; + } + } + else + { + memcpy(expr->ins.src, ins->src, expr->ins.src_count * sizeof(expr->ins.src[0])); + } + + expr->ins.texel_offset = ins->texel_offset; + + expr->ins.resource_type = ins->resource_type; + expr->ins.resource_stride = ins->resource_stride; + memcpy(expr->ins.resource_data_type, ins->resource_data_type, sizeof(expr->ins.resource_data_type)); + + expr->ins_index = ins_index; + + return expr; +} + +struct vsir_cse_state +{ + struct rb_tree *expr_tables; + size_t capacity; + unsigned int depth; +}; + +static void vsir_cse_free_expr(struct rb_entry *e, void *context) +{ + struct vsir_cse_expr *expr = RB_ENTRY_VALUE(e, struct vsir_cse_expr, entry); + + vsir_cse_expr_destroy(expr); +} + +static struct vsir_cse_expr *vsir_cse_find_expr(struct vsir_cse_state *state, const struct vsir_cse_expr *key) +{ + for (int i = state->depth; i >= 0; --i) + { + struct rb_entry *entry = rb_get(&state->expr_tables[i], key); + + if (entry) + return RB_ENTRY_VALUE(entry, struct vsir_cse_expr, entry); + } + return NULL; +} + +static enum vkd3d_result vsir_program_cse(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vsir_program_iterator 
it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + struct vsir_cse_state state = {0}; + unsigned int ins_idx; + + if (!vkd3d_array_reserve((void **)&state.expr_tables, &state.capacity, 1, sizeof(*state.expr_tables))) + return VKD3D_ERROR_OUT_OF_MEMORY; + rb_init(state.expr_tables, vsir_cse_expr_key_compare); + + for (ins = vsir_program_iterator_head(&it), ins_idx = 0; ins; ins = vsir_program_iterator_next(&it), ++ins_idx) + { + struct vsir_cse_expr *expr, *prev_expr; + + switch (ins->opcode) + { + case VSIR_OP_IF: + case VSIR_OP_IFC: + case VSIR_OP_LOOP: + case VSIR_OP_SWITCH: + ++state.depth; + if (!vkd3d_array_reserve((void **)&state.expr_tables, &state.capacity, + state.depth + 1, sizeof(*state.expr_tables))) + return VKD3D_ERROR_OUT_OF_MEMORY; + rb_init(&state.expr_tables[state.depth], vsir_cse_expr_key_compare); + continue; + + case VSIR_OP_ENDIF: + case VSIR_OP_ENDLOOP: + case VSIR_OP_ENDSWITCH: + rb_destroy(&state.expr_tables[state.depth], vsir_cse_free_expr, NULL); + --state.depth; + continue; + + case VSIR_OP_ELSE: + case VSIR_OP_CASE: + case VSIR_OP_DEFAULT: + case VSIR_OP_HS_CONTROL_POINT_PHASE: + case VSIR_OP_HS_FORK_PHASE: + case VSIR_OP_HS_JOIN_PHASE: + rb_destroy(&state.expr_tables[state.depth], vsir_cse_free_expr, NULL); + rb_init(&state.expr_tables[state.depth], vsir_cse_expr_key_compare); + continue; + + case VSIR_OP_MOV: + continue; + + default: + break; + } + + if (ins->dst_count != 1) + continue; + if (!ins->src_count) + continue; + if (!vsir_instruction_is_pure(program, ins)) + continue; + + if (!(expr = vsir_cse_make_expr(ins, ins_idx))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if ((prev_expr = vsir_cse_find_expr(&state, expr))) + { + struct vkd3d_shader_instruction *mov; + + vsir_cse_expr_destroy(expr); + + /* Replace the current instruction with a MOV from the existing SSA. 
*/ + if (!vsir_program_iterator_insert_after(&it, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = vsir_program_iterator_current(&it); + mov = vsir_program_iterator_next(&it); + + if (!(vsir_instruction_init_with_params(program, mov, &ins->location, VSIR_OP_MOV, 1, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + mov->dst[0] = ins->dst[0]; + + VKD3D_ASSERT(prev_expr->ins.dst->reg.type == VKD3DSPR_SSA); + vsir_src_operand_init_ssa(&mov->src[0], prev_expr->ins.dst->reg.idx[0].offset, + prev_expr->ins.dst->reg.data_type, prev_expr->ins.dst->reg.dimension); + mov->src[0].swizzle = vsir_swizzle_from_writemask(prev_expr->ins.dst->write_mask); + + vkd3d_shader_instruction_make_nop(ins); + ctx->progress = true; + + TRACE("Replaced @%u with a mov from @%u.\n", ins_idx, prev_expr->ins_index); + } + else if (ins->dst[0].reg.type == VKD3DSPR_SSA) + { + if (rb_put(&state.expr_tables[state.depth], expr, &expr->entry) == -1) + { + vsir_cse_expr_destroy(expr); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + } + else + { + vsir_cse_expr_destroy(expr); + } + } + + VKD3D_ASSERT(state.depth == 0); + rb_destroy(&state.expr_tables[0], vsir_cse_free_expr, NULL); + vkd3d_free(state.expr_tables); + + return VKD3D_OK; +} + +static void vsir_optimize(struct vsir_transformation_context *ctx) +{ + do + { + ctx->progress = false; + vsir_transform(ctx, vsir_program_copy_propagation); + vsir_transform(ctx, vsir_program_output_copy_hoisting); + vsir_transform(ctx, vsir_program_cse); + vsir_transform(ctx, vsir_program_dce); + } + while (ctx->progress); +} + +enum vkd3d_result vsir_program_optimize(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx; + + vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); + vsir_optimize(&ctx); + + if (TRACE_ON() && ctx.result >= 0) + vsir_program_trace(program); + + return 
ctx.result; +} + +/* Transformations which should happen at parse time, i.e. before scan + * information is returned to the user. + * + * In particular, some passes need to modify the signature, and + * vkd3d_shader_scan() should report the modified signature for the given + * target. */ +enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx; + + vsir_transformation_context_init(&ctx, program, config_flags, compile_info, message_context); + + /* For vsir_program_ensure_diffuse(). */ + if (program->shader_version.major <= 2) vsir_transform(&ctx, vsir_program_add_diffuse_output); /* For vsir_program_insert_fragment_fog(). */ @@ -15672,6 +16935,11 @@ enum vkd3d_result vsir_program_lower_d3dbc(struct vsir_program *program, uint64_ vsir_transform(&ctx, vsir_program_normalise_ps1_output); } + if (program->shader_version.major == 3 && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + vsir_transform(&ctx, vsir_program_normalise_vs3_point_size_output); + if (program->shader_version.major <= 2 && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + vsir_transform(&ctx, vsir_program_clamp_vs2_color_output); + if (TRACE_ON() && ctx.result >= 0) vsir_program_trace(program); diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index 2049871752c..00604c24031 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -73,13 +73,15 @@ struct msl_resource_type_info static void msl_print_subscript(struct vkd3d_string_buffer *buffer, struct msl_generator *gen, const struct vsir_src_operand *rel_addr, unsigned int offset); -static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, - enum vkd3d_shader_error error, const char *fmt, ...) +#define msl_compiler_error(gen, error, ...) 
\ + msl_compiler_error_(gen, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) msl_compiler_error_(struct msl_generator *gen, + enum vkd3d_shader_error error, const char *function, const char *fmt, ...) { va_list args; va_start(args, fmt); - vkd3d_shader_verror(gen->message_context, &gen->location, error, fmt, args); + vkd3d_shader_verror(gen->message_context, &gen->location, error, function, fmt, args); va_end(args); gen->failed = true; } @@ -307,7 +309,7 @@ static void msl_print_uav_name(struct vkd3d_string_buffer *buffer, struct msl_ge } static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *buffer, - struct msl_generator *gen, const struct vkd3d_shader_register *reg) + struct msl_generator *gen, const struct vsir_operand *reg) { const struct vkd3d_shader_descriptor_info1 *descriptor; unsigned int binding, cbv_id, cbv_idx; @@ -458,6 +460,14 @@ static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *bu vkd3d_string_buffer_printf(buffer, "v_local_thread_index"); return MSL_DATA_UNION; + case VKD3DSPR_OUTSTENCILREF: + if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled stencil reference output in shader type #%x.", + gen->program->shader_version.type); + vkd3d_string_buffer_printf(buffer, "o_stencil_ref"); + return MSL_DATA_UNION; + case VKD3DSPR_UNDEF: switch (reg->dimension) { @@ -556,7 +566,7 @@ static void msl_print_bitcast(struct vkd3d_string_buffer *dst, struct msl_genera static void msl_print_src_with_type(struct vkd3d_string_buffer *buffer, struct msl_generator *gen, const struct vsir_src_operand *vsir_src, uint32_t mask, enum vsir_data_type data_type) { - const struct vkd3d_shader_register *reg = &vsir_src->reg; + const struct vsir_operand *reg = &vsir_src->reg; struct vkd3d_string_buffer *register_name; enum msl_data_type src_data_type; @@ -1957,6 +1967,12 @@ static void 
msl_generate_output_struct_declarations(struct msl_generator *gen) vkd3d_string_buffer_printf(buffer, "uint shader_out_mask [[sample_mask]];\n"); } + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_OUTSTENCILREF)) + { + msl_print_indent(gen->buffer, 1); + vkd3d_string_buffer_printf(buffer, "uint shader_out_stencil_ref [[stencil]];\n"); + } + vkd3d_string_buffer_printf(buffer, "};\n\n"); } @@ -2132,6 +2148,8 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_SAMPLEMASK)) vkd3d_string_buffer_printf(gen->buffer, " output.shader_out_mask = o_mask.u;\n"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_OUTSTENCILREF)) + vkd3d_string_buffer_printf(gen->buffer, " output.shader_out_stencil_ref = o_stencil_ref.u;\n"); } static void msl_generate_entrypoint(struct msl_generator *gen) @@ -2220,6 +2238,8 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 v_local_thread_id;\n"); if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX)) vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 v_local_thread_index;\n"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_OUTSTENCILREF)) + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_scalar o_stencil_ref;\n"); vkd3d_string_buffer_printf(gen->buffer, "\n"); msl_generate_entrypoint_prologue(gen); @@ -2239,6 +2259,8 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, ", v_local_thread_id"); if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX)) vkd3d_string_buffer_printf(gen->buffer, ", v_local_thread_index"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_OUTSTENCILREF)) + vkd3d_string_buffer_printf(gen->buffer, ", o_stencil_ref"); if (gen->program->descriptors.descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); vkd3d_string_buffer_printf(gen->buffer, ");\n\n"); @@ -2252,18 +2274,39 
@@ static void msl_generate_entrypoint(struct msl_generator *gen) static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader_code *out) { + enum vsir_global_flags flags = gen->program->global_flags; struct vkd3d_shader_instruction *ins; struct vsir_program_iterator it; + static const uint64_t ignored_flags = VKD3DSGF_REFACTORING_ALLOWED + | VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL + | VKD3DSGF_BIND_FOR_DURATION + | VKD3DSGF_ENABLE_STENCIL_REF; + MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); vkd3d_string_buffer_printf(gen->buffer, "#include <metal_stdlib>\n"); vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n"); - if (gen->program->global_flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL)) + if (flags & ignored_flags) + { + TRACE("Ignoring global flags %#"PRIx64".\n", flags & ignored_flags); + flags &= ~ignored_flags; + } + + if (flags) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)gen->program->global_flags); + "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags); + + if (gen->program->f16_denormal_mode != VKD3D_SHADER_DENORMAL_MODE_ANY + || gen->program->f32_denormal_mode != VKD3D_SHADER_DENORMAL_MODE_ANY + || gen->program->f64_denormal_mode != VKD3D_SHADER_DENORMAL_MODE_ANY) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Cannot emit denormal modes. 
The target environment does not support float controls."); + return VKD3D_ERROR; + } vkd3d_string_buffer_printf(gen->buffer, "union vkd3d_scalar\n{\n"); vkd3d_string_buffer_printf(gen->buffer, " uint u;\n"); @@ -2320,6 +2363,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_vec4 &v_local_thread_id"); if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_LOCALTHREADINDEX)) vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_vec4 &v_local_thread_index"); + if (bitmap_is_set(gen->program->io_dcls, VKD3DSPR_OUTSTENCILREF)) + vkd3d_string_buffer_printf(gen->buffer, ", thread vkd3d_scalar &o_stencil_ref"); if (gen->program->descriptors.descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", constant descriptor *descriptors"); vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h index 9217237d8d3..c1d752ccb7a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.h +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h @@ -64,7 +64,7 @@ struct preproc_expansion struct preproc_text *arg_values; /* Back-pointer to the macro, if this expansion a macro body. This is * necessary so that argument tokens can be correctly replaced. 
*/ - struct preproc_macro *macro; + const struct preproc_macro *macro; }; struct preproc_macro @@ -109,7 +109,7 @@ struct preproc_ctx */ struct preproc_func_state { - struct preproc_macro *macro; + const struct preproc_macro *macro; size_t arg_count; enum { @@ -137,8 +137,10 @@ void preproc_close_include(struct preproc_ctx *ctx, const struct vkd3d_shader_co struct preproc_macro *preproc_find_macro(struct preproc_ctx *ctx, const char *name); void preproc_free_macro(struct preproc_macro *macro); bool preproc_push_include(struct preproc_ctx *ctx, char *filename, const struct vkd3d_shader_code *code); -void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(4, 5); +#define preproc_warning(ctx, loc, error, ...) \ + preproc_warning_(ctx, loc, error, __FUNCTION__, __VA_ARGS__) +void preproc_warning_(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *function, const char *format, ...) 
VKD3D_PRINTF_FUNC(5, 6); static inline struct preproc_file *preproc_get_top_file(struct preproc_ctx *ctx) { diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index d7130f57c34..9b7760ebb48 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -18,6 +18,10 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ +%top{ +#include "config.h" +} + %{ #include "preproc.tab.h" @@ -211,19 +215,23 @@ INT_SUFFIX [uUlL]{0,2} %% -static void preproc_func_state_cleanup_arg_values(struct preproc_func_state *f) +static void preproc_func_state_cleanup(struct preproc_func_state *f) { - const struct preproc_macro *m; + const struct preproc_macro *m = f->macro; size_t i; - if (!(m = f->macro)) - return; - - for (i = 0; i < m->arg_count; ++i) + if (f->arg_values) { - vkd3d_string_buffer_cleanup(&f->arg_values[i].text); + for (i = 0; i < m->arg_count; ++i) + { + vkd3d_string_buffer_cleanup(&f->arg_values[i].text); + } + free(f->arg_values); + f->arg_values = NULL; } - free(f->arg_values); + f->state = STATE_NONE; + f->arg_count = 0; + f->macro = NULL; } static void update_location(struct preproc_ctx *ctx) @@ -265,7 +273,7 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) * function-like macro bodies and their arguments. 
*/ static bool should_concat(struct preproc_ctx *ctx) { - struct preproc_macro *macro; + const struct preproc_macro *macro; if (!ctx->expansion_count) return false; @@ -368,7 +376,7 @@ static void preproc_text_add(struct preproc_text *text, const char *string) } static bool preproc_push_expansion(struct preproc_ctx *ctx, - const struct preproc_text *text, struct preproc_macro *macro, struct preproc_text *arg_values) + const struct preproc_text *text, const struct preproc_macro *macro, struct preproc_text *arg_values) { struct preproc_expansion *exp; @@ -674,8 +682,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) const char *name = func_state->macro->name; ctx->lookahead_token = token; - func_state->macro = NULL; - func_state->state = STATE_NONE; + preproc_func_state_cleanup(func_state); if (ctx->current_directive) return return_token(T_IDENTIFIER, lval, name); @@ -740,31 +747,34 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) case '}': if (!--func_state->paren_depth) { + const struct preproc_macro *macro = func_state->macro; + /* Not only can you end a macro with ] or }, if * you do, it's treated as part of the last * argument. 
*/ if (token != ')' && current_arg) preproc_text_add(current_arg, text); - if (++func_state->arg_count == func_state->macro->arg_count) + if (++func_state->arg_count == macro->arg_count) { - preproc_push_expansion(ctx, &func_state->macro->body, - func_state->macro, func_state->arg_values); + preproc_push_expansion(ctx, &macro->body, macro, func_state->arg_values); + func_state->arg_values = NULL; } else { preproc_warning(ctx, lloc, VKD3D_SHADER_WARNING_PP_ARGUMENT_COUNT_MISMATCH, "Wrong number of arguments to macro \"%s\": expected %zu, got %zu.", - func_state->macro->name, func_state->macro->arg_count, func_state->arg_count); + macro->name, macro->arg_count, func_state->arg_count); if (ctx->current_directive) - return return_token(T_IDENTIFIER, lval, func_state->macro->name); + { + preproc_func_state_cleanup(func_state); + return return_token(T_IDENTIFIER, lval, macro->name); + } - vkd3d_string_buffer_printf(&ctx->buffer, "%s ", func_state->macro->name); - preproc_func_state_cleanup_arg_values(func_state); + vkd3d_string_buffer_printf(&ctx->buffer, "%s ", macro->name); } - func_state->macro = NULL; - func_state->state = STATE_NONE; + preproc_func_state_cleanup(func_state); } else { @@ -887,6 +897,18 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, preproc_yyparse(ctx.scanner, &ctx); + switch (ctx.directive_func.state) + { + case STATE_NONE: + case STATE_IDENTIFIER: + break; + + case STATE_ARGS: + preproc_warning(&ctx, &loc, VKD3D_SHADER_WARNING_PP_UNTERMINATED_MACRO, + "Unterminated macro invocation."); + break; + } + switch (ctx.text_func.state) { case STATE_NONE: @@ -902,7 +924,8 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, break; } - preproc_func_state_cleanup_arg_values(&ctx.text_func); + preproc_func_state_cleanup(&ctx.directive_func); + preproc_func_state_cleanup(&ctx.text_func); while (ctx.file_count) preproc_pop_buffer(&ctx); yylex_destroy(ctx.scanner); diff --git 
a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y index 95987831faa..7432bb12d2e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.y +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y @@ -24,7 +24,6 @@ #include "vkd3d_shader_private.h" #include "preproc.h" #include <stdio.h> -#include <sys/stat.h> #define PREPROC_YYLTYPE struct vkd3d_shader_location @@ -45,31 +44,30 @@ int preproc_yylex(PREPROC_YYSTYPE *yylval_param, PREPROC_YYLTYPE *yylloc_param, %code { +#include "vkd3d_shader_utils.h" #define YYLLOC_DEFAULT(cur, rhs, n) (cur) = YYRHSLOC(rhs, !!n) -#ifndef S_ISREG -# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#endif - -static void preproc_error(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *format, ...) +#define preproc_error(ctx, loc, error, ...) \ + preproc_error_(ctx, loc, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(5, 6) preproc_error_(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_verror(ctx->message_context, loc, error, format, args); + vkd3d_shader_verror(ctx->message_context, loc, error, function, format, args); va_end(args); ctx->error = true; } -void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *format, ...) +void preproc_warning_(struct preproc_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *function, const char *format, ...) 
{ va_list args; va_start(args, format); - vkd3d_shader_vwarning(ctx->message_context, loc, error, format, args); + vkd3d_shader_vwarning(ctx->message_context, loc, error, function, format, args); va_end(args); } @@ -155,11 +153,7 @@ static bool preproc_push_if(struct preproc_ctx *ctx, bool condition) static int default_open_include(const char *filename, bool local, const char *parent_data, void *context, struct vkd3d_shader_code *out) { - uint8_t *data, *new_data; - size_t size = 4096; - struct stat st; - size_t pos = 0; - size_t ret; + enum vkd3d_result res; FILE *f; if (!(f = fopen(filename, "rb"))) @@ -168,63 +162,9 @@ static int default_open_include(const char *filename, bool local, return VKD3D_ERROR; } - if (fstat(fileno(f), &st) == -1) - { - ERR("Could not stat file %s.\n", debugstr_a(filename)); - fclose(f); - return VKD3D_ERROR; - } - - if (S_ISREG(st.st_mode)) - size = st.st_size; - - if (!size) - { - fclose(f); - - out->code = NULL; - out->size = 0; - - return VKD3D_OK; - } - - if (!(data = vkd3d_malloc(size))) - { - fclose(f); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - for (;;) - { - if (pos >= size) - { - if (size > SIZE_MAX / 2 || !(new_data = vkd3d_realloc(data, size * 2))) - { - vkd3d_free(data); - fclose(f); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - data = new_data; - size *= 2; - } - - if (!(ret = fread(&data[pos], 1, size - pos, f))) - break; - pos += ret; - } - - if (!feof(f)) - { - vkd3d_free(data); - return VKD3D_ERROR; - } - + res = vkd3d_shader_code_from_file(out, f); fclose(f); - - out->code = data; - out->size = pos; - - return VKD3D_OK; + return res; } static void default_close_include(const struct vkd3d_shader_code *code, void *context) diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index dd62f34eb08..be798c147cd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -38,7 +38,7 @@ #define VKD3D_SPIRV_VERSION_1_0 0x00010000 #define VKD3D_SPIRV_VERSION_1_3 
0x00010300 #define VKD3D_SPIRV_GENERATOR_ID 18 -#define VKD3D_SPIRV_GENERATOR_VERSION 19 +#define VKD3D_SPIRV_GENERATOR_VERSION 20 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) #ifndef VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER # define VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER 0 @@ -236,24 +236,28 @@ struct spirv_parser struct vkd3d_string_buffer *text; }; -static void VKD3D_PRINTF_FUNC(3, 4) spirv_parser_error(struct spirv_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) +#define spirv_parser_error(parser, error, ...) \ + spirv_parser_error_(parser, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) spirv_parser_error_(struct spirv_parser *parser, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); + vkd3d_shader_verror(parser->message_context, &parser->location, error, function, format, args); va_end(args); parser->failed = true; } -static void VKD3D_PRINTF_FUNC(3, 4) spirv_parser_warning(struct spirv_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) +#define spirv_parser_warning(parser, error, ...) \ + spirv_parser_warning_(parser, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) spirv_parser_warning_(struct spirv_parser *parser, + enum vkd3d_shader_error error, const char *function, const char *format, ...) 
{ va_list args; va_start(args, format); - vkd3d_shader_vwarning(parser->message_context, &parser->location, error, format, args); + vkd3d_shader_vwarning(parser->message_context, &parser->location, error, function, format, args); va_end(args); } @@ -840,7 +844,7 @@ static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, enum vkd3d_s if (!vkd3d_spirv_binary_to_text(spirv, NULL, environment, formatting, &text, &message_context)) { - vkd3d_shader_trace_text(text.code, text.size); + TRACE_TEXT(text.code, text.size); vkd3d_shader_free_shader_code(&text); } @@ -2587,6 +2591,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderViewportIndexLayerEXT)) vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_viewport_index_layer"); + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityDenormPreserve) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityDenormFlushToZero)) + vkd3d_spirv_build_op_extension(&stream, "SPV_KHR_float_controls"); if (builder->ext_instr_set_glsl_450) vkd3d_spirv_build_op_ext_inst_import(&stream, builder->ext_instr_set_glsl_450, "GLSL.std.450"); @@ -2712,7 +2719,7 @@ struct vkd3d_symbol_register_data struct vkd3d_symbol_resource_data { - struct vkd3d_shader_register_range range; + struct vsir_register_range range; enum vsir_data_type sampled_type; uint32_t type_id; const struct vkd3d_spirv_resource_type *resource_type_info; @@ -2726,7 +2733,7 @@ struct vkd3d_symbol_resource_data struct vkd3d_symbol_sampler_data { - struct vkd3d_shader_register_range range; + struct vsir_register_range range; }; struct vkd3d_descriptor_binding_address @@ -2794,8 +2801,7 @@ static void vkd3d_symbol_free(struct rb_entry *entry, void *context) vkd3d_free(s); } -static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, - const struct vkd3d_shader_register *reg) +static 
void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, const struct vsir_operand *reg) { symbol->type = VKD3D_SYMBOL_REGISTER; memset(&symbol->key, 0, sizeof(symbol->key)); @@ -2842,8 +2848,7 @@ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, uint32_t symbol->info.reg.is_aggregate = false; } -static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, - const struct vkd3d_shader_register *reg) +static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, const struct vsir_operand *reg) { symbol->type = VKD3D_SYMBOL_RESOURCE; memset(&symbol->key, 0, sizeof(symbol->key)); @@ -2851,8 +2856,7 @@ static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, symbol->key.resource.idx = reg->idx[0].offset; } -static void vkd3d_symbol_make_sampler(struct vkd3d_symbol *symbol, - const struct vkd3d_shader_register *reg) +static void vkd3d_symbol_make_sampler(struct vkd3d_symbol *symbol, const struct vsir_operand *reg) { symbol->type = VKD3D_SYMBOL_SAMPLER; memset(&symbol->key, 0, sizeof(symbol->key)); @@ -2860,7 +2864,7 @@ static void vkd3d_symbol_make_sampler(struct vkd3d_symbol *symbol, } static void vkd3d_symbol_make_combined_sampler(struct vkd3d_symbol *symbol, - const struct vkd3d_shader_register *resource_reg, unsigned int sampler_space, unsigned int sampler_index) + const struct vsir_operand *resource_reg, unsigned int sampler_space, unsigned int sampler_index) { symbol->type = VKD3D_SYMBOL_COMBINED_SAMPLER; memset(&symbol->key, 0, sizeof(symbol->key)); @@ -2900,7 +2904,7 @@ static const char *debug_vkd3d_symbol(const struct vkd3d_symbol *symbol) struct vkd3d_push_constant_buffer_binding { - struct vkd3d_shader_register reg; + struct vsir_operand reg; struct vkd3d_shader_push_constant_buffer pc; unsigned int size; }; @@ -3155,6 +3159,9 @@ static struct spirv_compiler *spirv_compiler_create(struct vsir_program *program case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: case 
VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION: case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS: + case VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F16: + case VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F32: + case VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F64: /* Explicitly ignored for this target. */ break; @@ -3280,7 +3287,7 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * } static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( - const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) + const struct spirv_compiler *compiler, const struct vsir_register_range *range) { unsigned int register_space = range->space; unsigned int reg_idx = range->first; @@ -3304,7 +3311,7 @@ static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_const } static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range) + const struct vsir_register_range *range) { const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; const struct vkd3d_shader_combined_resource_sampler *combined_sampler; @@ -3332,7 +3339,7 @@ static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_ } static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range) + const struct vsir_register_range *range) { const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; const struct vkd3d_shader_combined_resource_sampler *combined_sampler; @@ -3359,29 +3366,33 @@ static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_c return false; } -static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_error(struct spirv_compiler *compiler, - enum vkd3d_shader_error error, const char *format, ...) 
+#define spirv_compiler_error(compiler, error, ...) \ + spirv_compiler_error_(compiler, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) spirv_compiler_error_(struct spirv_compiler *compiler, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_verror(compiler->message_context, &compiler->location, error, format, args); + vkd3d_shader_verror(compiler->message_context, &compiler->location, error, function, format, args); va_end(args); compiler->failed = true; } -static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_warning(struct spirv_compiler *compiler, - enum vkd3d_shader_error error, const char *format, ...) +#define spirv_compiler_warning(compiler, error, ...) \ + spirv_compiler_warning_(compiler, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) spirv_compiler_warning_(struct spirv_compiler *compiler, + enum vkd3d_shader_error error, const char *function, const char *format, ...) 
{ va_list args; va_start(args, format); - vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, format, args); + vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, function, format, args); va_end(args); } -static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range) +static struct vkd3d_string_buffer *vsir_register_range_string(struct spirv_compiler *compiler, + const struct vsir_register_range *range) { struct vkd3d_string_buffer *buffer = vkd3d_string_buffer_get(&compiler->string_buffers); @@ -3473,8 +3484,8 @@ static uint32_t spirv_compiler_get_label_id(struct spirv_compiler *compiler, uns } static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_binding( - struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, - const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, + struct spirv_compiler *compiler, const struct vsir_operand *reg, + const struct vsir_register_range *range, enum vkd3d_shader_resource_type resource_type, bool is_uav_counter, struct vkd3d_descriptor_binding_address *binding_address) { const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; @@ -3495,7 +3506,6 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind descriptor_type = VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; else { - FIXME("Unhandled register type %#x.\n", reg->type); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE, "Encountered invalid/unhandled register type %#x.", reg->type); goto done; @@ -3524,12 +3534,9 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind continue; if (current->offset) - { - FIXME("Atomic counter offsets are not supported yet.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_DESCRIPTOR_BINDING, "Descriptor 
binding for UAV counter %u, space %u has unsupported ‘offset’ %u.", range->first, range->space, current->offset); - } binding_address->binding_base_idx = current->register_index - (binding_offsets ? binding_offsets[i].static_offset : 0); @@ -3537,11 +3544,8 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind return current->binding; } if (shader_interface->uav_counter_count) - { - FIXME("Could not find descriptor binding for UAV counter %u, space %u.\n", range->first, range->space); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND, "Could not find descriptor binding for UAV counter %u, space %u.", range->first, range->space); - } } else { @@ -3568,10 +3572,9 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind } if (shader_interface->binding_count) { - struct vkd3d_string_buffer *buffer = vkd3d_shader_register_range_string(compiler, range); + struct vkd3d_string_buffer *buffer = vsir_register_range_string(compiler, range); const char *range_str = buffer ? 
buffer->buffer : ""; - FIXME("Could not find descriptor binding for type %#x, space %u, registers %s, shader type %#x.\n", - descriptor_type, range->space, range_str, compiler->shader_type); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND, "Could not find descriptor binding for type %#x, space %u, registers %s, shader type %#x.", descriptor_type, range->space, range_str, compiler->shader_type); @@ -3746,8 +3749,8 @@ static uint32_t spirv_compiler_get_constant_uint64_vector(struct spirv_compiler return spirv_compiler_get_constant64(compiler, VSIR_DATA_U64, component_count, values); } -static uint32_t spirv_compiler_get_type_id_for_reg(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask) +static uint32_t spirv_compiler_get_type_id_for_operand(struct spirv_compiler *compiler, + const struct vsir_operand *reg, uint32_t write_mask) { return spirv_get_type_id(compiler, reg->data_type, vsir_write_mask_component_count(write_mask)); } @@ -3755,11 +3758,10 @@ static uint32_t spirv_compiler_get_type_id_for_reg(struct spirv_compiler *compil static uint32_t spirv_compiler_get_type_id_for_dst(struct spirv_compiler *compiler, const struct vsir_dst_operand *dst) { - return spirv_compiler_get_type_id_for_reg(compiler, &dst->reg, dst->write_mask); + return spirv_compiler_get_type_id_for_operand(compiler, &dst->reg, dst->write_mask); } -static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_size, - const struct vkd3d_shader_register *reg) +static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_size, const struct vsir_operand *reg) { unsigned int idx; @@ -3860,7 +3862,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s /* TODO: UAV counters: vkd3d_spirv_build_op_name(builder, counter_var_id, "u%u_counter", reg->idx[0].offset); */ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder *builder, - 
uint32_t id, const struct vkd3d_shader_register *reg) + uint32_t id, const struct vsir_operand *reg) { char debug_name[256]; if (spirv_compiler_get_register_name(debug_name, ARRAY_SIZE(debug_name), reg)) @@ -4109,7 +4111,7 @@ static uint32_t spirv_compiler_emit_load_src(struct spirv_compiler *compiler, const struct vsir_src_operand *src, uint32_t write_mask); static uint32_t spirv_compiler_emit_register_addressing(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_index *reg_index) + const struct vsir_register_index *reg_index) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, addr_id; @@ -4142,12 +4144,12 @@ struct vkd3d_shader_register_info }; static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) + const struct vsir_operand *reg, struct vkd3d_shader_register_info *register_info) { struct vkd3d_symbol reg_symbol, *symbol; struct rb_entry *entry; - VKD3D_ASSERT(!register_is_constant_or_undef(reg)); + VKD3D_ASSERT(!vsir_operand_is_constant_or_undef(reg)); if (reg->type == VKD3DSPR_TEMP) { @@ -4243,24 +4245,19 @@ static bool spirv_compiler_enable_descriptor_indexing(struct spirv_compiler *com } static uint32_t spirv_compiler_get_descriptor_index(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, const struct vkd3d_symbol *array_symbol, + const struct vsir_operand *reg, const struct vkd3d_symbol *array_symbol, unsigned int binding_base_idx, enum vkd3d_shader_resource_type resource_type) { const struct vkd3d_symbol_descriptor_array *array_key = &array_symbol->key.descriptor_array; - struct vkd3d_shader_register_index index = reg->idx[1]; + struct vsir_register_index index = reg->idx[1]; unsigned int push_constant_index; uint32_t index_id; - if ((push_constant_index = array_key->push_constant_index) != ~0u || index.rel_addr) - { - if 
(!spirv_compiler_enable_descriptor_indexing(compiler, reg->type, resource_type)) - { - FIXME("The target environment does not support descriptor indexing.\n"); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED, - "Cannot dynamically index a descriptor array of type %#x, id %u. " - "The target environment does not support descriptor indexing.", reg->type, reg->idx[0].offset); - } - } + if (((push_constant_index = array_key->push_constant_index) != ~0u || index.rel_addr) + && !spirv_compiler_enable_descriptor_indexing(compiler, reg->type, resource_type)) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED, + "Cannot dynamically index a descriptor array of type %#x, id %u. " + "The target environment does not support descriptor indexing.", reg->type, reg->idx[0].offset); index.offset -= binding_base_idx; index_id = spirv_compiler_emit_register_addressing(compiler, &index); @@ -4291,7 +4288,7 @@ static uint32_t spirv_compiler_get_descriptor_index(struct spirv_compiler *compi } static void spirv_compiler_emit_dereference_register(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) + const struct vsir_operand *reg, struct vkd3d_shader_register_info *register_info) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int component_count, index_count = 0; @@ -4332,7 +4329,7 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp FIXME("Relative addressing not implemented.\n"); /* Handle arrayed registers, e.g. v[3][0]. 
*/ - if (reg->idx_count > 1 && !vsir_register_is_descriptor(reg)) + if (reg->idx_count > 1 && !vsir_operand_is_descriptor(reg)) indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); } @@ -4352,8 +4349,7 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp } } -static uint32_t spirv_compiler_get_register_id(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg) +static uint32_t spirv_compiler_get_register_id(struct spirv_compiler *compiler, const struct vsir_operand *reg) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct vkd3d_shader_register_info register_info; @@ -4373,7 +4369,7 @@ static bool vkd3d_swizzle_is_equal(uint32_t dst_write_mask, uint32_t swizzle, ui return vkd3d_compact_swizzle(VKD3D_SHADER_NO_SWIZZLE, dst_write_mask) == vkd3d_compact_swizzle(swizzle, write_mask); } -static bool vkd3d_swizzle_is_scalar(uint32_t swizzle, const struct vkd3d_shader_register *reg) +static bool vkd3d_swizzle_is_scalar(uint32_t swizzle, const struct vsir_operand *reg) { unsigned int component_idx = vsir_swizzle_get_component(swizzle, 0); @@ -4531,7 +4527,7 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil } static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) + const struct vsir_operand *reg, uint32_t swizzle, uint32_t write_mask) { unsigned int component_count = vsir_write_mask_component_count(write_mask); uint32_t values[VKD3D_VEC4_SIZE] = {0}; @@ -4557,7 +4553,7 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile } static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) + const struct vsir_operand *reg, uint32_t swizzle, uint32_t write_mask) { unsigned int component_count = 
vsir_write_mask_component_count(write_mask); uint64_t values[VKD3D_DVEC2_SIZE] = {0}; @@ -4583,7 +4579,7 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi } static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask) + const struct vsir_operand *reg, uint32_t write_mask) { unsigned int component_count = vsir_write_mask_component_count(write_mask); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -4596,16 +4592,15 @@ static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, return vkd3d_spirv_get_op_undef(builder, type_id); } -static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t swizzle, - uint32_t write_mask, const struct vkd3d_shader_register_info *reg_info) +static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, const struct vsir_operand *reg, + uint32_t swizzle, uint32_t write_mask, const struct vkd3d_shader_register_info *reg_info) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, ptr_type_id, index, reg_id, val_id; unsigned int component_idx, reg_component_count; uint32_t skipped_component_mask; - VKD3D_ASSERT(!register_is_constant_or_undef(reg)); + VKD3D_ASSERT(!vsir_operand_is_constant_or_undef(reg)); VKD3D_ASSERT(vsir_write_mask_component_count(write_mask) == 1); component_idx = vsir_write_mask_get_component_idx(write_mask); @@ -4716,7 +4711,7 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil } static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) + const struct vsir_operand *reg, uint32_t write_mask, uint32_t val_id) { unsigned int i = reg->idx[0].offset; VKD3D_ASSERT(i < compiler->ssa_register_count); @@ -4726,7 
+4721,7 @@ static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *co } static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) + const struct vsir_operand *reg, uint32_t swizzle, uint32_t write_mask) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct vkd3d_shader_register_info reg_info; @@ -4905,7 +4900,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, uint32_t } static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) + const struct vsir_operand *reg, uint32_t write_mask, uint32_t val_id) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct vkd3d_shader_register_info reg_info; @@ -4913,7 +4908,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, enum vsir_data_type data_type; uint32_t type_id; - VKD3D_ASSERT(!register_is_constant_or_undef(reg)); + VKD3D_ASSERT(!vsir_operand_is_constant_or_undef(reg)); if (reg->type == VKD3DSPR_SSA) { @@ -4944,7 +4939,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, } static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) + const struct vsir_operand *reg, uint32_t write_mask, uint32_t val_id) { unsigned int component_count = vsir_write_mask_component_count(write_mask); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -4961,7 +4956,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, one_id = spirv_compiler_get_constant_float_vector(compiler, 1.0f, component_count); } - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); + type_id = spirv_compiler_get_type_id_for_operand(compiler, reg, write_mask); if 
(data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); @@ -5052,12 +5047,9 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, case VKD3D_SHADER_TYPE_DOMAIN: if (!spirv_compiler_is_target_extension_supported(compiler, VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER)) - { - FIXME("The target environment does not support decoration Layer.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "Cannot use SV_RenderTargetArrayIndex. " "The target environment does not support decoration Layer."); - } vkd3d_spirv_enable_capability(builder, SpvCapabilityShaderViewportIndexLayerEXT); break; @@ -5079,12 +5071,9 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, case VKD3D_SHADER_TYPE_DOMAIN: if (!spirv_compiler_is_target_extension_supported(compiler, VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER)) - { - FIXME("The target environment does not support decoration ViewportIndex.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "Cannot use SV_ViewportArrayIndex. " "The target environment does not support decoration ViewportIndex."); - } vkd3d_spirv_enable_capability(builder, SpvCapabilityShaderViewportIndexLayerEXT); break; @@ -5320,12 +5309,9 @@ static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *c case VKD3DSPR_OUTSTENCILREF: if (!spirv_compiler_is_target_extension_supported(compiler, VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT)) - { - FIXME("The target environment does not support stencil export.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "Cannot export stencil reference value. 
" "The target environment does not support stencil export."); - } vkd3d_spirv_enable_capability(&compiler->spirv_builder, SpvCapabilityStencilExportEXT); spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeStencilRefReplacingEXT, NULL, 0); break; @@ -5399,11 +5385,11 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler) { - struct vkd3d_shader_register r; + struct vsir_operand r; VKD3D_ASSERT(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); - vsir_register_init(&r, VKD3DSPR_OUTPOINTID, VSIR_DATA_F32, 0); + vsir_operand_init(&r, VKD3DSPR_OUTPOINTID, VSIR_DATA_F32, 0); return spirv_compiler_get_register_id(compiler, &r); } @@ -5555,7 +5541,7 @@ static const struct vkd3d_symbol *spirv_compiler_emit_io_register(struct spirv_c const struct vsir_dst_operand *dst) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_register *reg = &dst->reg; + const struct vsir_operand *reg = &dst->reg; const struct vkd3d_spirv_builtin *builtin; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; @@ -5736,9 +5722,9 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, if (use_private_var) { - struct vkd3d_shader_register dst_reg; + struct vsir_operand dst_reg; - vsir_register_init(&dst_reg, reg_type, VSIR_DATA_F32, 1); + vsir_operand_init(&dst_reg, reg_type, VSIR_DATA_F32, 1); dst_reg.idx[0].offset = element_idx; type_id = spirv_get_type_id(compiler, data_type, input_component_count); @@ -6156,23 +6142,71 @@ static size_t spirv_compiler_get_current_function_location(struct spirv_compiler return builder->main_function_location; } +static void spirv_compiler_emit_denormal_mode(struct spirv_compiler *compiler, + unsigned int bit_width, enum vkd3d_shader_denormal_mode mode) +{ + if (mode == VKD3D_SHADER_DENORMAL_MODE_ANY) + return; + + if (!spirv_compiler_is_target_extension_supported(compiler, + 
VKD3D_SHADER_SPIRV_EXTENSION_KHR_FLOAT_CONTROLS)) + { + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "Cannot emit denormal mode for %u-bit floats. " + "The target environment does not support float controls.", bit_width); + return; + } + + if (mode == VKD3D_SHADER_DENORMAL_MODE_PRESERVE) /* Honour the mode requested for this bit width. */ + { + vkd3d_spirv_enable_capability(&compiler->spirv_builder, SpvCapabilityDenormPreserve); + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeDenormPreserve, &bit_width, 1); + } + else if (mode == VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO) + { + vkd3d_spirv_enable_capability(&compiler->spirv_builder, SpvCapabilityDenormFlushToZero); + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeDenormFlushToZero, &bit_width, 1); + } +} + static void spirv_compiler_emit_global_flags(struct spirv_compiler *compiler, enum vsir_global_flags flags) { + static const uint64_t ignored_flags = VKD3DSGF_REFACTORING_ALLOWED + | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS + | VKD3DSGF_SKIP_OPTIMIZATION + | VKD3DSGF_BIND_FOR_DURATION + | VKD3DSGF_ENABLE_VP_AND_RT_ARRAY_INDEX + | VKD3DSGF_ENABLE_STENCIL_REF + | VKD3DSGF_ENABLE_UP_TO_64_UAVS + | VKD3DSGF_ENABLE_UAVS_AT_EVERY_STAGE + | VKD3DSGF_ENABLE_RASTERIZER_ORDERED_VIEWS; + if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) { spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeEarlyFragmentTests, NULL, 0); flags &= ~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; } + /* We're free to ignore this. There may be performance advantages to using + * 16-bit operations for minimum precision types when supported, but in + * terms of correctness the current behaviour should be fine.
*/ + if (flags & VKD3DSGF_ENABLE_MINIMUM_PRECISION) + { + WARN("Ignoring the \"enableMinimumPrecision\" global flag.\n"); + flags &= ~VKD3DSGF_ENABLE_MINIMUM_PRECISION; + } + + spirv_compiler_emit_denormal_mode(compiler, 32, compiler->program->f32_denormal_mode); + if (flags & (VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS | VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS)) { if (compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64) { vkd3d_spirv_enable_capability(&compiler->spirv_builder, SpvCapabilityFloat64); + spirv_compiler_emit_denormal_mode(compiler, 64, compiler->program->f64_denormal_mode); } else { - WARN("Unsupported 64-bit float ops.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "The target environment does not support 64-bit floating point."); } @@ -6182,39 +6216,41 @@ static void spirv_compiler_emit_global_flags(struct spirv_compiler *compiler, en if (flags & VKD3DSGF_ENABLE_INT64) { if (compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_INT64) - { vkd3d_spirv_enable_capability(&compiler->spirv_builder, SpvCapabilityInt64); - } else - { - WARN("Unsupported 64-bit integer ops.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "The target environment does not support 64-bit integers."); - } flags &= ~VKD3DSGF_ENABLE_INT64; } + if (flags & VKD3DSGF_ENABLE_RELAXED_TYPED_UAV_FORMATS) + { + if (!compiler->uav_read_without_format) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "The target environment does not support accessing storage images without specifying a format."); + flags &= ~VKD3DSGF_ENABLE_RELAXED_TYPED_UAV_FORMATS; + } + if (flags & VKD3DSGF_ENABLE_WAVE_INTRINSICS) { if (!(compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS)) - { - WARN("Unsupported wave ops.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "The target environment does not support wave ops."); - } else if 
(!spirv_compiler_is_spirv_min_1_3_target(compiler)) - { - WARN("Wave ops enabled but environment does not support SPIR-V 1.3 or greater.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "The target environment uses wave ops but does not support SPIR-V 1.3 or greater."); - } flags &= ~VKD3DSGF_ENABLE_WAVE_INTRINSICS; } - if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) - FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); - else if (flags) - WARN("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); + if (flags & ignored_flags) + { + TRACE("Ignoring global flags %#"PRIx64".\n", flags & ignored_flags); + flags &= ~ignored_flags; + } + + if (flags) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unhandled global flags %#"PRIx64".", (uint64_t)flags); } static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) @@ -6247,11 +6283,8 @@ static void spirv_compiler_allocate_ssa_register_ids(struct spirv_compiler *comp { VKD3D_ASSERT(!compiler->ssa_register_info); if (!(compiler->ssa_register_info = vkd3d_calloc(count, sizeof(*compiler->ssa_register_info)))) - { - ERR("Failed to allocate SSA register value id array, count %u.\n", count); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_OUT_OF_MEMORY, "Failed to allocate SSA register value id array of count %u.", count); - } compiler->ssa_register_count = count; } @@ -6261,17 +6294,17 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil const struct vkd3d_shader_indexable_temp *temp = &instruction->declaration.indexable_temp; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t id, type_id, length_id, ptr_type_id, init_id = 0; - struct vkd3d_shader_register reg; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; size_t function_location; + struct vsir_operand reg; /* Indexable temps may be used by more than one 
function in hull shaders, and * declarations generally should not occur within VSIR code blocks unless function * scope is specified, e.g. DXIL alloca. */ storage_class = temp->has_function_scope ? SpvStorageClassFunction : SpvStorageClassPrivate; - vsir_register_init(®, VKD3DSPR_IDXTEMP, VSIR_DATA_F32, 1); + vsir_operand_init(®, VKD3DSPR_IDXTEMP, VSIR_DATA_F32, 1); reg.idx[0].offset = temp->register_idx; /* Alignment is supported only in the Kernel execution model and is an optimisation only. */ @@ -6386,7 +6419,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, - const struct vkd3d_shader_register_range *range) + const struct vsir_register_range *range) { const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = &compiler->program->descriptors; unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; @@ -6422,8 +6455,8 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, } static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, - SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, - const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, + SpvStorageClass storage_class, uint32_t type_id, const struct vsir_operand *reg, + const struct vsir_register_range *range, enum vkd3d_shader_resource_type resource_type, const struct vkd3d_shader_descriptor_info1 *descriptor, bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) { @@ -6497,7 +6530,7 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * } static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) + const struct vsir_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; @@ -6505,11 +6538,11 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, unsigned int size_in_bytes = descriptor->buffer_size; struct vkd3d_push_constant_buffer_binding *push_cb; struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_shader_register reg; struct vkd3d_symbol reg_symbol; + struct vsir_operand reg; unsigned int size; - vsir_register_init(®, VKD3DSPR_CONSTBUFFER, VSIR_DATA_F32, 3); + vsir_operand_init(®, VKD3DSPR_CONSTBUFFER, VSIR_DATA_F32, 3); reg.idx[0].offset = descriptor->register_id; reg.idx[1].offset = range->first; reg.idx[2].offset = range->last; @@ -6558,16 +6591,16 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, } static void 
spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) + const struct vsir_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { const SpvStorageClass storage_class = SpvStorageClassUniformConstant; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_shader_register reg; struct vkd3d_symbol reg_symbol; uint32_t type_id, var_id; + struct vsir_operand reg; - vsir_register_init(®, VKD3DSPR_SAMPLER, VSIR_DATA_F32, 1); + vsir_operand_init(®, VKD3DSPR_SAMPLER, VSIR_DATA_F32, 1); reg.idx[0].offset = descriptor->register_id; vkd3d_symbol_make_sampler(®_symbol, ®); @@ -6628,7 +6661,7 @@ static SpvImageFormat image_format_for_image_read(enum vsir_data_type data_type) } static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, + const struct vsir_operand *reg, const struct vsir_register_range *range, const struct vkd3d_spirv_resource_type *resource_type_info, enum vsir_data_type data_type, bool raw_structured) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -6657,7 +6690,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler } static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *resource, const struct vkd3d_shader_register_range *resource_range, + const struct vsir_operand *resource, const struct vsir_register_range *resource_range, enum vkd3d_shader_resource_type resource_type, enum vsir_data_type sampled_type, unsigned int structure_stride, bool raw, const struct vkd3d_spirv_resource_type *resource_type_info) { @@ -6687,14 +6720,11 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct 
spirv_compi continue; if (current->binding.count != 1) - { - FIXME("Descriptor arrays are not supported.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_DESCRIPTOR_BINDING, "Combined descriptor binding for resource %u, space %u, " "and sampler %u, space %u has unsupported ‘count’ %u.", resource_range->first, resource_range->space, current->sampler_index, current->sampler_space, current->binding.count); - } image_type_id = spirv_compiler_get_image_type_id(compiler, resource, resource_range, resource_type_info, sampled_type, structure_stride || raw); @@ -6732,7 +6762,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi } static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) + const struct vsir_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { bool raw = descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; enum vkd3d_shader_resource_type resource_type = descriptor->resource_type; @@ -6745,10 +6775,10 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp const struct vkd3d_spirv_resource_type *resource_type_info; unsigned int sample_count = descriptor->sample_count; struct vkd3d_symbol resource_symbol; - struct vkd3d_shader_register reg; enum vsir_data_type sampled_type; + struct vsir_operand reg; - vsir_register_init(®, is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VSIR_DATA_F32, 1); + vsir_operand_init(®, is_uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VSIR_DATA_F32, 1); reg.idx[0].offset = descriptor->register_id; if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) @@ -6861,9 +6891,8 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp spirv_compiler_put_symbol(compiler, &resource_symbol); } -static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, - unsigned int structure_stride, bool zero_init) +static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, const struct vsir_operand *reg, + unsigned int alignment, unsigned int size, unsigned int structure_stride, bool zero_init) { uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -6871,11 +6900,8 @@ static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler struct vkd3d_symbol reg_symbol; if (zero_init && !(compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_ZERO_INITIALIZE_WORKGROUP_MEMORY)) - { - WARN("Unsupported zero-initialized workgroup memory.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, "The target environment does not support zero-initialized workgroup memory."); - } /* Alignment is supported only in the Kernel execution model. */ if (alignment) @@ -7339,34 +7365,23 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); if (dst->reg.data_type == VSIR_DATA_F16 || dst->reg.data_type == VSIR_DATA_F32) - { val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VSIR_OP_ITOF); - } else if (dst->reg.data_type == VSIR_DATA_F64) - { /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. 
*/ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VSIR_OP_ITOF); - } else if (dst->reg.data_type == VSIR_DATA_I16 || dst->reg.data_type == VSIR_DATA_I32 || dst->reg.data_type == VSIR_DATA_U16 || dst->reg.data_type == VSIR_DATA_U32) - { val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VSIR_OP_ITOI); - } else if (dst->reg.data_type == VSIR_DATA_I64 || dst->reg.data_type == VSIR_DATA_U64) - { val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VSIR_OP_ITOI); - } else - { - WARN("Unhandled data type %u.\n", dst->reg.data_type); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_TYPE, "Register data type %u is unhandled.", dst->reg.data_type); - } spirv_compiler_emit_store_dst(compiler, dst, val_id); } -static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compiler *compiler, +static void spirv_compiler_emit_alu_instruction(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -7380,10 +7395,9 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil if (src->reg.data_type == VSIR_DATA_U64 && instruction->opcode == VSIR_OP_COUNTBITS) { /* At least some drivers support this anyway, but if validation is enabled it will fail. */ - FIXME("Unsupported 64-bit source for bit count.\n"); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, "64-bit source for bit count is not supported."); - return VKD3D_ERROR_INVALID_SHADER; + return; } if (src->reg.data_type == VSIR_DATA_BOOL) @@ -7399,7 +7413,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. 
*/ spirv_compiler_emit_bool_cast(compiler, instruction); - return VKD3D_OK; + return; } } else @@ -7412,7 +7426,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, "Encountered invalid/unhandled instruction \"%s\" (%#x).", vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); - return VKD3D_ERROR_INVALID_SHADER; + return; } VKD3D_ASSERT(instruction->dst_count == 1); @@ -7444,7 +7458,6 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); spirv_compiler_emit_store_dst(compiler, dst, val_id); - return VKD3D_OK; } static void spirv_compiler_emit_saturate(struct spirv_compiler *compiler, @@ -7600,7 +7613,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, unsigned int i, component_count, write_mask; uint32_t components[VKD3D_VEC4_SIZE]; - if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA + if (vsir_operand_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA || src->reg.type == VKD3DSPR_PARAMETER || dst->modifiers || src->modifiers) goto general_implementation; @@ -7808,7 +7821,7 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, float_max_id = spirv_compiler_get_constant_float_vector(compiler, 2147483648.0f, component_count); } - src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); + src_type_id = spirv_compiler_get_type_id_for_operand(compiler, &src->reg, write_mask); dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, int_min_id); @@ -7863,7 +7876,7 @@ static void spirv_compiler_emit_ftou(struct 
spirv_compiler *compiler, float_max_id = spirv_compiler_get_constant_float_vector(compiler, 4294967296.0f, component_count); } - src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); + src_type_id = spirv_compiler_get_type_id_for_operand(compiler, &src->reg, write_mask); dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, zero_id); @@ -8336,7 +8349,7 @@ static void spirv_compiler_emit_branch(struct spirv_compiler *compiler, const struct vsir_src_operand *src = instruction->src; uint32_t condition_id; - if (vsir_register_is_label(&src[0].reg)) + if (vsir_operand_is_label(&src[0].reg)) { if (instruction->src_count > 1) { @@ -8351,11 +8364,8 @@ static void spirv_compiler_emit_branch(struct spirv_compiler *compiler, } if (!vkd3d_swizzle_is_scalar(src->swizzle, &src->reg)) - { - WARN("Unexpected src swizzle %#x.\n", src->swizzle); spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, "The swizzle for a branch condition value is not scalar."); - } condition_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_0); if (src[0].reg.data_type != VSIR_DATA_BOOL) @@ -8382,11 +8392,8 @@ static void spirv_compiler_emit_switch(struct spirv_compiler *compiler, uint32_t *cases; if (!vkd3d_swizzle_is_scalar(src[0].swizzle, &src[0].reg)) - { - WARN("Unexpected src swizzle %#x.\n", src[0].swizzle); spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, "The swizzle for a switch value is not scalar."); - } word_count = instruction->src_count - 3; if (!(cases = vkd3d_calloc(word_count, sizeof(*cases)))) @@ -8487,7 +8494,7 @@ struct vkd3d_shader_image #define VKD3D_IMAGE_FLAG_SAMPLED 0x4 static const struct vkd3d_symbol *spirv_compiler_find_resource(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *resource_reg) + const struct 
vsir_operand *resource_reg) { struct vkd3d_symbol resource_key; struct rb_entry *entry; @@ -8499,7 +8506,7 @@ static const struct vkd3d_symbol *spirv_compiler_find_resource(struct spirv_comp } static const struct vkd3d_symbol *spirv_compiler_find_combined_sampler(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *resource_reg, const struct vkd3d_shader_register *sampler_reg) + const struct vsir_operand *resource_reg, const struct vsir_operand *sampler_reg) { const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; unsigned int sampler_space, sampler_index; @@ -8532,9 +8539,8 @@ static const struct vkd3d_symbol *spirv_compiler_find_combined_sampler(struct sp return NULL; } -static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, - struct vkd3d_shader_image *image, const struct vkd3d_shader_register *resource_reg, - const struct vkd3d_shader_register *sampler_reg, unsigned int flags) +static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, struct vkd3d_shader_image *image, + const struct vsir_operand *resource_reg, const struct vsir_operand *sampler_reg, unsigned int flags) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t sampler_var_id, sampler_id, sampled_image_type_id; @@ -9897,11 +9903,8 @@ static void spirv_compiler_emit_sync(struct spirv_compiler *compiler, bool global_uav = flags & VKD3DSSF_GLOBAL_UAV; if (group_uav && global_uav) - { - WARN("Invalid UAV sync flag combination; assuming global.\n"); spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_UAV_FLAGS, "The flags for a UAV sync instruction are contradictory; assuming global sync."); - } memory_scope = global_uav ? 
SpvScopeDevice : SpvScopeWorkgroup; memory_semantics |= SpvMemorySemanticsUniformMemoryMask | SpvMemorySemanticsImageMemoryMask; flags &= ~(VKD3DSSF_THREAD_GROUP_UAV | VKD3DSSF_GLOBAL_UAV); @@ -10005,7 +10008,7 @@ static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compile const struct vsir_dst_operand *dst = instruction->dst; uint32_t type_id, val_id, lane_id; - if (!register_is_constant_or_undef(&src[1].reg)) + if (!vsir_operand_is_constant_or_undef(&src[1].reg)) { spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, "Non-constant quad read lane indices are not supported."); @@ -10174,7 +10177,7 @@ static void spirv_compiler_emit_wave_read_lane_at(struct spirv_compiler *compile lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); /* TODO: detect values loaded from a const buffer? */ - if (register_is_constant_or_undef(&src[1].reg)) + if (vsir_operand_is_constant_or_undef(&src[1].reg)) { /* Uniform lane_id only. */ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast(builder, type_id, val_id, lane_id); @@ -10217,11 +10220,9 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) vkd3d_spirv_build_op(&compiler->spirv_builder.function_stream, SpvOpBeginInvocationInterlockEXT); } -static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +static void spirv_compiler_handle_instruction(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - int ret = VKD3D_OK; - compiler->location = instruction->location; /* radeonsi from Mesa 20.3.5 seems to get confused by OpLine instructions * before OpFunction, seemingly causing it to fail to find the entry @@ -10313,7 +10314,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VSIR_OP_UTOF: case VSIR_OP_UTOU: case VSIR_OP_XOR: - ret = spirv_compiler_emit_alu_instruction(compiler, instruction); + spirv_compiler_emit_alu_instruction(compiler, 
instruction); break; case VSIR_OP_ISFINITE: spirv_compiler_emit_isfinite(compiler, instruction); @@ -10590,8 +10591,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode); break; } - - return ret; } static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) @@ -10653,7 +10652,7 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c for (i = 0; i < descriptors->descriptor_count; ++i) { const struct vkd3d_shader_descriptor_info1 *descriptor = &descriptors->descriptors[i]; - struct vkd3d_shader_register_range range; + struct vsir_register_range range; range.first = descriptor->register_index; if (descriptor->count == ~0u) @@ -10689,8 +10688,8 @@ static void spirv_compiler_emit_immediate_constant_buffers(struct spirv_compiler const struct vkd3d_shader_immediate_constant_buffer *icb; const struct vsir_program *program = compiler->program; uint32_t type_id, const_id, ptr_type_id, icb_id; - struct vkd3d_shader_register reg; struct vkd3d_symbol reg_symbol; + struct vsir_operand reg; size_t i; for (i = 0; i < program->icb_count; ++i) @@ -10704,7 +10703,7 @@ static void spirv_compiler_emit_immediate_constant_buffers(struct spirv_compiler vkd3d_spirv_build_op_name(builder, icb_id, "icb%zu", icb->register_idx); /* Set an index count of 2 so vkd3d_symbol_make_register() uses idx[0] as a buffer id. 
*/ - vsir_register_init(®, VKD3DSPR_IMMCONSTBUFFER, VSIR_DATA_F32, 2); + vsir_operand_init(®, VKD3DSPR_IMMCONSTBUFFER, VSIR_DATA_F32, 2); reg.idx[0].offset = icb->register_idx; vkd3d_symbol_make_register(®_symbol, ®); vkd3d_symbol_set_register_info(®_symbol, icb_id, SpvStorageClassPrivate, @@ -10722,7 +10721,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program = compiler->program; enum vkd3d_shader_spirv_environment environment; struct vkd3d_shader_instruction *ins; - enum vkd3d_result result = VKD3D_OK; unsigned int i, max_element_count; struct vsir_program_iterator it; @@ -10791,14 +10789,11 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); it = vsir_program_iterator(&program->instructions); - for (ins = vsir_program_iterator_head(&it); ins && result >= 0; ins = vsir_program_iterator_next(&it)) + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { - result = spirv_compiler_handle_instruction(compiler, ins); + spirv_compiler_handle_instruction(compiler, ins); } - if (result < 0) - return result; - if (!is_in_default_phase(compiler)) spirv_compiler_leave_shader_phase(compiler); else @@ -10848,7 +10843,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if (!vkd3d_spirv_validate(&buffer, spirv, environment)) { FIXME("Failed to validate SPIR-V binary.\n"); - vkd3d_shader_trace_text(buffer.buffer, buffer.content_size); + vkd3d_string_buffer_trace(&buffer); if (compiler->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) { diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 003a6635aaf..c50f6ebcbf5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -84,7 +84,7 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); #define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << 
VKD3D_SM4_INTERPOLATION_MODE_SHIFT) #define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 -#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) +#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0x1ffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) #define VKD3D_SM5_PRECISE_SHIFT 19 #define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) @@ -837,7 +837,6 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); ins->opcode = VSIR_OP_INVALID; return; @@ -853,16 +852,13 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui } static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) + const struct vsir_operand *reg, struct vsir_register_range *range) { range->first = reg->idx[1].offset; range->last = reg->idx[shader_is_sm_5_1(sm4) ? 
2 : 1].offset; if (range->last < range->first) - { - FIXME("Invalid register range [%u:%u].\n", range->first, range->last); vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, "Last register %u must not be less than first register %u in range.", range->last, range->first); - } } static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1015,7 +1011,6 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins break; default: - WARN("Unhandled register type %#x.\n", type); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, "Invalid register type %#x for index range base %u, count %u, mask %#x.", type, register_idx, register_count, write_mask); @@ -1035,8 +1030,6 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins if ((r.index <= register_idx && register_idx - r.index < r.count) || (register_idx < r.index && r.index - register_idx < register_count)) { - WARN("Detected index range collision for base %u, count %u, mask %#x.\n", - register_idx, register_count, write_mask); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, "Register index range base %u, count %u, mask %#x collides with a previous declaration.", register_idx, register_count, write_mask); @@ -1055,7 +1048,6 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins if (e && e->sysval_semantic && register_count > 1 && !vsir_sysval_semantic_is_tess_factor(e->sysval_semantic) && !vsir_sysval_semantic_is_clip_cull(e->sysval_semantic)) { - WARN("Sysval %u included in an index range declaration.\n", e->sysval_semantic); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, "Index range base %u, count %u, mask %#x contains sysval %u.", register_idx, register_count, write_mask, e->sysval_semantic); @@ -1064,8 +1056,6 @@ static void 
shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins if ((io_masks[register_idx + i] & write_mask) != write_mask && (io_masks[register_idx + i] & write_mask) != 0) { - WARN("No matching declaration for index range base %u, count %u, mask %#x.\n", - register_idx, register_count, write_mask); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, "Input/output registers matching index range base %u, count %u, mask %#x were not declared.", register_idx, register_count, write_mask); @@ -1204,7 +1194,7 @@ static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *in if (sm4->program->global_flags & (VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS | VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS)) - sm4->program->f64_denorm_mode = VSIR_DENORM_PRESERVE; + sm4->program->f64_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_PRESERVE; } static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, @@ -1410,13 +1400,15 @@ struct tpf_compiler struct dxbc_writer dxbc; }; -static void VKD3D_PRINTF_FUNC(3, 4) tpf_compiler_error(struct tpf_compiler *tpf, - enum vkd3d_shader_error error, const char *fmt, ...) +#define tpf_compiler_error(tpf, error, ...) \ + tpf_compiler_error_(tpf, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) tpf_compiler_error_(struct tpf_compiler *tpf, + enum vkd3d_shader_error error, const char *function, const char *fmt, ...) 
{ va_list args; va_start(args, fmt); - vkd3d_shader_verror(tpf->message_context, &tpf->location, error, fmt, args); + vkd3d_shader_verror(tpf->message_context, &tpf->location, error, function, fmt, args); va_end(args); if (!tpf->first_error) @@ -2004,7 +1996,7 @@ static enum vsir_data_type map_data_type(char t) } static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *tpf, const uint32_t **ptr, - const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) + const uint32_t *end, uint32_t addressing, struct vsir_register_index *reg_idx) { if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) { @@ -2032,7 +2024,7 @@ static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *tpf, const u } static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, - enum vsir_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) + enum vsir_data_type data_type, struct vsir_operand *param, enum vkd3d_shader_src_modifier *modifier) { const struct vkd3d_sm4_register_type_info *register_type_info; enum vkd3d_shader_register_type vsir_register_type; @@ -2064,7 +2056,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; - vsir_register_init(param, vsir_register_type, data_type, order); + vsir_operand_init(param, vsir_register_type, data_type, order); param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; param->non_uniform = false; @@ -2211,13 +2203,10 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui else if (register_type == VKD3D_SM4_RT_IMMCONSTBUFFER) { if (param->idx_count != 1) - { - WARN("Unexpected idx count %u.\n", param->idx_count); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT, "Invalid index count %u for immediate const buffer register; expected 
count 1.", param->idx_count); - } } - else if (!shader_is_sm_5_1(priv) && vsir_register_is_descriptor(param)) + else if (!shader_is_sm_5_1(priv) && vsir_operand_is_descriptor(param)) { /* SM5.1 places a symbol identifier in idx[0] and moves * other values up one slot. Normalize to SM5.1. */ @@ -2229,7 +2218,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui return true; } -bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) +bool shader_sm4_is_scalar_register(const struct vsir_operand *reg) { switch (reg->type) { @@ -2261,7 +2250,7 @@ static uint32_t swizzle_to_sm4(uint32_t s) return ret; } -static bool register_is_input_output(const struct vkd3d_shader_register *reg) +static bool register_is_input_output(const struct vsir_operand *reg) { switch (reg->type) { @@ -2278,7 +2267,7 @@ static bool register_is_input_output(const struct vkd3d_shader_register *reg) } } -static bool register_is_control_point_input(const struct vkd3d_shader_register *reg, +static bool register_is_control_point_input(const struct vsir_operand *reg, const struct vkd3d_shader_sm4_parser *priv) { return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT @@ -2295,7 +2284,7 @@ static uint32_t mask_from_swizzle(uint32_t swizzle) } static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_parser *priv, - const struct vkd3d_shader_register *reg, unsigned int mask) + const struct vsir_operand *reg, unsigned int mask) { unsigned int idx_count = 1 + register_is_control_point_input(reg, priv); const unsigned int *masks; @@ -2335,8 +2324,6 @@ static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_pa /* The signature element registers have already been checked against MAX_REG_OUTPUT. 
*/ if (register_idx >= MAX_REG_OUTPUT || (masks[register_idx] & mask) != mask) { - WARN("Failed to find signature element for register type %#x, index %u and mask %#x.\n", - reg->type, register_idx, mask); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER, "Could not find signature element matching register type %#x, index %u and mask %#x.", reg->type, register_idx, mask); @@ -2384,7 +2371,7 @@ static bool tpf_read_src_operand(struct vkd3d_shader_sm4_parser *tpf, const uint mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; /* Mask seems only to be used for vec4 constants and is always zero. */ - if (!register_is_constant(&src->reg)) + if (!vsir_operand_is_constant(&src->reg)) vkd3d_shader_parser_warning(&tpf->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, "Unhandled mask %#x for a non-constant source register.", mask); else if (mask) @@ -2799,7 +2786,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) return false; - program->f32_denorm_mode = VSIR_DENORM_FLUSH_TO_ZERO; + program->f32_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_FLUSH_TO_ZERO; vkd3d_shader_parser_init(&sm4->p, message_context, compile_info->source_name); sm4->ptr = sm4->start; @@ -2831,8 +2818,6 @@ static bool shader_sm4_parser_validate_signature(struct vkd3d_shader_sm4_parser register_count = signature->elements[i].register_count; if (register_idx != ~0u && (register_idx >= MAX_REG_OUTPUT || MAX_REG_OUTPUT - register_idx < register_count)) { - WARN("%s signature element %u unhandled register index %u, count %u.\n", - name, i, register_idx, register_count); vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS, "%s signature element %u register index %u, count %u exceeds maximum index of %u.", name, i, register_idx, register_count, MAX_REG_OUTPUT - 1); @@ -2840,11 +2825,8 @@ static bool 
shader_sm4_parser_validate_signature(struct vkd3d_shader_sm4_parser } if (!vkd3d_bitmask_is_contiguous(mask = signature->elements[i].mask)) - { - WARN("%s signature element %u mask %#x is not contiguous.\n", name, i, mask); vkd3d_shader_parser_warning(&sm4->p, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS, "%s signature element %u mask %#x is not contiguous.", name, i, mask); - } if (register_idx != ~0u) masks[register_idx] |= mask; @@ -2875,7 +2857,6 @@ static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_s } /* This is very unlikely to occur and would complicate the default control point phase implementation. */ - WARN("Default phase index ranges are not identical.\n"); vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, "Default control point phase input and output index range declarations are not identical."); return; @@ -3010,6 +2991,7 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, + {"sv_stencilref", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTSTENCILREF, false}, }; for (i = 0; i < ARRAY_SIZE(register_table); ++i) @@ -3077,10 +3059,12 @@ static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semant } bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, - const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, - const char *semantic_name, unsigned int semantic_idx, bool output, - bool is_patch_constant_func, bool is_primitive) + const struct vkd3d_shader_version *version, + enum vkd3d_shader_compile_option_backward_compatibility compatibility_flags, + enum vkd3d_tessellator_domain domain, const char *semantic_name, + unsigned int 
semantic_idx, bool output, bool is_patch_constant_func, bool is_primitive) { + bool semantic_compat_mapping = (compatibility_flags & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES); unsigned int i; static const struct @@ -3097,53 +3081,72 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_clipdistance", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, + {"sv_culldistance", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, {"sv_position", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, + {"sv_clipdistance", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_CLIP_DISTANCE}, + {"sv_culldistance", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_CULL_DISTANCE}, {"sv_position", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + {"sv_clipdistance", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_CLIP_DISTANCE}, + {"sv_culldistance", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_CULL_DISTANCE}, {"sv_gsinstanceid", false, VKD3D_SHADER_TYPE_GEOMETRY, ~0u}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_clipdistance", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_CLIP_DISTANCE}, + 
{"sv_culldistance", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_CULL_DISTANCE}, + {"sv_isfrontface", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_IS_FRONT_FACE}, {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, - {"sv_isfrontface", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_IS_FRONT_FACE}, {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + {"sv_clipdistance", false, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_CLIP_DISTANCE}, + {"sv_culldistance", false, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_CULL_DISTANCE}, {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, {"sv_position", false, VKD3D_SHADER_TYPE_HULL, ~0u}, {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + {"sv_clipdistance", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_CLIP_DISTANCE}, + {"sv_culldistance", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_CULL_DISTANCE}, {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_clipdistance", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_CLIP_DISTANCE}, + {"sv_culldistance", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_CULL_DISTANCE}, + {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_IS_FRONT_FACE}, {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, - {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_IS_FRONT_FACE}, 
{"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, - {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, {"sv_sampleindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_SAMPLE_INDEX}, + {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_IS_FRONT_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_COVERAGE}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, + {"sv_stencilref", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_STENCIL_REF}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, + {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_INSTANCE_ID}, {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_NONE}, {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VERTEX_ID}, - {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_INSTANCE_ID}, {"position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, + {"sv_clipdistance", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_CLIP_DISTANCE}, + {"sv_culldistance", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_CULL_DISTANCE}, {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, @@ -3161,6 +3164,10 @@ bool 
sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s *sysval_semantic = VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX; else if (!ascii_strcasecmp(semantic_name, "sv_viewportarrayindex")) *sysval_semantic = VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX; + else if (!ascii_strcasecmp(semantic_name, "sv_clipdistance") && semantic_idx <= 1) + *sysval_semantic = VKD3D_SHADER_SV_CLIP_DISTANCE; + else if (!ascii_strcasecmp(semantic_name, "sv_culldistance") && semantic_idx <= 1) + *sysval_semantic = VKD3D_SHADER_SV_CULL_DISTANCE; else if (has_sv_prefix) return false; else @@ -3206,6 +3213,8 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s && (semantic_compat_mapping || has_sv_prefix) && version->type == semantics[i].shader_type) { + if (vsir_sysval_semantic_is_clip_cull(semantics[i].semantic) && semantic_idx > 1) + return false; if (is_patch_constant_func && output && semantics[i].semantic != ~0u) *sysval_semantic = VKD3D_SHADER_SV_NONE; else @@ -3417,8 +3426,7 @@ struct sm4_instruction unsigned int idx_src_count; }; -static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, - unsigned int i) +static unsigned int sm4_get_index_addressing_from_reg(const struct vsir_operand *reg, unsigned int i) { if (reg->idx[i].rel_addr) { @@ -3431,7 +3439,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ return 0; } -static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, +static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vsir_operand *reg, enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { const struct vkd3d_sm4_register_type_info *register_type_info; @@ -3471,7 +3479,7 @@ static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct switch (sm4_swizzle_type) { case VKD3D_SM4_SWIZZLE_NONE: - if (register_is_constant(reg)) + if 
(vsir_operand_is_constant(reg)) break; VKD3D_ASSERT(sm4_swizzle); token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; @@ -3493,11 +3501,10 @@ static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct return token; } -static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, - unsigned int j) +static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vsir_operand *reg, unsigned int j) { unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); - const struct vkd3d_shader_register_index *idx = &reg->idx[j]; + const struct vsir_register_index *idx = &reg->idx[j]; struct vkd3d_bytecode_buffer *buffer = tpf->buffer; unsigned int k; @@ -3944,7 +3951,7 @@ static void write_sm4_dcl_global_flags(struct tpf_compiler *tpf, uint32_t flags) struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, - .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, + .extra_bits = (flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) & VKD3D_SM4_GLOBAL_FLAGS_MASK, }; write_sm4_instruction(tpf, &instr); @@ -4070,7 +4077,7 @@ static void tpf_write_dcl_vertices_out(struct tpf_compiler *tpf, unsigned int co /* Descriptor registers are stored in shader model 5.1 format regardless * of the program's version. Convert them to the 4.0 format if necessary. 
*/ -static void rewrite_descriptor_register(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg) +static void rewrite_descriptor_register(const struct tpf_compiler *tpf, struct vsir_operand *reg) { if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) return; @@ -4428,7 +4435,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf) put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); token_count_position = put_u32(&buffer, 0); - if (program->global_flags) + if (program->global_flags & (VKD3D_SM4_GLOBAL_FLAGS_MASK >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT)) write_sm4_dcl_global_flags(tpf, program->global_flags); if (version->type == VKD3D_SHADER_TYPE_HULL) @@ -4473,6 +4480,8 @@ static void tpf_write_sfi0(struct tpf_compiler *tpf) if (tpf->program->global_flags & VKD3DSGF_ENABLE_MINIMUM_PRECISION) *flags |= DXBC_SFI0_REQUIRES_MINIMUM_PRECISION; + if (tpf->program->global_flags & VKD3DSGF_ENABLE_STENCIL_REF) + *flags |= DXBC_SFI0_REQUIRES_STENCIL_REF; /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index ddbf91ac482..bf4ab9a88f7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,20 @@ #include <stdio.h> #include <math.h> +float vkd3d_parse_float(const char *s, vkd3d_locale l) +{ +#ifdef HAVE_STRTOF_L + return strtof_l(s, NULL, l); +#elif HAVE__STRTOF_L + return _strtof_l(s, NULL, l); +#elif HAVE__STRTOD_L + return _strtod_l(s, NULL, l); +#else +#warning "Neither strtof_l() no strtod_l() is available, using strtof()." 
+ return strtof(s, NULL); +#endif +} + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -164,6 +178,27 @@ int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *f return ret; } +int vkd3d_string_buffer_print_f16(struct vkd3d_string_buffer *buffer, uint16_t f) +{ + size_t idx = buffer->content_size + 1; + union + { + uint32_t u32; + float f32; + } v; + int ret; + + v.u32 = vkd3d_f32_from_f16(f); + if (!(ret = vkd3d_string_buffer_printf(buffer, "%.4e", v.f32)) && isfinite(v.f32)) + { + if (signbit(v.f32)) + ++idx; + buffer->buffer[idx] = '.'; + } + + return ret; +} + int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f) { size_t idx = buffer->content_size + 1; @@ -250,24 +285,8 @@ fail: void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function) { - vkd3d_shader_trace_text_(buffer->buffer, buffer->content_size, function); -} - -void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function) -{ - const char *p, *q, *end = text + size; - - if (!TRACE_ON()) - return; - - for (p = text; p < end; p = q) - { - if (!(q = memchr(p, '\n', end - p))) - q = end; - else - ++q; - vkd3d_dbg_printf(VKD3D_DEBUG_ENV_NAME, VKD3D_DBG_LEVEL_TRACE, function, "%.*s", (int)(q - p), p); - } + vkd3d_debug_channel_print_text(vkd3d_debug_channel_default, VKD3D_DEBUG_ENV_NAME, + VKD3D_DEBUG_CLASS_TRACE, function, buffer->buffer, buffer->content_size); } void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *cache) @@ -366,7 +385,7 @@ void vkd3d_shader_string_from_message_context(char **out, struct vkd3d_shader_me } void vkd3d_shader_vnote(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_log_level level, const char *format, va_list args) + enum vkd3d_shader_log_level level, const char *function, const char *format, va_list args) { struct vkd3d_string_buffer *messages = &context->messages; size_t 
pos = messages->content_size; @@ -387,13 +406,14 @@ void vkd3d_shader_vnote(struct vkd3d_shader_message_context *context, const stru vkd3d_string_buffer_vprintf(messages, format, args); vkd3d_string_buffer_printf(messages, "\n"); - WARN("%.*s", (int)(messages->content_size - pos), &messages->buffer[pos]); + vkd3d_debug_channel_printf(vkd3d_debug_channel_default, VKD3D_DEBUG_ENV_NAME, VKD3D_DEBUG_CLASS_WARN, + function, "%.*s", (int)(messages->content_size - pos), &messages->buffer[pos]); if (context->log_level < level) messages->content_size = pos; } void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, va_list args) + enum vkd3d_shader_error error, const char *function, const char *format, va_list args) { struct vkd3d_string_buffer *messages = &context->messages; size_t pos = messages->content_size; @@ -418,23 +438,24 @@ void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const s vkd3d_string_buffer_vprintf(messages, format, args); vkd3d_string_buffer_printf(messages, "\n"); - WARN("%.*s", (int)(messages->content_size - pos), &messages->buffer[pos]); + vkd3d_debug_channel_printf(vkd3d_debug_channel_default, VKD3D_DEBUG_ENV_NAME, VKD3D_DEBUG_CLASS_WARN, + function, "%.*s", (int)(messages->content_size - pos), &messages->buffer[pos]); if (context->log_level < VKD3D_SHADER_LOG_WARNING) messages->content_size = pos; } -void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, ...) +void vkd3d_shader_warning_(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *function, const char *format, ...) 
{ va_list args; va_start(args, format); - vkd3d_shader_vwarning(context, location, error, format, args); + vkd3d_shader_vwarning(context, location, error, function, format, args); va_end(args); } void vkd3d_shader_verror(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, va_list args) + enum vkd3d_shader_error error, const char *function, const char *format, va_list args) { struct vkd3d_string_buffer *messages = &context->messages; size_t pos = messages->content_size; @@ -459,18 +480,19 @@ void vkd3d_shader_verror(struct vkd3d_shader_message_context *context, const str vkd3d_string_buffer_vprintf(messages, format, args); vkd3d_string_buffer_printf(messages, "\n"); - WARN("%.*s", (int)(messages->content_size - pos), &messages->buffer[pos]); + vkd3d_debug_channel_printf(vkd3d_debug_channel_default, VKD3D_DEBUG_ENV_NAME, VKD3D_DEBUG_CLASS_WARN, + function, "%.*s", (int)(messages->content_size - pos), &messages->buffer[pos]); if (context->log_level < VKD3D_SHADER_LOG_ERROR) messages->content_size = pos; } -void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, ...) +void vkd3d_shader_error_(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_verror(context, location, error, format, args); + vkd3d_shader_verror(context, location, error, function, format, args); va_end(args); } @@ -760,13 +782,13 @@ void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, parser->status = VKD3D_OK; } -void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) 
+void VKD3D_PRINTF_FUNC(4, 5) vkd3d_shader_parser_error_(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); + vkd3d_shader_verror(parser->message_context, &parser->location, error, function, format, args); va_end(args); if (parser->status >= 0) @@ -797,13 +819,13 @@ void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parse } } -void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) +void VKD3D_PRINTF_FUNC(4, 5) vkd3d_shader_parser_warning_(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_vwarning(parser->message_context, &parser->location, error, format, args); + vkd3d_shader_vwarning(parser->message_context, &parser->location, error, function, format, args); va_end(args); } @@ -854,9 +876,19 @@ static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *comp const struct shader_dump_data *dump_data, struct vkd3d_shader_message_context *message_context, struct vsir_program *program, struct vkd3d_shader_code *reflection_data) { + enum vkd3d_shader_api_version api_version = VKD3D_SHADER_API_VERSION_1_2; struct vkd3d_shader_compile_info preprocessed_info; struct vkd3d_shader_code preprocessed; enum vkd3d_result ret; + unsigned int i; + + for (i = 0; i < compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + + if (option->name == VKD3D_SHADER_COMPILE_OPTION_API_VERSION) + api_version = option->value; + } switch (compile_info->source_type) { @@ -897,6 +929,36 @@ static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *comp return ret; } + if 
(api_version <= VKD3D_SHADER_API_VERSION_1_19) + { + program->f16_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_ANY; + program->f32_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_ANY; + program->f64_denormal_mode = VKD3D_SHADER_DENORMAL_MODE_ANY; + } + + for (i = 0; i < compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + + switch (option->name) + { + case VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F16: + program->f16_denormal_mode = option->value; + break; + + case VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F32: + program->f32_denormal_mode = option->value; + break; + + case VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F64: + program->f64_denormal_mode = option->value; + break; + + default: + break; + } + } + if ((ret = vsir_program_validate(program, config_flags, compile_info->source_name, message_context)) < 0) { WARN("Failed to validate shader after parsing, ret %d.\n", ret); @@ -1009,23 +1071,27 @@ struct vkd3d_shader_scan_context enum vkd3d_shader_tessellator_partitioning partitioning; }; -static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, - enum vkd3d_shader_error error, const char *format, ...) +#define vkd3d_shader_scan_error(context, error, ...) \ + vkd3d_shader_scan_error_(context, error, __FUNCTION__, __VA_ARGS__) +static VKD3D_PRINTF_FUNC(4, 5) void vkd3d_shader_scan_error_(struct vkd3d_shader_scan_context *context, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_verror(context->message_context, &context->location, error, format, args); + vkd3d_shader_verror(context->message_context, &context->location, error, function, format, args); va_end(args); } -static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_scan_warning(struct vkd3d_shader_scan_context *context, - enum vkd3d_shader_error error, const char *format, ...) 
+#define vkd3d_shader_scan_warning(context, error, ...) \ + vkd3d_shader_scan_warning_(context, error, __FUNCTION__, __VA_ARGS__) +static void VKD3D_PRINTF_FUNC(4, 5) vkd3d_shader_scan_warning_(struct vkd3d_shader_scan_context *context, + enum vkd3d_shader_error error, const char *function, const char *format, ...) { va_list args; va_start(args, format); - vkd3d_shader_vwarning(context->message_context, &context->location, error, format, args); + vkd3d_shader_vwarning(context->message_context, &context->location, error, function, format, args); va_end(args); } @@ -1128,7 +1194,7 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf } static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg, uint32_t flag) + const struct vsir_operand *reg, uint32_t flag) { unsigned int range_id = reg->idx[0].offset; unsigned int i; @@ -1159,7 +1225,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr } static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) + const struct vsir_operand *reg) { vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); } @@ -1172,7 +1238,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in } static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) + const struct vsir_operand *reg) { vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); } @@ -1186,14 +1252,14 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ } static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) + const struct vsir_operand *reg) { vkd3d_shader_scan_add_uav_flag(context, reg, 
VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); } static struct vkd3d_shader_descriptor_info1 *vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, - enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, - const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, + enum vkd3d_shader_descriptor_type type, const struct vsir_operand *reg, + const struct vsir_register_range *range, enum vkd3d_shader_resource_type resource_type, enum vsir_data_type resource_data_type) { struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; @@ -1265,7 +1331,7 @@ const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( } static void vkd3d_shader_scan_combined_sampler_usage(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *resource, const struct vkd3d_shader_register *sampler) + const struct vsir_operand *resource, const struct vsir_operand *sampler) { struct vkd3d_shader_scan_combined_resource_sampler_info *info; struct vkd3d_shader_combined_resource_sampler_info *s; @@ -1376,7 +1442,7 @@ static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_sca static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_instruction *instruction) { - const struct vkd3d_shader_register *sampler_reg; + const struct vsir_operand *sampler_reg; struct vkd3d_shader_cf_info *cf_info; unsigned int i; @@ -1702,6 +1768,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; struct vkd3d_shader_scan_thread_group_size_info *thread_group_size_info; + struct vkd3d_shader_scan_denormal_mode_info *denormal_mode_info; struct vkd3d_shader_scan_descriptor_info *descriptor_info; struct 
vkd3d_shader_scan_signature_info *signature_info; struct vkd3d_shader_scan_context context; @@ -1726,6 +1793,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); thread_group_size_info = vkd3d_find_struct(compile_info->next, SCAN_THREAD_GROUP_SIZE_INFO); + denormal_mode_info = vkd3d_find_struct(compile_info->next, SCAN_DENORMAL_MODE_INFO); vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, add_descriptor_info ? &program->descriptors : NULL, combined_sampler_info, message_context); @@ -1770,6 +1838,13 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh thread_group_size_info->z = program->thread_group_size.z; } + if (!ret && denormal_mode_info) + { + denormal_mode_info->f16_denormal_mode = program->f16_denormal_mode; + denormal_mode_info->f32_denormal_mode = program->f32_denormal_mode; + denormal_mode_info->f64_denormal_mode = program->f64_denormal_mode; + } + if (ret < 0) { if (combined_sampler_info) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 7c278a63b80..755f5357022 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -55,6 +55,10 @@ #include <limits.h> #include <stdbool.h> #include <string.h> +#ifdef HAVE_XLOCALE_H +#include <xlocale.h> +#endif +#include <locale.h> #define VKD3D_VEC4_SIZE 4 #define VKD3D_DVEC2_SIZE 2 @@ -189,6 +193,7 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE = 5306, VKD3D_SHADER_WARNING_HLSL_IGNORED_MODIFIER = 5307, VKD3D_SHADER_WARNING_HLSL_OVERRIDDEN_SEMANTIC = 5308, + VKD3D_SHADER_WARNING_HLSL_REDEFINED = 5309, VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND = 6001, @@ -279,6 +284,7 @@ enum vkd3d_shader_error 
VKD3D_SHADER_ERROR_VSIR_INVALID_RESOURCE_TYPE = 9025, VKD3D_SHADER_ERROR_VSIR_INVALID_DESCRIPTOR_COUNT = 9026, VKD3D_SHADER_ERROR_VSIR_INVALID_FLAGS = 9027, + VKD3D_SHADER_ERROR_VSIR_MULTIPLE_SRC_OPERAND_OWNERS = 9028, VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, @@ -728,6 +734,8 @@ enum vkd3d_shader_register_type VKD3DSPR_INVALID = ~0u, }; +const char *vsir_register_type_get_name(enum vkd3d_shader_register_type type, const char *error); + enum vsir_rastout_register { VSIR_RASTOUT_POSITION = 0x0, @@ -872,7 +880,7 @@ enum vsir_global_flags VKD3DSGF_SKIP_OPTIMIZATION = 0x10, VKD3DSGF_ENABLE_MINIMUM_PRECISION = 0x20, VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS = 0x40, - VKD3DSGF_ENABLE_SHADER_EXTENSIONS = 0x80, /* never emitted? */ + VKD3DSGF_ENABLE_11_1_SHADER_EXTENSIONS = 0x80, VKD3DSGF_BIND_FOR_DURATION = 0x100, VKD3DSGF_ENABLE_VP_AND_RT_ARRAY_INDEX = 0x200, VKD3DSGF_ENABLE_INNER_COVERAGE = 0x400, @@ -888,7 +896,7 @@ enum vsir_global_flags VKD3DSGF_ENABLE_INT64 = 0x100000, VKD3DSGF_ENABLE_VIEWID = 0x200000, VKD3DSGF_ENABLE_BARYCENTRICS = 0x400000, - VKD3DSGF_FORCE_NATIVE_LOW_PRECISION = 0x800000, + VKD3DSGF_ENABLE_NATIVE_LOW_PRECISION = 0x800000, VKD3DSGF_ENABLE_SHADINGRATE = 0x1000000, VKD3DSGF_ENABLE_RAYTRACING_TIER_1_1 = 0x2000000, VKD3DSGF_ENABLE_SAMPLER_FEEDBACK = 0x4000000, @@ -1033,7 +1041,9 @@ struct vkd3d_shader_indexable_temp const struct vkd3d_shader_immediate_constant_buffer *initialiser; }; -struct vkd3d_shader_register_index +/* This structure is used by vsir_register_index_compare(); changes to the + * structure should be reflected by the comparison function as well. */ +struct vsir_register_index { struct vsir_src_operand *rel_addr; unsigned int offset; @@ -1041,13 +1051,15 @@ struct vkd3d_shader_register_index bool is_in_bounds; }; -struct vkd3d_shader_register +/* This structure is used by vsir_operand_compare(); changes to the structure + * should be reflected by the comparison function as well. 
*/ +struct vsir_operand { enum vkd3d_shader_register_type type; enum vkd3d_shader_register_precision precision; bool non_uniform; enum vsir_data_type data_type; - struct vkd3d_shader_register_index idx[3]; + struct vsir_register_index idx[3]; unsigned int idx_count; enum vsir_dimension dimension; /* known address alignment for optimisation, or zero */ @@ -1062,10 +1074,10 @@ struct vkd3d_shader_register } u; }; -void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, +void vsir_operand_init(struct vsir_operand *reg, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int idx_count); -static inline bool vsir_register_is_descriptor(const struct vkd3d_shader_register *reg) +static inline bool vsir_operand_is_descriptor(const struct vsir_operand *reg) { switch (reg->type) { @@ -1080,6 +1092,31 @@ static inline bool vsir_register_is_descriptor(const struct vkd3d_shader_registe } } +static inline bool vsir_operand_is_constant(const struct vsir_operand *reg) +{ + return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); +} + +static inline bool vsir_operand_is_undef(const struct vsir_operand *reg) +{ + return reg->type == VKD3DSPR_UNDEF; +} + +static inline bool vsir_operand_is_constant_or_undef(const struct vsir_operand *reg) +{ + return vsir_operand_is_constant(reg) || vsir_operand_is_undef(reg); +} + +static inline bool vsir_operand_is_label(const struct vsir_operand *reg) +{ + return reg->type == VKD3DSPR_LABEL; +} + +static inline bool vsir_operand_is_ssa(const struct vsir_operand *reg) +{ + return reg->type == VKD3DSPR_SSA; +} + static inline enum vkd3d_shader_register_type vsir_register_type_from_sysval_input( enum vkd3d_shader_sysval_semantic sysval) { @@ -1094,9 +1131,11 @@ static inline enum vkd3d_shader_register_type vsir_register_type_from_sysval_inp } } +/* This structure is used by vsir_cse_expr_key_compare(); changes to the + * structure should be reflected by that 
function as well. */ struct vsir_dst_operand { - struct vkd3d_shader_register reg; + struct vsir_operand reg; uint32_t write_mask; uint32_t modifiers; unsigned int shift; @@ -1105,17 +1144,24 @@ struct vsir_dst_operand void vsir_dst_operand_init(struct vsir_dst_operand *dst, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int idx_count); void vsir_dst_operand_init_null(struct vsir_dst_operand *dst); +void vsir_dst_operand_init_ssa_f32v4(struct vsir_dst_operand *dst, unsigned int idx); +/* This structure is used by vsir_src_operand_compare(); changes to the + * structure should be reflected by the comparison function as well. */ struct vsir_src_operand { - struct vkd3d_shader_register reg; + struct vsir_operand reg; uint32_t swizzle; enum vkd3d_shader_src_modifier modifiers; + + /* Index of the instruction that uses the src operand, only used during validation. */ + size_t owner_index; }; void vsir_src_operand_init(struct vsir_src_operand *src, enum vkd3d_shader_register_type reg_type, enum vsir_data_type data_type, unsigned int idx_count); void vsir_src_operand_init_label(struct vsir_src_operand *src, unsigned int label_id); +void vsir_src_operand_init_ssa_f32v4(struct vsir_src_operand *src, unsigned int idx); struct vkd3d_shader_index_range { @@ -1123,7 +1169,7 @@ struct vkd3d_shader_index_range unsigned int register_count; }; -struct vkd3d_shader_register_range +struct vsir_register_range { unsigned int space; unsigned int first, last; @@ -1132,7 +1178,7 @@ struct vkd3d_shader_register_range struct vkd3d_shader_resource { struct vsir_dst_operand reg; - struct vkd3d_shader_register_range range; + struct vsir_register_range range; }; enum vkd3d_decl_usage @@ -1288,14 +1334,14 @@ struct vkd3d_shader_register_semantic struct vkd3d_shader_sampler { struct vsir_src_operand src; - struct vkd3d_shader_register_range range; + struct vsir_register_range range; }; struct vkd3d_shader_constant_buffer { struct vsir_src_operand src; unsigned 
int size; - struct vkd3d_shader_register_range range; + struct vsir_register_range range; }; struct vkd3d_shader_structured_resource @@ -1380,6 +1426,8 @@ struct vkd3d_shader_location unsigned int line, column; }; +/* This structure is used by vsir_cse_expr_key_compare(); changes to the + * structure should be reflected by that function as well. */ struct vkd3d_shader_instruction { struct vkd3d_shader_location location; @@ -1447,37 +1495,6 @@ static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_ return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; } -static inline bool register_is_constant(const struct vkd3d_shader_register *reg) -{ - return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); -} - -static inline bool register_is_undef(const struct vkd3d_shader_register *reg) -{ - return reg->type == VKD3DSPR_UNDEF; -} - -static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) -{ - return register_is_constant(reg) || register_is_undef(reg); -} - -static inline bool register_is_numeric_array(const struct vkd3d_shader_register *reg) -{ - return (reg->type == VKD3DSPR_IMMCONSTBUFFER || reg->type == VKD3DSPR_IDXTEMP - || reg->type == VKD3DSPR_GROUPSHAREDMEM); -} - -static inline bool vsir_register_is_label(const struct vkd3d_shader_register *reg) -{ - return reg->type == VKD3DSPR_LABEL; -} - -static inline bool register_is_ssa(const struct vkd3d_shader_register *reg) -{ - return reg->type == VKD3DSPR_SSA; -} - struct vkd3d_shader_param_node { struct vkd3d_shader_param_node *next; @@ -1638,14 +1655,7 @@ enum vsir_normalisation_level VSIR_NORMALISED_SM6, }; -enum vsir_denorm_mode -{ - VSIR_DENORM_ANY = 0, - VSIR_DENORM_PRESERVE, - VSIR_DENORM_FLUSH_TO_ZERO, -}; - -const char *vsir_denorm_mode_get_name(enum vsir_denorm_mode m, const char *error); +const char *vsir_denorm_mode_get_name(enum vkd3d_shader_denormal_mode m, const char *error); struct vkd3d_shader_descriptor_info1 
{ @@ -1726,9 +1736,9 @@ struct vsir_program struct vkd3d_shader_param_allocator src_operands; struct vkd3d_shader_param_allocator dst_operands; - enum vsir_denorm_mode f16_denorm_mode; - enum vsir_denorm_mode f32_denorm_mode; - enum vsir_denorm_mode f64_denorm_mode; + enum vkd3d_shader_denormal_mode f16_denormal_mode; + enum vkd3d_shader_denormal_mode f32_denormal_mode; + enum vkd3d_shader_denormal_mode f64_denormal_mode; }; enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, @@ -1737,8 +1747,7 @@ enum vkd3d_result vsir_update_dcl_temps(struct vsir_program *program, struct vkd3d_shader_message_context *message_context); struct vkd3d_shader_descriptor_info1 *vsir_program_add_descriptor(struct vsir_program *program, - enum vkd3d_shader_descriptor_type type, unsigned int register_id, - const struct vkd3d_shader_register_range *range, + enum vkd3d_shader_descriptor_type type, unsigned int register_id, const struct vsir_register_range *range, enum vkd3d_shader_resource_type resource_type, enum vsir_data_type resource_data_type); bool vsir_program_add_icb(struct vsir_program *program, struct vkd3d_shader_immediate_constant_buffer *icb); void vsir_program_cleanup(struct vsir_program *program); @@ -1797,12 +1806,16 @@ struct vkd3d_shader_parser enum vkd3d_result status; }; -void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); +#define vkd3d_shader_parser_error(parser, error, ...) \ + vkd3d_shader_parser_error_(parser, error, __FUNCTION__, __VA_ARGS__) +void vkd3d_shader_parser_error_(struct vkd3d_shader_parser *parser, enum vkd3d_shader_error error, + const char *function, const char *format, ...) 
VKD3D_PRINTF_FUNC(4, 5); void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vkd3d_shader_message_context *message_context, const char *source_name); -void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); +#define vkd3d_shader_parser_warning(parser, error, ...) \ + vkd3d_shader_parser_warning_(parser, error, __FUNCTION__, __VA_ARGS__) +void vkd3d_shader_parser_warning_(struct vkd3d_shader_parser *parser, enum vkd3d_shader_error error, + const char *function, const char *format, ...) VKD3D_PRINTF_FUNC(4, 5); void vsir_program_trace(struct vsir_program *program); @@ -1841,6 +1854,7 @@ void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); +int vkd3d_string_buffer_print_f16(struct vkd3d_string_buffer *buffer, uint16_t f); int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); int vkd3d_string_buffer_print_string_escaped(struct vkd3d_string_buffer *buffer, const char *s, size_t len); @@ -1889,6 +1903,13 @@ static inline size_t bytecode_get_size(struct vkd3d_bytecode_buffer *buffer) return buffer->size; } +#ifdef _WIN32 +# define vkd3d_locale _locale_t +#else +# define vkd3d_locale locale_t +#endif + +float vkd3d_parse_float(const char *s, vkd3d_locale l); uint32_t vkd3d_parse_integer(const char *s); struct vkd3d_shader_message_context @@ -1904,23 +1925,24 @@ void vkd3d_shader_message_context_trace_messages_(const struct vkd3d_shader_mess const char *function); #define vkd3d_shader_message_context_trace_messages(context) \ vkd3d_shader_message_context_trace_messages_(context, __FUNCTION__) -void 
vkd3d_shader_error(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(4, 5); +#define vkd3d_shader_error(context, location, error, ...) \ + vkd3d_shader_error_(context, location, error, __FUNCTION__, __VA_ARGS__) +void vkd3d_shader_error_(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *function, const char *format, ...) VKD3D_PRINTF_FUNC(5, 6); void vkd3d_shader_verror(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, va_list args); + enum vkd3d_shader_error error, const char *function, const char *format, va_list args); void vkd3d_shader_vnote(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_log_level level, const char *format, va_list args); -void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(4, 5); + enum vkd3d_shader_log_level level, const char *function, const char *format, va_list args); +#define vkd3d_shader_warning(context, location, error, ...) \ + vkd3d_shader_warning_(context, location, error, __FUNCTION__, __VA_ARGS__) +void vkd3d_shader_warning_(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *function, const char *format, ...) 
VKD3D_PRINTF_FUNC(5, 6); void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, va_list args); + enum vkd3d_shader_error error, const char *function, const char *format, va_list args); void vkd3d_shader_string_from_message_context(char **out, struct vkd3d_shader_message_context *context); uint64_t vkd3d_shader_init_config_flags(void); -void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); -#define vkd3d_shader_trace_text(text, size) \ - vkd3d_shader_trace_text_(text, size, __FUNCTION__) bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, unsigned int semantic_index, bool output, enum vkd3d_shader_sysval_semantic *sysval, @@ -1929,11 +1951,12 @@ bool sm1_usage_from_semantic_name(const char *semantic_name, uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); -bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); +bool shader_sm4_is_scalar_register(const struct vsir_operand *reg); bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, - const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, - const char *semantic_name, unsigned int semantic_idx, bool output, - bool is_patch_constant_func, bool is_primitive); + const struct vkd3d_shader_version *version, + enum vkd3d_shader_compile_option_backward_compatibility compatibility_flags, + enum vkd3d_tessellator_domain domain, const char *semantic_name, + unsigned int semantic_idx, bool output, bool is_patch_constant_func, bool is_primitive); int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, 
uint64_t config_flags, struct vkd3d_shader_message_context *message_context, struct vsir_program *program); diff --git a/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c b/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c index 6e90b48e877..c75418ae254 100644 --- a/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c +++ b/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c @@ -16,7 +16,14 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ +#include "config.h" +#include <unistd.h> +#ifdef _WIN32 +#include <direct.h> /* For getcwd() */ +#endif #include "vkd3d_utils_private.h" +#include "vkd3d_shader_utils.h" + #undef D3D12CreateDevice static const char *debug_d3d_blob_part(D3D_BLOB_PART part) @@ -157,6 +164,93 @@ HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_S return vkd3d_serialize_versioned_root_signature(desc, blob, error_blob); } +struct standard_include_ctx +{ + char *initial_dir; + size_t len; +}; + +static enum vkd3d_result standard_include_init(struct standard_include_ctx *ctx, const char *initial_filename) +{ + const char *last_slash = NULL; + const char *ptr; + + for (ptr = initial_filename; *ptr; ++ptr) + { +#ifdef _WIN32 + if (*ptr == '/' || *ptr == '\\') +#else + if (*ptr == '/') +#endif + last_slash = ptr; + } + + if (last_slash) + { + ctx->len = last_slash - initial_filename + 1; + if (!(ctx->initial_dir = vkd3d_malloc(ctx->len))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(ctx->initial_dir, initial_filename, ctx->len); + + /* Normalize. */ + ctx->initial_dir[ctx->len - 1] = '/'; + + return VKD3D_OK; + } + + if ((ctx->initial_dir = getcwd(NULL, 0))) + { + ctx->len = strlen(ctx->initial_dir); + + /* Check for empty or / cwd. */ + if (ctx->len && ctx->initial_dir[ctx->len - 1] != '/') + { + /* Append, overwriting the null terminator intentionally. */ + ctx->initial_dir[ctx->len] = '/'; + ctx->len++; + } + + return VKD3D_OK; + } + + /* Fallback to relative path. 
*/ + ctx->len = 2; + if (!(ctx->initial_dir = vkd3d_malloc(ctx->len))) + return VKD3D_ERROR_OUT_OF_MEMORY; + ctx->initial_dir[0] = '.'; + ctx->initial_dir[1] = '/'; + + return VKD3D_OK; +} + +static enum vkd3d_result standard_include_open(const char *filename, bool local, + const char *parent_data, void *context, struct vkd3d_shader_code *out) +{ + struct standard_include_ctx *ctx = context; + enum vkd3d_result res; + char *full_path; + size_t len; + FILE *file; + + len = strlen(filename); + if (!(full_path = vkd3d_malloc(ctx->len + len + 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memcpy(full_path, ctx->initial_dir, ctx->len); + memcpy(&full_path[ctx->len], filename, len + 1); + file = fopen(full_path, "rb"); + vkd3d_free(full_path); + + /* d3dcompiler tries with the prefix path and direct filename. */ + if (!file && !(file = fopen(filename, "rb"))) + return VKD3D_ERROR; + + res = vkd3d_shader_code_from_file(out, file); + fclose(file); + + return res; +} + static int open_include(const char *filename, bool local, const char *parent_data, void *context, struct vkd3d_shader_code *code) { @@ -179,7 +273,8 @@ static void close_include(const struct vkd3d_shader_code *code, void *context) { ID3DInclude *iface = context; - ID3DInclude_Close(iface, code->code); + if (code->code) + ID3DInclude_Close(iface, code->code); } static enum vkd3d_shader_target_type get_target_for_profile(const char *profile) @@ -244,6 +339,7 @@ HRESULT WINAPI D3DCompile2VKD3D(const void *data, SIZE_T data_size, const char * struct vkd3d_shader_compile_option options[7]; struct vkd3d_shader_compile_info compile_info; struct vkd3d_shader_compile_option *option; + struct standard_include_ctx ctx = { 0 }; struct vkd3d_shader_code byte_code; const D3D_SHADER_MACRO *macro; char *messages; @@ -291,9 +387,22 @@ HRESULT WINAPI D3DCompile2VKD3D(const void *data, SIZE_T data_size, const char * for (macro = macros; macro->Name; ++macro) ++preprocess_info.macro_count; } - preprocess_info.pfn_open_include = 
open_include; - preprocess_info.pfn_close_include = close_include; - preprocess_info.include_context = include; + + if (include == D3D_COMPILE_STANDARD_FILE_INCLUDE) + { + if ((ret = standard_include_init(&ctx, filename)) != VKD3D_OK) + return hresult_from_vkd3d_result(ret); + + preprocess_info.pfn_open_include = standard_include_open; + preprocess_info.pfn_close_include = NULL; + preprocess_info.include_context = &ctx; + } + else + { + preprocess_info.pfn_open_include = open_include; + preprocess_info.pfn_close_include = close_include; + preprocess_info.include_context = include; + } hlsl_info.type = VKD3D_SHADER_STRUCTURE_TYPE_HLSL_SOURCE_INFO; hlsl_info.next = NULL; @@ -320,12 +429,13 @@ HRESULT WINAPI D3DCompile2VKD3D(const void *data, SIZE_T data_size, const char * option->value |= VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR; } + option = &options[compile_info.option_count++]; + option->name = VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY; + option->value = 0; if (flags & D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY) - { - option = &options[compile_info.option_count++]; - option->name = VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY; option->value = VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; - } + else if (compiler_version >= 37) + option->value = VKD3D_SHADER_COMPILE_OPTION_CONST_GLOBAL_UNIFORMS; if (effect_flags & D3DCOMPILE_EFFECT_CHILD_EFFECT) { @@ -350,6 +460,9 @@ HRESULT WINAPI D3DCompile2VKD3D(const void *data, SIZE_T data_size, const char * ret = vkd3d_shader_compile(&compile_info, &byte_code, &messages); + if (ctx.initial_dir) + vkd3d_free(ctx.initial_dir); + if (messages && messages_blob) { if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), messages_blob))) @@ -426,6 +539,7 @@ HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename struct vkd3d_shader_preprocess_info preprocess_info; struct vkd3d_shader_compile_info compile_info; struct vkd3d_shader_code preprocessed_code; + struct 
standard_include_ctx ctx = { 0 }; const D3D_SHADER_MACRO *macro; char *messages; HRESULT hr; @@ -462,12 +576,28 @@ HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename for (macro = macros; macro->Name; ++macro) ++preprocess_info.macro_count; } - preprocess_info.pfn_open_include = open_include; - preprocess_info.pfn_close_include = close_include; - preprocess_info.include_context = include; + + if (include == D3D_COMPILE_STANDARD_FILE_INCLUDE) + { + if ((ret = standard_include_init(&ctx, filename)) != VKD3D_OK) + return hresult_from_vkd3d_result(ret); + + preprocess_info.pfn_open_include = standard_include_open; + preprocess_info.pfn_close_include = NULL; + preprocess_info.include_context = &ctx; + } + else + { + preprocess_info.pfn_open_include = open_include; + preprocess_info.pfn_close_include = close_include; + preprocess_info.include_context = include; + } ret = vkd3d_shader_preprocess(&compile_info, &preprocessed_code, &messages); + if (ctx.initial_dir) + vkd3d_free(ctx.initial_dir); + if (messages && messages_blob) { if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), messages_blob))) diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 84407e05cdb..0ece8fdb82a 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -4158,6 +4158,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_format *dst_format, *src_format; const struct vkd3d_vk_device_procs *vk_procs; + VkImageAspectFlags vk_aspect_mask; VkBufferCopy vk_buffer_copy; VkImageCopy vk_image_copy; unsigned int layer_count; @@ -4197,7 +4198,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm VKD3D_ASSERT(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); VKD3D_ASSERT(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); - if 
(src_format->vk_aspect_mask != dst_format->vk_aspect_mask) + /* E.g., for D32_FLOAT_S8X24_UINT -> X32_TYPELESS_G8X24_UINT we just + * need to copy the STENCIL aspect. For D32_FLOAT -> R32_FLOAT we need + * to do a DEPTH -> COLOR copy. */ + vk_aspect_mask = src_format->vk_aspect_mask & dst_format->vk_aspect_mask; + if (vk_aspect_mask != dst_format->vk_aspect_mask) { for (i = 0; i < dst_resource->desc.MipLevels; ++i) { @@ -4212,8 +4217,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm { vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, src_format, dst_format, NULL, 0, 0, 0); - vk_image_copy.dstSubresource.layerCount = layer_count; + vk_image_copy.srcSubresource.aspectMask = vk_aspect_mask; vk_image_copy.srcSubresource.layerCount = layer_count; + vk_image_copy.dstSubresource.aspectMask = vk_aspect_mask; + vk_image_copy.dstSubresource.layerCount = layer_count; VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_resource->u.vk_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_copy)); diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index d733165312c..44f58349dce 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -2395,6 +2395,9 @@ static HRESULT create_shader_stage(struct d3d12_device *device, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, {VKD3D_SHADER_COMPILE_OPTION_FEATURE, feature_flags_compile_option(device)}, + {VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F16, VKD3D_SHADER_DENORMAL_MODE_ANY}, + {VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F32, VKD3D_SHADER_DENORMAL_MODE_ANY}, + {VKD3D_SHADER_COMPILE_OPTION_DENORMAL_MODE_F64, VKD3D_SHADER_DENORMAL_MODE_ANY}, }; stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; diff --git a/libs/vkd3d/libs/vkd3d/utils.c 
b/libs/vkd3d/libs/vkd3d/utils.c index 2d0510e5456..a86572b1100 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -454,6 +454,14 @@ const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device, return &vkd3d_formats[i]; } + /* Combined depth/stencil formats can be used without + * D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL as well. In some ways it may be + * nicer to add entries for these to vkd3d_formats[], much like we do with + * e.g. DXGI_FORMAT_R32_TYPELESS, but we can't handle the override for the + * VK_FORMAT_D24_UNORM_S8_UINT formats there. */ + if ((format = vkd3d_get_depth_stencil_format(device, dxgi_format))) + return format; + /* Do not check VkPhysicalDevice4444FormatsFeaturesEXT because apps * should query format support, which returns more detailed info. */ if (dxgi_format == format_b4g4r4a4.dxgi_format && device->vk_info.EXT_4444_formats) -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10972