From: Alexandre Julliard julliard@winehq.org
To test the upcoming 1.9 release. --- dlls/d3dcompiler_43/tests/hlsl_d3d11.c | 4 +- dlls/d3dcompiler_43/tests/hlsl_d3d9.c | 3 + include/d3d12.idl | 70 +- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 22 + .../include/private/vkd3d_shader_utils.h | 63 + libs/vkd3d/include/vkd3d.h | 35 + libs/vkd3d/include/vkd3d_shader.h | 284 +- libs/vkd3d/libs/vkd3d-common/debug.c | 17 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 48 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 415 ++- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 17 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 2370 +++++++++++++++++ libs/vkd3d/libs/vkd3d-shader/hlsl.c | 211 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 46 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1633 +++++++----- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 643 +++-- .../libs/vkd3d-shader/hlsl_constant_ops.c | 525 +++- libs/vkd3d/libs/vkd3d-shader/ir.c | 230 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 140 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 438 +-- libs/vkd3d/libs/vkd3d-shader/tpf.c | 1360 ++++++---- .../libs/vkd3d-shader/vkd3d_shader_main.c | 432 ++- .../libs/vkd3d-shader/vkd3d_shader_private.h | 118 +- libs/vkd3d/libs/vkd3d/command.c | 676 +++-- libs/vkd3d/libs/vkd3d/device.c | 245 +- libs/vkd3d/libs/vkd3d/resource.c | 342 ++- libs/vkd3d/libs/vkd3d/state.c | 14 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 124 +- 30 files changed, 8298 insertions(+), 2232 deletions(-) create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c
diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d11.c b/dlls/d3dcompiler_43/tests/hlsl_d3d11.c index c853b14f13a..d0c3c223353 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d11.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d11.c @@ -616,8 +616,7 @@ static void test_sampling(void) winetest_push_context("Test %u", i);
ID3D11DeviceContext_ClearRenderTargetView(test_context.immediate_context, test_context.rtv, red); - todo_wine_if (i < 3) - ps_code = compile_shader_flags(tests[i], "ps_4_0", D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY); + ps_code = compile_shader_flags(tests[i], "ps_4_0", D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY); if (ps_code) { draw_quad(&test_context, ps_code); @@ -902,7 +901,6 @@ static void test_reflection(void) refcount = reflection->lpVtbl->Release(reflection); ok(!refcount, "Got unexpected refcount %lu.\n", refcount);
- todo_wine code = compile_shader_flags(ps_source, "ps_4_0", D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY); if (!code) return; diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c index 7f84c2c62e3..2631f463e96 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c @@ -559,6 +559,8 @@ static void test_conditionals(void) device = test_context.device;
ps_code = compile_shader(ps_if_source, "ps_2_0", 0); + if (ps_code) + { draw_quad(device, ps_code); init_readback(device, &rb);
@@ -578,6 +580,7 @@ static void test_conditionals(void)
release_readback(&rb); ID3D10Blob_Release(ps_code); + }
ps_code = compile_shader(ps_ternary_source, "ps_2_0", 0); if (ps_code) diff --git a/include/d3d12.idl b/include/d3d12.idl index 4fec32d2656..5811608b94f 100644 --- a/include/d3d12.idl +++ b/include/d3d12.idl @@ -44,6 +44,7 @@ const UINT D3D12_DEFAULT_STENCIL_WRITE_MASK = 0xff; const UINT D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND = 0xffffffff; cpp_quote("#define D3D12_FLOAT32_MAX (3.402823466e+38f)") const UINT D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32; +const UINT D3D12_PACKED_TILE = 0xffffffff; const UINT D3D12_UAV_SLOT_COUNT = 64; const UINT D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; const UINT D3D12_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; @@ -72,6 +73,7 @@ const UINT D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT = 4096; const UINT D3D12_STANDARD_MAXIMUM_ELEMENT_ALIGNMENT_BYTE_MULTIPLE = 4; const UINT D3D12_TEXTURE_DATA_PITCH_ALIGNMENT = 256; const UINT D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT = 512; +const UINT D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES = 65536; const UINT D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT = 4096; const UINT D3D12_VS_INPUT_REGISTER_COUNT = 32; const UINT D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE = 16; @@ -1853,6 +1855,24 @@ typedef struct D3D12_WRITEBUFFERIMMEDIATE_PARAMETER UINT32 Value; } D3D12_WRITEBUFFERIMMEDIATE_PARAMETER;
+typedef enum D3D12_PROTECTED_RESOURCE_SESSION_FLAGS +{ + D3D12_PROTECTED_RESOURCE_SESSION_FLAG_NONE = 0, +} D3D12_PROTECTED_RESOURCE_SESSION_FLAGS; +cpp_quote("DEFINE_ENUM_FLAG_OPERATORS(D3D12_PROTECTED_RESOURCE_SESSION_FLAGS);") + +typedef enum D3D12_PROTECTED_SESSION_STATUS +{ + D3D12_PROTECTED_SESSION_STATUS_OK = 0, + D3D12_PROTECTED_SESSION_STATUS_INVALID = 1, +} D3D12_PROTECTED_SESSION_STATUS; + +typedef struct D3D12_PROTECTED_RESOURCE_SESSION_DESC +{ + UINT NodeMask; + D3D12_PROTECTED_RESOURCE_SESSION_FLAGS Flags; +} D3D12_PROTECTED_RESOURCE_SESSION_DESC; + [ uuid(c4fec28f-7966-4e95-9f94-f431cb56c3b8), object, @@ -2214,6 +2234,41 @@ interface ID3D12GraphicsCommandList2 : ID3D12GraphicsCommandList1 const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes); }
+[ + uuid(a1533d18-0ac1-4084-85b9-89a96116806b), + object, + local, + pointer_default(unique) +] +interface ID3D12ProtectedSession : ID3D12DeviceChild +{ + HRESULT GetStatusFence(REFIID riid, void **fence); + + D3D12_PROTECTED_SESSION_STATUS GetSessionStatus(); +} + +[ + uuid(6cd696f4-f289-40cc-8091-5a6c0a099c3d), + object, + local, + pointer_default(unique) +] +interface ID3D12ProtectedResourceSession : ID3D12ProtectedSession +{ + D3D12_PROTECTED_RESOURCE_SESSION_DESC GetDesc(); +} + +[ + uuid(6fda83a7-b84c-4e38-9ac8-c7bd22016b3d), + object, + local, + pointer_default(unique) +] +interface ID3D12GraphicsCommandList3 : ID3D12GraphicsCommandList2 +{ + void SetProtectedResourceSession(ID3D12ProtectedResourceSession *protected_resource_session); +} + typedef enum D3D12_TILE_RANGE_FLAGS { D3D12_TILE_RANGE_FLAG_NONE = 0x0, @@ -2243,8 +2298,8 @@ interface ID3D12CommandQueue : ID3D12Pageable ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - UINT *heap_range_offsets, - UINT *range_tile_counts, + const UINT *heap_range_offsets, + const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags);
void CopyTileMappings(ID3D12Resource *dst_resource, @@ -2378,6 +2433,17 @@ interface ID3D12Fence : ID3D12Pageable HRESULT Signal(UINT64 value); }
+[ + uuid(433685fe-e22b-4ca0-a8db-b5b4f4dd0e4a), + object, + local, + pointer_default(unique) +] +interface ID3D12Fence1 : ID3D12Fence +{ + D3D12_FENCE_FLAGS GetCreationFlags(); +} + [ uuid(6102dee4-af59-4b09-b999-b44d73f09b24), object, diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 1ba0e9f71e1..f647af11d07 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -17,6 +17,7 @@ SOURCES = \ libs/vkd3d-shader/d3d_asm.c \ libs/vkd3d-shader/d3dbc.c \ libs/vkd3d-shader/dxbc.c \ + libs/vkd3d-shader/dxil.c \ libs/vkd3d-shader/glsl.c \ libs/vkd3d-shader/hlsl.c \ libs/vkd3d-shader/hlsl.l \ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 08dde1b2e7f..ee733ee0d76 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -20,6 +20,7 @@ #define __VKD3D_COMMON_H
#include "config.h" +#define WIN32_LEAN_AND_MEAN #include "windows.h" #include "vkd3d_types.h"
@@ -28,6 +29,7 @@ #include <stdbool.h> #include <stdint.h> #include <stdio.h> +#include <stdlib.h>
#ifdef _MSC_VER #include <intrin.h> @@ -171,6 +173,11 @@ static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) #endif }
+static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_t size) +{ + return (~(size_t)0 - start) / size < count; +} + static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) { return low | ((uint16_t)high << 8); @@ -186,6 +193,21 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) return (x > y) - (x < y); }
+static inline bool bitmap_clear(uint32_t *map, unsigned int idx) +{ + return map[idx >> 5] &= ~(1u << (idx & 0x1f)); +} + +static inline bool bitmap_set(uint32_t *map, unsigned int idx) +{ + return map[idx >> 5] |= (1u << (idx & 0x1f)); +} + +static inline bool bitmap_is_set(const uint32_t *map, unsigned int idx) +{ + return map[idx >> 5] & (1u << (idx & 0x1f)); +} + static inline int ascii_isupper(int c) { return 'A' <= c && c <= 'Z'; diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h new file mode 100644 index 00000000000..00052a89988 --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h @@ -0,0 +1,63 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADER_UTILS_H +#define __VKD3D_SHADER_UTILS_H + +#include "vkd3d_shader.h" + +#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') +#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') +#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') + +static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, + enum vkd3d_shader_source_type *type, char **messages) +{ + struct vkd3d_shader_dxbc_desc desc; + enum vkd3d_result ret; + unsigned int i; + + *type = VKD3D_SHADER_SOURCE_NONE; + + if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) + return ret; + + for (i = 0; i < desc.section_count; ++i) + { + uint32_t tag = desc.sections[i].tag; + if (tag == TAG_SHDR || tag == TAG_SHEX) + { + *type = VKD3D_SHADER_SOURCE_DXBC_TPF; + } + else if (tag == TAG_DXIL) + { + *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; + /* Default to DXIL if both are present. */ + break; + } + } + + vkd3d_shader_free_dxbc(&desc); + + if (*type == VKD3D_SHADER_SOURCE_NONE) + return VKD3D_ERROR_INVALID_SHADER; + + return VKD3D_OK; +} + +#endif /* __VKD3D_SHADER_UTILS_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index 72ed3ced671..2ccda47248a 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -207,7 +207,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device);
VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); + +/** + * Acquire the Vulkan queue backing a command queue. + * + * While a queue is acquired by the client, it is locked so that + * neither the vkd3d library nor other threads can submit work to + * it. For that reason it should be released as soon as possible with + * vkd3d_release_vk_queue(). The lock is not reentrant, so the same + * queue must not be acquired more than once by the same thread. + * + * Work submitted through the Direct3D 12 API exposed by vkd3d is not + * always immediately submitted to the Vulkan queue; sometimes it is + * kept in another internal queue, which might not necessarily be + * empty at the time vkd3d_acquire_vk_queue() is called. For this + * reason, work submitted directly to the Vulkan queue might appear to + * the Vulkan driver as being submitted before other work submitted + * through the Direct3D 12 API. If this is not desired, it is + * recommended to synchronize work submission using an ID3D12Fence + * object, by submitting to the queue a signal operation after all the + * Direct3D 12 work is submitted and waiting for it before calling + * vkd3d_acquire_vk_queue(). + * + * \since 1.0 + */ VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); + +/** + * Release the Vulkan queue backing a command queue. + * + * This must be paired with an earlier corresponding + * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan + * queue returned by vkd3d_acquire_vk_queue() must not be used any + * more. + * + * \since 1.0 + */ VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue);
VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 274241546ea..cfe54dbff53 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -85,6 +85,16 @@ enum vkd3d_shader_structure_type * \since 1.3 */ VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, + /** + * The structure is a vkd3d_shader_scan_signature_info structure. + * \since 1.9 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, + /** + * The structure is a vkd3d_shader_next_stage_info structure. + * \since 1.9 + */ + VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -134,6 +144,14 @@ enum vkd3d_shader_compile_option_formatting_flags VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), };
+enum vkd3d_shader_compile_option_pack_matrix_order +{ + VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR = 0x00000001, + VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR = 0x00000002, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER), +}; + enum vkd3d_shader_compile_option_name { /** @@ -164,6 +182,15 @@ enum vkd3d_shader_compile_option_name * \since 1.7 */ VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE = 0x00000006, + /** + * This option specifies the default matrix packing order. It is only supported for the HLSL source type. + * Explicit variable modifiers or pragmas will take precedence. + * + * \a value is a member of enum vkd3d_shader_compile_option_pack_matrix_order. + * + * \since 1.9 + */ + VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER = 0x00000007,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), }; @@ -327,6 +354,25 @@ struct vkd3d_shader_parameter } u; };
+/** + * Symbolic register indices for mapping uniform constant register sets in + * legacy Direct3D bytecode to constant buffer views in the target environment. + * + * Members of this enumeration are used in + * \ref vkd3d_shader_resource_binding.register_index. + * + * \since 1.9 + */ +enum vkd3d_shader_d3dbc_constant_register +{ + /** The float constant register set, c# in Direct3D assembly. */ + VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER = 0x0, + /** The integer constant register set, i# in Direct3D assembly. */ + VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, + /** The boolean constant register set, b# in Direct3D assembly. */ + VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, +}; + /** * Describes the mapping of a single resource or resource array to its binding * point in the target environment. @@ -351,7 +397,14 @@ struct vkd3d_shader_resource_binding * support multiple register spaces, this parameter must be set to 0. */ unsigned int register_space; - /** Register index of the DXBC resource. */ + /** + * Register index of the Direct3D resource. + * + * For legacy Direct3D shaders, vkd3d-shader maps each constant register + * set to a single constant buffer view. This parameter names the register + * set to map, and must be a member of + * enum vkd3d_shader_d3dbc_constant_register. + */ unsigned int register_index; /** Shader stage(s) to which the resource is visible. */ enum vkd3d_shader_visibility shader_visibility; @@ -611,6 +664,11 @@ enum vkd3d_shader_source_type * model 1, 2, and 3 shaders. \since 1.3 */ VKD3D_SHADER_SOURCE_D3D_BYTECODE, + /** + * A 'DirectX Intermediate Language' shader embedded in a DXBC container. This is + * the format used for Direct3D shader model 6 shaders. \since 1.9 + */ + VKD3D_SHADER_SOURCE_DXBC_DXIL,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), }; @@ -620,7 +678,7 @@ enum vkd3d_shader_target_type { /** * The shader has no type or is to be ignored. This is not a valid value - * for vkd3d_shader_compile() or vkd3d_shader_scan(). + * for vkd3d_shader_compile(). */ VKD3D_SHADER_TARGET_NONE, /** @@ -1281,6 +1339,8 @@ enum vkd3d_shader_descriptor_info_flag /** The descriptor is a UAV resource, on which the shader performs * atomic ops. \since 1.6 */ VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS = 0x00000008, + /** The descriptor is a raw (byte-addressed) buffer. \since 1.9 */ + VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER = 0x00000010,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_INFO_FLAG), }; @@ -1320,6 +1380,20 @@ struct vkd3d_shader_descriptor_info * A chained structure enumerating the descriptors declared by a shader. * * This structure extends vkd3d_shader_compile_info. + * + * When scanning a legacy Direct3D shader, vkd3d-shader enumerates each + * constant register set used by the shader as a single constant buffer + * descriptor, as follows: + * - The \ref vkd3d_shader_descriptor_info.type field is set to + * VKD3D_SHADER_DESCRIPTOR_TYPE_CBV. + * - The \ref vkd3d_shader_descriptor_info.register_space field is set to zero. + * - The \ref vkd3d_shader_descriptor_info.register_index field is set to a + * member of enum vkd3d_shader_d3dbc_constant_register denoting which set + * is used. + * - The \ref vkd3d_shader_descriptor_info.count field is set to one. + * + * In summary, there may be up to three such descriptors, one for each register + * set used by the shader: float, integer, and boolean. */ struct vkd3d_shader_scan_descriptor_info { @@ -1551,6 +1625,134 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); }
+/** + * A chained structure containing descriptions of shader inputs and outputs. + * + * This structure is currently implemented only for DXBC and legacy D3D bytecode + * source types. + * For DXBC shaders, the returned information is parsed directly from the + * signatures embedded in the DXBC shader. + * For legacy D3D shaders, the returned information is synthesized based on + * registers declared or used by shader instructions. + * For all other shader types, the structure is zeroed. + * + * All members (except for \ref type and \ref next) are output-only. + * + * This structure is passed to vkd3d_shader_scan() and extends + * vkd3d_shader_compile_info. + * + * Members of this structure are allocated by vkd3d-shader and should be freed + * with vkd3d_shader_free_scan_signature_info() when no longer needed. + * + * All signatures may contain pointers into the input shader, and should only + * be accessed while the input shader remains valid. + * + * Signature elements are synthesized from legacy Direct3D bytecode as follows: + * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an + * uppercase string corresponding to the HLSL name for the usage, e.g. + * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. + * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the + * usage index. + * - The \ref vkd3d_shader_signature_element.stream_index is always 0. + * + * Signature elements are synthesized for any input or output register declared + * or used in a legacy Direct3D bytecode shader, including the following: + * - Shader model 1 and 2 colour and texture coordinate registers. + * - The shader model 1 pixel shader output register. + * - Shader model 1 and 2 vertex shader output registers (position, fog, and + * point size). + * - Shader model 3 pixel shader system value input registers (pixel position + * and face). 
+ * + * \since 1.9 + */ +struct vkd3d_shader_scan_signature_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The shader input varyings. */ + struct vkd3d_shader_signature input; + + /** The shader output varyings. */ + struct vkd3d_shader_signature output; + + /** The shader patch constant varyings. */ + struct vkd3d_shader_signature patch_constant; +}; + +/** + * Describes the mapping of an output varying register in a shader stage, + * to an input varying register in the following shader stage. + * + * This structure is used in struct vkd3d_shader_next_stage_info. + */ +struct vkd3d_shader_varying_map +{ + /** + * The signature index (in the output signature) of the output varying. + * If greater than or equal to the number of elements in the output + * signature, signifies that the varying is consumed by the next stage but + * not written by this one. + */ + unsigned int output_signature_index; + /** The register index of the input varying to map this register to. */ + unsigned int input_register_index; + /** The mask consumed by the destination register. */ + unsigned int input_mask; +}; + +/** + * A chained structure which describes the next shader in the pipeline. + * + * This structure is optional, and should only be provided if there is in fact + * another shader in the pipeline. + * However, depending on the input and output formats, this structure may be + * necessary in order to generate shaders which correctly match each other. + * If the structure or its individual fields are not provided, vkd3d-shader + * will generate shaders which may be correct in isolation, but are not + * guaranteed to correctly match each other. + * + * This structure is passed to vkd3d_shader_compile() and extends + * vkd3d_shader_compile_info. + * + * This structure contains only input parameters. 
+ * + * \since 1.9 + */ +struct vkd3d_shader_next_stage_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** + * A mapping of output varyings in this shader stage to input varyings + * in the next shader stage. + * + * This mapping should include exactly one element for each varying + * consumed by the next shader stage. + * If this shader stage outputs a varying that is not consumed by the next + * shader stage, that varying should be absent from this array. + * + * If this field is absent, vkd3d-shader will map varyings from one stage + * to another based on their register index. + * For Direct3D shader model 3.0, such a default mapping will be incorrect + * unless the registers are allocated in the same order, and hence this + * field is necessary to correctly match inter-stage varyings. + * This mapping may also be necessary under other circumstances where the + * varying interface does not match exactly. + * + * This mapping may be constructed by vkd3d_shader_build_varying_map(). + */ + const struct vkd3d_shader_varying_map *varying_map; + /** The number of registers provided in \ref varying_map. 
*/ + unsigned int varying_count; +}; + #ifdef LIBVKD3D_SHADER_SOURCE # define VKD3D_SHADER_API VKD3D_EXPORT #else @@ -1623,12 +1825,14 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * * Depending on the source and target types, this function may support the * following chained structures: + * - vkd3d_shader_hlsl_source_info * - vkd3d_shader_interface_info + * - vkd3d_shader_next_stage_info * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_signature_info * - vkd3d_shader_spirv_domain_shader_target_info * - vkd3d_shader_spirv_target_info * - vkd3d_shader_transform_feedback_info - * - vkd3d_shader_hlsl_source_info * * \param compile_info A chained structure containing compilation parameters. * @@ -1784,6 +1988,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * Parse shader source code or byte code, returning various types of requested * information. * + * The \a source_type member of \a compile_info must be set to the type of the + * shader. + * + * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which + * case vkd3d_shader_scan() will return information about the shader in + * isolation. Alternatively, it may be set to a valid compilation target for the + * shader, in which case vkd3d_shader_scan() will return information that + * reflects the interface for a shader as it will be compiled to that target. + * In this case other chained structures may be appended to \a compile_info as + * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, + * such as vkd3d_shader_spirv_target_info. + * + * (For a hypothetical example, suppose the source shader distinguishes float + * and integer texture data, but the target environment does not support integer + * textures. In this case vkd3d_shader_compile() might translate integer + * operations to float. 
Accordingly using VKD3D_SHADER_TARGET_NONE would + * accurately report whether the texture expects integer or float data, but + * using the relevant specific target type would report + * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) + * * Currently this function supports the following code types: * - VKD3D_SHADER_SOURCE_DXBC_TPF * @@ -1791,6 +2015,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * \n * The DXBC_TPF scanner supports the following chained structures: * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_signature_info * \n * Although the \a compile_info parameter is read-only, chained structures * passed to this function need not be, and may serve as output parameters, @@ -1827,12 +2052,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info);
/** - * Read the input signature of a compiled shader, returning a structural + * Read the input signature of a compiled DXBC shader, returning a structural * description which can be easily parsed by C code. * * This function parses a compiled shader. To parse a standalone root signature, * use vkd3d_shader_parse_root_signature(). * + * This function only parses DXBC shaders, and only retrieves the input + * signature. To retrieve signatures from other shader types, or other signature + * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. + * This function returns the same input signature that is returned in + * struct vkd3d_shader_scan_signature_info. + * * \param dxbc Compiled byte code, in DXBC format. * * \param signature Output location in which the parsed root signature will be @@ -2022,6 +2253,48 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages);
+/** + * Free members of struct vkd3d_shader_scan_signature_info allocated by + * vkd3d_shader_scan(). + * + * This function may free members of vkd3d_shader_scan_signature_info, but + * does not free the structure itself. + * + * \param info Scan information to free. + * + * \since 1.9 + */ +VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); + +/** + * Build a mapping of output varyings in a shader stage to input varyings in + * the following shader stage. + * + * This mapping should be used in struct vkd3d_shader_next_stage_info to + * compile the first shader. + * + * \param output_signature The output signature of the first shader. + * + * \param input_signature The input signature of the second shader. + * + * \param count On output, contains the number of entries written into + * \ref varyings. + * + * \param varyings Pointer to an output array of varyings. + * This must point to space for N varyings, where N is the number of elements + * in the input signature. + * + * \remark Valid legacy Direct3D pixel shaders have at most 12 varying inputs: + * 10 inter-stage varyings, face, and position. + * Therefore, in practice, it is safe to call this function with a + * pre-allocated array with a fixed size of 12. + * + * \since 1.9 + */ +VKD3D_SHADER_API void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, + const struct vkd3d_shader_signature *input_signature, + unsigned int *count, struct vkd3d_shader_varying_map *varyings); + #endif /* VKD3D_SHADER_NO_PROTOTYPES */
/** Type of vkd3d_shader_get_version(). */ @@ -2087,6 +2360,9 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages);
+/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ +typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index b363efbd360..aa7df5bd764 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -31,6 +31,7 @@ #include <stdlib.h> #include <stdbool.h> #include <string.h> +#include <unistd.h> #ifdef HAVE_PTHREAD_H #include <pthread.h> #endif @@ -44,11 +45,11 @@ extern const char *const vkd3d_dbg_env_name;
static const char *const debug_level_names[] = { - /* VKD3D_DBG_LEVEL_NONE */ "none", - /* VKD3D_DBG_LEVEL_ERR */ "err", - /* VKD3D_DBG_LEVEL_FIXME */ "fixme", - /* VKD3D_DBG_LEVEL_WARN */ "warn", - /* VKD3D_DBG_LEVEL_TRACE */ "trace", + [VKD3D_DBG_LEVEL_NONE ] = "none", + [VKD3D_DBG_LEVEL_ERR ] = "err", + [VKD3D_DBG_LEVEL_FIXME] = "fixme", + [VKD3D_DBG_LEVEL_WARN ] = "warn", + [VKD3D_DBG_LEVEL_TRACE] = "trace", };
enum vkd3d_dbg_level vkd3d_dbg_get_level(void) @@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch
assert(level < ARRAY_SIZE(debug_level_names));
+#ifdef _WIN32 + vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); +#elif HAVE_GETTID + vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); +#else vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); +#endif va_start(args, fmt); vkd3d_dbg_voutput(fmt, args); va_end(args); diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 0a821b5c878..d72402eb250 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -578,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e { static const char *const resource_type_names[] = { - /* VKD3D_SHADER_RESOURCE_NONE */ "none", - /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", - /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", - /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", - /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ "texture1darray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", + [VKD3D_SHADER_RESOURCE_NONE ] = "none", + [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", + [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", + [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", + [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", + [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", };
if (type < ARRAY_SIZE(resource_type_names)) @@ -601,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const { static const char *const data_type_names[] = { - /* VKD3D_DATA_FLOAT */ "float", - /* VKD3D_DATA_INT */ "int", - /* VKD3D_DATA_RESOURCE */ "resource", - /* VKD3D_DATA_SAMPLER */ "sampler", - /* VKD3D_DATA_UAV */ "uav", - /* VKD3D_DATA_UINT */ "uint", - /* VKD3D_DATA_UNORM */ "unorm", - /* VKD3D_DATA_SNORM */ "snorm", - /* VKD3D_DATA_OPAQUE */ "opaque", - /* VKD3D_DATA_MIXED */ "mixed", - /* VKD3D_DATA_DOUBLE */ "double", - /* VKD3D_DATA_CONTINUED */ "<continued>", - /* VKD3D_DATA_UNUSED */ "<unused>", + [VKD3D_DATA_FLOAT ] = "float", + [VKD3D_DATA_INT ] = "int", + [VKD3D_DATA_RESOURCE ] = "resource", + [VKD3D_DATA_SAMPLER ] = "sampler", + [VKD3D_DATA_UAV ] = "uav", + [VKD3D_DATA_UINT ] = "uint", + [VKD3D_DATA_UNORM ] = "unorm", + [VKD3D_DATA_SNORM ] = "snorm", + [VKD3D_DATA_OPAQUE ] = "opaque", + [VKD3D_DATA_MIXED ] = "mixed", + [VKD3D_DATA_DOUBLE ] = "double", + [VKD3D_DATA_CONTINUED] = "<continued>", + [VKD3D_DATA_UNUSED ] = "<unused>", }; const char *name; int i; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 712613ac13b..99a5bd7a438 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -214,6 +214,9 @@ struct vkd3d_shader_sm1_parser bool abort;
struct vkd3d_shader_parser p; + +#define MAX_CONSTANT_COUNT 8192 + uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; };
/* This table is not order or position dependent. */ @@ -260,9 +263,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = /* Declarations */ {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, /* Control flow */ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, @@ -327,9 +330,9 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = /* Declarations */ {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, /* Control flow */ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, @@ -490,6 +493,309 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; }
+static struct signature_element *find_signature_element(const struct shader_signature *signature, + const char *semantic_name, unsigned int semantic_index) +{ + struct signature_element *e = signature->elements; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) + && e[i].semantic_index == semantic_index) + return &e[i]; + } + + return NULL; +} + +static struct signature_element *find_signature_element_by_register_index( + const struct shader_signature *signature, unsigned int register_index) +{ + struct signature_element *e = signature->elements; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + if (e[i].register_index == register_index) + return &e[i]; + } + + return NULL; +} + +#define SM1_COLOR_REGISTER_OFFSET 8 + +static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, + const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, + unsigned int register_index, bool is_dcl, unsigned int mask) +{ + struct shader_signature *signature; + struct signature_element *element; + + if (output) + signature = &sm1->p.shader_desc.output_signature; + else + signature = &sm1->p.shader_desc.input_signature; + + if ((element = find_signature_element(signature, name, index))) + { + element->mask |= mask; + if (!is_dcl) + element->used_mask |= mask; + return true; + } + + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, + signature->element_count + 1, sizeof(*signature->elements))) + return false; + element = &signature->elements[signature->element_count++]; + + element->semantic_name = name; + element->semantic_index = index; + element->stream_index = 0; + element->sysval_semantic = sysval; + element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; + element->mask = mask; 
+ element->used_mask = is_dcl ? 0 : mask; + element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; + + return true; +} + +static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + unsigned int register_index, unsigned int mask) +{ + struct shader_signature *signature; + struct signature_element *element; + + if (output) + signature = &sm1->p.shader_desc.output_signature; + else + signature = &sm1->p.shader_desc.input_signature; + + if (!(element = find_signature_element_by_register_index(signature, register_index))) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, + "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); + return; + } + + element->used_mask |= mask; +} + +static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) +{ + unsigned int register_index = reg->idx[0].offset; + + switch (reg->type) + { + case VKD3DSPR_TEMP: + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL + && sm1->p.shader_version.major == 1 && !register_index) + return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); + return true; + + case VKD3DSPR_INPUT: + /* For vertex shaders or sm3 pixel shaders, we should have already + * had a DCL instruction. Otherwise, this is a colour input. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) + { + add_signature_mask(sm1, false, register_index, mask); + return true; + } + return add_signature_element(sm1, false, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); + + case VKD3DSPR_TEXTURE: + /* For vertex shaders, this is ADDR. 
*/ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + return true; + return add_signature_element(sm1, false, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + + case VKD3DSPR_OUTPUT: + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + { + /* For sm < 2 vertex shaders, this is TEXCRDOUT. + * + * For sm3 vertex shaders, this is OUTPUT, but we already + * should have had a DCL instruction. */ + if (sm1->p.shader_version.major == 3) + { + add_signature_mask(sm1, true, register_index, mask); + return true; + } + return add_signature_element(sm1, true, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + } + /* fall through */ + + case VKD3DSPR_ATTROUT: + return add_signature_element(sm1, true, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); + + case VKD3DSPR_COLOROUT: + return add_signature_element(sm1, true, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + + case VKD3DSPR_DEPTHOUT: + return add_signature_element(sm1, true, "DEPTH", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + case VKD3DSPR_RASTOUT: + switch (register_index) + { + case 0: + return add_signature_element(sm1, true, "POSITION", 0, + VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); + + case 1: + return add_signature_element(sm1, true, "FOG", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + case 2: + return add_signature_element(sm1, true, "PSIZE", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + default: + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, + "Invalid rasterizer output index %u.", register_index); + return true; + } + + case VKD3DSPR_MISCTYPE: + switch (register_index) + { + case 0: + return add_signature_element(sm1, false, "VPOS", 0, + VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); + + case 1: + return 
add_signature_element(sm1, false, "VFACE", 0, + VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); + + default: + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, + "Invalid miscellaneous fragment input index %u.", register_index); + return true; + } + + default: + return true; + } +} + +static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_semantic *semantic) +{ + const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + unsigned int mask = semantic->resource.reg.write_mask; + bool output; + + static const char sm1_semantic_names[][13] = + { + [VKD3D_DECL_USAGE_POSITION ] = "POSITION", + [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", + [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", + [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", + [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", + [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", + [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", + [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", + [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", + [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", + [VKD3D_DECL_USAGE_COLOR ] = "COLOR", + [VKD3D_DECL_USAGE_FOG ] = "FOG", + [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", + [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", + }; + + if (reg->type == VKD3DSPR_OUTPUT) + output = true; + else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) + output = false; + else /* vpos and vface don't have a semantic. */ + return add_signature_element_from_register(sm1, reg, true, mask); + + /* sm2 pixel shaders use DCL but don't provide a semantic. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) + return add_signature_element_from_register(sm1, reg, true, mask); + + /* With the exception of vertex POSITION output, none of these are system + * values. 
Pixel POSITION input is not equivalent to SV_Position; the closer + * equivalent is VPOS, which is not declared as a semantic. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX + && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + + return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], + semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); +} + +static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, + enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) +{ + struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; + + desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); + if (from_def) + { + /* d3d shaders have a maximum of 8192 constants, so the word index must + * stay strictly below the size of this array. */ + assert((index / 32) < ARRAY_SIZE(sm1->constant_def_mask[set])); + bitmap_set(sm1->constant_def_mask[set], index); + } +} + +static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) +{ + struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; + uint32_t register_index = reg->idx[0].offset; + + switch (reg->type) + { + case VKD3DSPR_TEMP: + desc->temp_count = max(desc->temp_count, register_index + 1); + break; + + case VKD3DSPR_CONST: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); + break; + + case VKD3DSPR_CONST2: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); + break; + + case VKD3DSPR_CONST3: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); + break; + + case VKD3DSPR_CONST4: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); + break; + + case VKD3DSPR_CONSTINT: + 
record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); + break; + + case VKD3DSPR_CONSTBOOL: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); + break; + + default: + break; + } + + add_signature_element_from_register(sm1, reg, false, mask); +} + /* Read a parameter token from the input stream, and possibly a relative * addressing token. */ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, @@ -640,6 +946,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, range = &semantic->resource.range; range->space = 0; range->first = range->last = semantic->resource.reg.reg.idx[0].offset; + + add_signature_element_from_semantic(sm1, semantic); }
static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, @@ -744,6 +1052,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, } }
+static unsigned int mask_from_swizzle(unsigned int swizzle) +{ + return (1u << vkd3d_swizzle_get_component(swizzle, 0)) + | (1u << vkd3d_swizzle_get_component(swizzle, 1)) + | (1u << vkd3d_swizzle_get_component(swizzle, 2)) + | (1u << vkd3d_swizzle_get_component(swizzle, 3)); +} + static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { struct vkd3d_shader_src_param *src_params, *predicate; @@ -817,22 +1133,28 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } else if (ins->handler_idx == VKD3DSIH_DEFB) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } else if (ins->handler_idx == VKD3DSIH_DEFI) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } else { /* Destination token */ if (ins->dst_count) + { shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false); + }
/* Predication token */ if (ins->predicate) @@ -840,7 +1162,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str
/* Other source tokens */ for (i = 0; i < ins->src_count; ++i) + { shader_sm1_read_src_param(sm1, &p, &src_params[i]); + shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle), false); + } }
if (sm1->abort) @@ -947,12 +1272,30 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, return VKD3D_OK; }
+static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, + enum vkd3d_shader_d3dbc_constant_register set) +{ + unsigned int j; + + /* Find the highest constant index which is not written by a DEF + * instruction. We can't (easily) use an FFZ function for this since it + * needs to be limited by the highest used register index. */ + for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) + { + if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) + return j; + } + + return 0; +} + int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) { struct vkd3d_shader_instruction_array *instructions; struct vkd3d_shader_instruction *ins; struct vkd3d_shader_sm1_parser *sm1; + unsigned int i; int ret;
if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) @@ -992,6 +1335,9 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi
*parser = &sm1->p;
+ for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) + sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); + return sm1->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; }
@@ -1340,7 +1686,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe else { put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); - put_u32(buffer, var->regs[r].bind_count); + put_u32(buffer, var->bind_count[r]); } put_u32(buffer, 0); /* type */ put_u32(buffer, 0); /* FIXME: default value */ @@ -1553,12 +1899,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_
for (i = 0; i < ctx->constant_defs.count; ++i) { + const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; uint32_t token = D3DSIO_DEF; const struct sm1_dst_register reg = { .type = D3DSPR_CONST, .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = i, + .reg = constant_reg->index, };
if (ctx->profile->major_version > 1) @@ -1567,7 +1914,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_
write_sm1_dst_register(buffer, ®); for (x = 0; x < 4; ++x) - put_f32(buffer, ctx->constant_defs.values[i].f[x]); + put_f32(buffer, constant_reg->value.f[x]); } }
@@ -1686,14 +2033,19 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) continue;
- count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; + count = var->bind_count[HLSL_REGSET_SAMPLERS];
for (i = 0; i < count; ++i) { if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) { sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; - assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); + if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { + /* These can appear in sm4-style combined sample instructions. */ + hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); + continue; + }
reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); @@ -1844,6 +2196,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } }
+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + switch (jump->type) + { + case HLSL_IR_JUMP_DISCARD_NEG: + { + struct hlsl_reg *reg = &jump->condition.node->reg; + /* Named sm1_instr so that it does not shadow the "instr" parameter. */ + struct sm1_instruction sm1_instr = + { + .opcode = VKD3D_SM1_OP_TEXKILL, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg->id, + .dst.writemask = reg->writemask, + .has_dst = 1, + }; + + write_sm1_instruction(ctx, buffer, &sm1_instr); + break; + } + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + } +} + static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); @@ -2038,6 +2419,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b write_sm1_expr(ctx, buffer, instr); break;
+ case HLSL_IR_JUMP: + write_sm1_jump(ctx, buffer, instr); + break; + case HLSL_IR_LOAD: write_sm1_load(ctx, buffer, instr); break; @@ -2063,7 +2448,6 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) { struct vkd3d_bytecode_buffer buffer = {0}; - int ret;
put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
@@ -2076,10 +2460,17 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
put_u32(&buffer, D3DSIO_END);
- if (!(ret = buffer.status)) + if (buffer.status) + ctx->result = buffer.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - return ret; + else + { + vkd3d_free(buffer.data); + } + return ctx->result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 3e3f06faeb5..cedc3da4a83 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -391,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s read_dword(&ptr, &e[i].sysval_semantic); read_dword(&ptr, &e[i].component_type); read_dword(&ptr, &e[i].register_index); + e[i].target_location = e[i].register_index; e[i].register_count = 1; read_dword(&ptr, &mask); e[i].mask = mask & 0xff; @@ -493,8 +494,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, return ret; break;
+ case TAG_DXIL: case TAG_SHDR: case TAG_SHEX: + if ((section->tag == TAG_DXIL) != desc->is_dxil) + { + TRACE("Skipping chunk %#x.\n", section->tag); + break; + } if (desc->byte_code) FIXME("Multiple shader code chunks.\n"); desc->byte_code = section->data.code; @@ -505,10 +512,6 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, TRACE("Skipping AON9 shader code chunk.\n"); break;
- case TAG_DXIL: - FIXME("Skipping DXIL shader model 6+ code chunk.\n"); - break; - default: TRACE("Skipping chunk %#x.\n", section->tag); break; @@ -529,12 +532,6 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, { int ret;
- desc->byte_code = NULL; - desc->byte_code_size = 0; - memset(&desc->input_signature, 0, sizeof(desc->input_signature)); - memset(&desc->output_signature, 0, sizeof(desc->output_signature)); - memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); - ret = for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, desc); if (!desc->byte_code) ret = VKD3D_ERROR_INVALID_ARGUMENT; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c new file mode 100644 index 00000000000..f9efe47f95d --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -0,0 +1,2370 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) + +enum bitcode_block_id +{ + BLOCKINFO_BLOCK = 0, + MODULE_BLOCK = 8, + PARAMATTR_BLOCK = 9, + PARAMATTR_GROUP_BLOCK = 10, + CONSTANTS_BLOCK = 11, + FUNCTION_BLOCK = 12, + VALUE_SYMTAB_BLOCK = 14, + METADATA_BLOCK = 15, + METADATA_ATTACHMENT_BLOCK = 16, + TYPE_BLOCK = 17, + USELIST_BLOCK = 18, +}; + +enum bitcode_blockinfo_code +{ + SETBID = 1, + BLOCKNAME = 2, + SETRECORDNAME = 3, +}; + +enum bitcode_block_abbreviation +{ + END_BLOCK = 0, + ENTER_SUBBLOCK = 1, + DEFINE_ABBREV = 2, + UNABBREV_RECORD = 3, +}; + +enum bitcode_abbrev_type +{ + ABBREV_FIXED = 1, + ABBREV_VBR = 2, + ABBREV_ARRAY = 3, + ABBREV_CHAR = 4, + ABBREV_BLOB = 5, +}; + +enum bitcode_address_space +{ + ADDRESS_SPACE_DEFAULT, + ADDRESS_SPACE_DEVICEMEM, + ADDRESS_SPACE_CBUFFER, + ADDRESS_SPACE_GROUPSHARED, +}; + +enum bitcode_module_code +{ + MODULE_CODE_VERSION = 1, + MODULE_CODE_GLOBALVAR = 7, + MODULE_CODE_FUNCTION = 8, +}; + +enum bitcode_constant_code +{ + CST_CODE_SETTYPE = 1, + CST_CODE_NULL = 2, + CST_CODE_UNDEF = 3, + CST_CODE_INTEGER = 4, + CST_CODE_FLOAT = 6, + CST_CODE_STRING = 8, + CST_CODE_CE_GEP = 12, + CST_CODE_CE_INBOUNDS_GEP = 20, + CST_CODE_DATA = 22, +}; + +enum bitcode_function_code +{ + FUNC_CODE_DECLAREBLOCKS = 1, + FUNC_CODE_INST_BINOP = 2, + FUNC_CODE_INST_CAST = 3, + FUNC_CODE_INST_RET = 10, + FUNC_CODE_INST_BR = 11, + FUNC_CODE_INST_SWITCH = 12, + FUNC_CODE_INST_PHI = 16, + FUNC_CODE_INST_ALLOCA = 19, + FUNC_CODE_INST_LOAD = 20, + FUNC_CODE_INST_EXTRACTVAL = 26, + FUNC_CODE_INST_CMP2 = 28, + FUNC_CODE_INST_VSELECT = 
29, + FUNC_CODE_INST_CALL = 34, + FUNC_CODE_INST_ATOMICRMW = 38, + FUNC_CODE_INST_LOADATOMIC = 41, + FUNC_CODE_INST_GEP = 43, + FUNC_CODE_INST_STORE = 44, + FUNC_CODE_INST_STOREATOMIC = 45, + FUNC_CODE_INST_CMPXCHG = 46, +}; + +enum bitcode_type_code +{ + TYPE_CODE_NUMENTRY = 1, + TYPE_CODE_VOID = 2, + TYPE_CODE_FLOAT = 3, + TYPE_CODE_DOUBLE = 4, + TYPE_CODE_LABEL = 5, + TYPE_CODE_INTEGER = 7, + TYPE_CODE_POINTER = 8, + TYPE_CODE_HALF = 10, + TYPE_CODE_ARRAY = 11, + TYPE_CODE_VECTOR = 12, + TYPE_CODE_METADATA = 16, + TYPE_CODE_STRUCT_ANON = 18, + TYPE_CODE_STRUCT_NAME = 19, + TYPE_CODE_STRUCT_NAMED = 20, + TYPE_CODE_FUNCTION = 21, +}; + +enum bitcode_value_symtab_code +{ + VST_CODE_ENTRY = 1, + VST_CODE_BBENTRY = 2, +}; + +struct sm6_pointer_info +{ + const struct sm6_type *type; + enum bitcode_address_space addr_space; +}; + +struct sm6_struct_info +{ + const char *name; + unsigned int elem_count; + const struct sm6_type *elem_types[]; +}; + +struct sm6_function_info +{ + const struct sm6_type *ret_type; + unsigned int param_count; + const struct sm6_type *param_types[]; +}; + +struct sm6_array_info +{ + unsigned int count; + const struct sm6_type *elem_type; +}; + +enum sm6_type_class +{ + TYPE_CLASS_VOID, + TYPE_CLASS_INTEGER, + TYPE_CLASS_FLOAT, + TYPE_CLASS_POINTER, + TYPE_CLASS_STRUCT, + TYPE_CLASS_FUNCTION, + TYPE_CLASS_VECTOR, + TYPE_CLASS_ARRAY, + TYPE_CLASS_LABEL, + TYPE_CLASS_METADATA, +}; + +struct sm6_type +{ + enum sm6_type_class class; + union + { + unsigned int width; + struct sm6_pointer_info pointer; + struct sm6_struct_info *struc; + struct sm6_function_info *function; + struct sm6_array_info array; + } u; +}; + +enum sm6_value_type +{ + VALUE_TYPE_FUNCTION, + VALUE_TYPE_REG, +}; + +struct sm6_function_data +{ + const char *name; + bool is_prototype; + unsigned int attribs_id; +}; + +struct sm6_value +{ + const struct sm6_type *type; + enum sm6_value_type value_type; + bool is_undefined; + union + { + struct sm6_function_data function; + struct 
vkd3d_shader_register reg; + } u; +}; + +struct dxil_record +{ + unsigned int code; + unsigned int operand_count; + uint64_t operands[]; +}; + +struct sm6_symbol +{ + unsigned int id; + const char *name; +}; + +struct sm6_block +{ + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; + size_t instruction_count; +}; + +struct sm6_function +{ + const struct sm6_value *declaration; + + struct sm6_block *blocks[1]; + size_t block_count; +}; + +struct dxil_block +{ + const struct dxil_block *parent; + enum bitcode_block_id id; + unsigned int abbrev_len; + unsigned int start; + unsigned int length; + unsigned int level; + + /* The abbrev, block and record structs are not relocatable. */ + struct dxil_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + unsigned int blockinfo_bid; + bool has_bid; + + struct dxil_block **child_blocks; + size_t child_block_capacity; + size_t child_block_count; + + struct dxil_record **records; + size_t record_capacity; + size_t record_count; +}; + +struct sm6_parser +{ + const uint32_t *ptr, *start, *end; + unsigned int bitpos; + + struct dxil_block root_block; + struct dxil_block *current_block; + + struct dxil_global_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + + struct sm6_type *types; + size_t type_count; + + struct sm6_symbol *global_symbols; + size_t global_symbol_count; + + struct sm6_function *functions; + size_t function_count; + + struct sm6_value *values; + size_t value_count; + size_t value_capacity; + + struct vkd3d_shader_parser p; +}; + +struct dxil_abbrev_operand +{ + uint64_t context; + bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); +}; + +struct dxil_abbrev +{ + unsigned int count; + bool is_array; + struct dxil_abbrev_operand operands[]; +}; + +struct dxil_global_abbrev +{ + unsigned int block_id; + struct dxil_abbrev abbrev; +}; + +static size_t size_add_with_overflow_check(size_t a, size_t b) +{ + size_t i = a + b; + 
return (i < a) ? SIZE_MAX : i; +} + +static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct sm6_parser, p); +} + +static bool sm6_parser_is_end(struct sm6_parser *sm6) +{ + return sm6->ptr == sm6->end; +} + +static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) +{ + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + return *sm6->ptr++; +} + +/* Read "length" (< 32) bits from the stream, lowest bits of the current word first, continuing into the next 32-bit word when needed. Sets sm6->p.failed if the stream ends before all bits are available. */ +static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) +{ + unsigned int l, prev_len = 0; + uint32_t bits; + + if (!length) + return 0; + + assert(length < 32); + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + + assert(sm6->bitpos < 32); + bits = *sm6->ptr >> sm6->bitpos; + l = 32 - sm6->bitpos; + if (l <= length) + { + ++sm6->ptr; + /* Only touch the next word when bits are actually needed from it: if the read ends exactly on a word boundary, the next word may be one past the end of the buffer. */ + if (l < length) + { + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return bits; + } + bits |= *sm6->ptr << l; + } + sm6->bitpos = 0; + prev_len = l; + } + sm6->bitpos += length - prev_len; + + /* Unsigned constant: 1 << 31 is not representable in a signed int. */ + return bits & ((1u << length) - 1); +} + +static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) +{ + unsigned int bits, flag, mask, shift = 0; + uint64_t result = 0; + + if (!length) + return 0; + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + + flag = 1 << (length - 1); + mask = flag - 1; + do + { + bits = sm6_parser_read_bits(sm6, length); + result |= (uint64_t)(bits & mask) << shift; + shift += length - 1; + } while ((bits & flag) && !sm6->p.failed && shift < 64); + + sm6->p.failed |= !!(bits & flag); + + return result; +} + +static void sm6_parser_align_32(struct sm6_parser *sm6) +{ + if (!sm6->bitpos) + return; + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return; + } + + ++sm6->ptr; + sm6->bitpos = 0; +} + +static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record *record) +{ + /* BLOCKINFO blocks must only occur immediately below the module root 
block. */ + if (block->level > 1) + { + WARN("Invalid blockinfo block level %u.\n", block->level); + return false; + } + + switch (record->code) + { + case SETBID: + if (!record->operand_count) + { + WARN("Missing id operand.\n"); + return false; + } + if (record->operands[0] > UINT_MAX) + WARN("Truncating block id %"PRIu64".\n", record->operands[0]); + block->blockinfo_bid = record->operands[0]; + block->has_bid = true; + break; + case BLOCKNAME: + case SETRECORDNAME: + break; + default: + FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); + break; + } + + return true; +} + +static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) +{ + unsigned int reserve; + + switch (block->id) + { + /* Rough initial reserve sizes for small shaders. */ + case CONSTANTS_BLOCK: reserve = 32; break; + case FUNCTION_BLOCK: reserve = 128; break; + case METADATA_BLOCK: reserve = 32; break; + case TYPE_BLOCK: reserve = 32; break; + default: reserve = 8; break; + } + reserve = max(reserve, block->record_count + 1); + if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) + { + ERR("Failed to allocate %u records.\n", reserve); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) + return VKD3D_ERROR_INVALID_SHADER; + + block->records[block->record_count++] = record; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + enum vkd3d_result ret = VKD3D_OK; + unsigned int code, count, i; + struct dxil_record *record; + + code = sm6_parser_read_vbr(sm6, 6); + + count = sm6_parser_read_vbr(sm6, 6); + if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + 
record->code = code; + record->operand_count = count; + + for (i = 0; i < count; ++i) + record->operands[i] = sm6_parser_read_vbr(sm6, 6); + if (sm6->p.failed) + ret = VKD3D_ERROR_INVALID_SHADER; + + if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) + vkd3d_free(record); + + return ret; +} + +static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = context; + return !sm6->p.failed; +} + +static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = sm6_parser_read_bits(sm6, context); + return !sm6->p.failed; +} + +static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = sm6_parser_read_vbr(sm6, context); + return !sm6->p.failed; +} + +static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; + return !sm6->p.failed; +} + +static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + int count = sm6_parser_read_vbr(sm6, 6); + sm6_parser_align_32(sm6); + for (; count > 0; count -= 4) + sm6_parser_read_uint32(sm6); + FIXME("Unhandled blob operand.\n"); + return false; +} + +static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) +{ + enum bitcode_abbrev_type prev_type, type; + unsigned int i; + + abbrev->is_array = false; + + for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) + { + if (sm6_parser_read_bits(sm6, 1)) + { + if (prev_type == ABBREV_ARRAY) + { + WARN("Unexpected literal abbreviation after array.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); + abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; + continue; + } + + switch (type = sm6_parser_read_bits(sm6, 3)) + { + 
case ABBREV_FIXED: + case ABBREV_VBR: + abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5); + abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand + : sm6_parser_read_vbr_operand; + break; + + case ABBREV_ARRAY: + if (prev_type == ABBREV_ARRAY || i != count - 2) + { + WARN("Unexpected array abbreviation.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + abbrev->is_array = true; + --i; + --count; + break; + + case ABBREV_CHAR: + abbrev->operands[i].read_operand = sm6_parser_read_char6_operand; + break; + + case ABBREV_BLOB: + if (prev_type == ABBREV_ARRAY || i != count - 1) + { + WARN("Unexpected blob abbreviation.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + abbrev->operands[i].read_operand = sm6_parser_read_blob_operand; + break; + } + + prev_type = type; + } + + abbrev->count = count; + + return sm6->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + unsigned int count = sm6_parser_read_vbr(sm6, 5); + struct dxil_global_abbrev *global_abbrev; + enum vkd3d_result ret; + + assert(block->id == BLOCKINFO_BLOCK); + + if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) + || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) + { + ERR("Failed to allocate global abbreviation.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0) + { + vkd3d_free(global_abbrev); + return ret; + } + + if (!block->has_bid) + { + WARN("Missing blockinfo block id.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + if (block->blockinfo_bid == MODULE_BLOCK) + { + FIXME("Unhandled global abbreviation for module block.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + global_abbrev->block_id = block->blockinfo_bid; + + 
sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + struct dxil_abbrev *abbrev; + enum vkd3d_result ret; + unsigned int count; + + if (block->id == BLOCKINFO_BLOCK) + return sm6_parser_add_global_abbrev(sm6); + + count = sm6_parser_read_vbr(sm6, 5); + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) + || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) + { + ERR("Failed to allocate block abbreviation.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) + { + vkd3d_free(abbrev); + return ret; + } + + block->abbrevs[block->abbrev_count++] = abbrev; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) +{ + enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; + struct dxil_block *block = sm6->current_block; + struct dxil_record *temp, *record; + unsigned int i, count, array_len; + struct dxil_abbrev *abbrev; + uint64_t code; + + if (abbrev_id >= block->abbrev_count) + { + WARN("Invalid abbreviation id %u.\n", abbrev_id); + return VKD3D_ERROR_INVALID_SHADER; + } + + abbrev = block->abbrevs[abbrev_id]; + if (!(count = abbrev->count)) + return VKD3D_OK; + if (count == 1 && abbrev->is_array) + return VKD3D_ERROR_INVALID_SHADER; + + /* First operand is the record code. The array is included in the count, but will be done separately. 
*/ + count -= abbrev->is_array + 1; + if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) + goto fail; + if (code > UINT_MAX) + FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); + record->code = code; + + for (i = 0; i < count; ++i) + if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) + goto fail; + record->operand_count = count; + + /* An array can occur only as the last operand. */ + if (abbrev->is_array) + { + array_len = sm6_parser_read_vbr(sm6, 6); + if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count + array_len); + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + record = temp; + + for (i = 0; i < array_len; ++i) + { + if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, + &record->operands[count + i])) + { + goto fail; + } + } + record->operand_count += array_len; + } + + if ((ret = dxil_block_add_record(block, record)) < 0) + goto fail; + + return VKD3D_OK; + +fail: + vkd3d_free(record); + return ret; +} + +static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, + struct sm6_parser *sm6); + +static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) +{ + unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; + struct dxil_block *block; + enum vkd3d_result ret; + + sm6->current_block = parent; + + do + { + unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); + + switch (abbrev_id) + { + case END_BLOCK: + sm6_parser_align_32(sm6); + return VKD3D_OK; + + case ENTER_SUBBLOCK: + if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) + { + WARN("Invalid subblock parent id %u.\n", parent->id); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, + max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) + || !(block = vkd3d_calloc(1, sizeof(*block)))) + { + ERR("Failed to allocate block.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_block_init(block, parent, sm6)) < 0) + { + vkd3d_free(block); + return ret; + } + + parent->child_blocks[parent->child_block_count++] = block; + sm6->current_block = parent; + break; + + case DEFINE_ABBREV: + if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) + return ret; + break; + + case UNABBREV_RECORD: + if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) + { + WARN("Failed to read unabbreviated record.\n"); + return ret; + } + break; + + default: + if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) + { + WARN("Failed to read abbreviated record.\n"); + return ret; + } + break; + } + } while (!sm6->p.failed); + + return VKD3D_ERROR_INVALID_SHADER; +} + +static size_t sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, + unsigned int block_id) +{ + size_t i, count; + + for (i = 0, count = 0; i < sm6->abbrev_count; ++i) + count += sm6->abbrevs[i]->block_id == block_id; + + return count; +} + +static void dxil_block_destroy(struct dxil_block *block) +{ + size_t i; + + for (i = 0; i < block->record_count; ++i) + vkd3d_free(block->records[i]); + vkd3d_free(block->records); + + for (i = 0; i < block->child_block_count; ++i) + { + 
dxil_block_destroy(block->child_blocks[i]); + vkd3d_free(block->child_blocks[i]); + } + vkd3d_free(block->child_blocks); + + block->records = NULL; + block->record_count = 0; + block->child_blocks = NULL; + block->child_block_count = 0; +} + +static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, + struct sm6_parser *sm6) +{ + size_t i, abbrev_count = 0; + enum vkd3d_result ret; + + block->parent = parent; + block->level = parent ? parent->level + 1 : 0; + block->id = sm6_parser_read_vbr(sm6, 8); + block->abbrev_len = sm6_parser_read_vbr(sm6, 4); + sm6_parser_align_32(sm6); + block->length = sm6_parser_read_uint32(sm6); + block->start = sm6->ptr - sm6->start; + + if (sm6->p.failed) + return VKD3D_ERROR_INVALID_SHADER; + + if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) + { + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, + block->abbrev_count, sizeof(*block->abbrevs))) + { + ERR("Failed to allocate block abbreviations.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < sm6->abbrev_count; ++i) + if (sm6->abbrevs[i]->block_id == block->id) + block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; + + assert(abbrev_count == block->abbrev_count); + } + + if ((ret = dxil_block_read(block, sm6)) < 0) + dxil_block_destroy(block); + + for (i = abbrev_count; i < block->abbrev_count; ++i) + vkd3d_free(block->abbrevs[i]); + vkd3d_free(block->abbrevs); + block->abbrevs = NULL; + block->abbrev_count = 0; + + return ret; +} + +static size_t dxil_block_compute_function_count(const struct dxil_block *root) +{ + size_t i, count; + + for (i = 0, count = 0; i < root->child_block_count; ++i) + count += root->child_blocks[i]->id == FUNCTION_BLOCK; + + return count; +} + +static size_t dxil_block_compute_module_decl_count(const struct dxil_block *block) +{ + size_t i, count; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += 
block->records[i]->code == MODULE_CODE_FUNCTION; + return count; +} + +static size_t dxil_block_compute_constants_count(const struct dxil_block *block) +{ + size_t i, count; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += block->records[i]->code != CST_CODE_SETTYPE; + return count; +} + +static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, size_t count) +{ + size_t i; + + for (i = 0; i < count; ++i) + vkd3d_free(abbrevs[i]); + vkd3d_free(abbrevs); +} + +static const struct dxil_block *sm6_parser_get_level_one_block(const struct sm6_parser *sm6, + enum bitcode_block_id id, bool *is_unique) +{ + const struct dxil_block *block, *found = NULL; + size_t i; + + for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) + { + block = sm6->root_block.child_blocks[i]; + if (block->id != id) + continue; + + if (!found) + found = block; + else + *is_unique = false; + } + + return found; +} + +static char *dxil_record_to_string(const struct dxil_record *record, unsigned int offset) +{ + unsigned int i; + char *str; + + assert(offset <= record->operand_count); + if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) + return NULL; + + for (i = offset; i < record->operand_count; ++i) + str[i - offset] = record->operands[i]; + + return str; +} + +static bool dxil_record_validate_operand_min_count(const struct dxil_record *record, unsigned int min_count, + struct sm6_parser *sm6) +{ + if (record->operand_count >= min_count) + return true; + + WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, + "Invalid operand count %u for record code %u.", record->operand_count, record->code); + return false; +} + +static void dxil_record_validate_operand_max_count(const struct dxil_record *record, unsigned int max_count, + struct sm6_parser *sm6) +{ + if (record->operand_count <= max_count) + return; + + 
WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); +} + +static bool dxil_record_validate_operand_count(const struct dxil_record *record, unsigned int min_count, + unsigned int max_count, struct sm6_parser *sm6) +{ + dxil_record_validate_operand_max_count(record, max_count, sm6); + return dxil_record_validate_operand_min_count(record, min_count, sm6); +} + +static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) +{ + const struct dxil_record *record; + size_t i, type_count, type_index; + const struct dxil_block *block; + char *struct_name = NULL; + unsigned int j, count; + struct sm6_type *type; + uint64_t type_id; + bool is_unique; + + sm6->p.location.line = 0; + sm6->p.location.column = 0; + + if (!(block = sm6_parser_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) + { + WARN("No type definitions found.\n"); + return VKD3D_OK; + } + if (!is_unique) + WARN("Ignoring invalid extra type table(s).\n"); + + sm6->p.location.line = block->id; + + type_count = 0; + for (i = 0; i < block->record_count; ++i) + type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; + + /* The type array must not be relocated. 
*/ + if (!(sm6->types = vkd3d_calloc(type_count, sizeof(*sm6->types)))) + { + ERR("Failed to allocate type array.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; + + type = &sm6->types[sm6->type_count]; + type_index = sm6->type_count; + + switch (record->code) + { + case TYPE_CODE_ARRAY: + case TYPE_CODE_VECTOR: + if (!dxil_record_validate_operand_count(record, 2, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + type->class = record->code == TYPE_CODE_ARRAY ? TYPE_CLASS_ARRAY : TYPE_CLASS_VECTOR; + + if (!(type->u.array.count = record->operands[0])) + { + TRACE("Setting unbounded for type %zu.\n", type_index); + type->u.array.count = UINT_MAX; + } + + if ((type_id = record->operands[1]) >= type_count) + { + WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.array.elem_type = &sm6->types[type_id]; + break; + + case TYPE_CODE_DOUBLE: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_FLOAT; + type->u.width = 64; + break; + + case TYPE_CODE_FLOAT: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_FLOAT; + type->u.width = 32; + break; + + case TYPE_CODE_FUNCTION: + if (!dxil_record_validate_operand_min_count(record, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + if (record->operands[0]) + FIXME("Unhandled vararg function type %zu.\n", type_index); + + type->class = TYPE_CLASS_FUNCTION; + + if ((type_id = record->operands[1]) >= type_count) + { + WARN("Invalid return type id %"PRIu64" for type %zu.\n", type_id, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + + count = record->operand_count - 2; + if (vkd3d_object_range_overflow(sizeof(type->u.function), count, sizeof(type->u.function->param_types[0])) + || !(type->u.function = vkd3d_malloc(offsetof(struct sm6_function_info, param_types[count])))) + { + 
ERR("Failed to allocate function parameter types.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + type->u.function->ret_type = &sm6->types[type_id]; + type->u.function->param_count = count; + for (j = 0; j < count; ++j) + { + if ((type_id = record->operands[j + 2]) >= type_count) + { + WARN("Invalid parameter type id %"PRIu64" for type %zu.\n", type_id, type_index); + vkd3d_free(type->u.function); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.function->param_types[j] = &sm6->types[type_id]; + } + break; + + case TYPE_CODE_HALF: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_FLOAT; + type->u.width = 16; + break; + + case TYPE_CODE_INTEGER: + { + uint64_t width; + + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + type->class = TYPE_CLASS_INTEGER; + + switch ((width = record->operands[0])) + { + case 1: + case 8: + case 16: + case 32: + case 64: + break; + default: + WARN("Invalid integer width %"PRIu64" for type %zu.\n", width, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.width = width; + break; + } + + case TYPE_CODE_LABEL: + type->class = TYPE_CLASS_LABEL; + break; + + case TYPE_CODE_METADATA: + type->class = TYPE_CLASS_METADATA; + break; + + case TYPE_CODE_NUMENTRY: + continue; + + case TYPE_CODE_POINTER: + if (!dxil_record_validate_operand_count(record, 1, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + type->class = TYPE_CLASS_POINTER; + + if ((type_id = record->operands[0]) >= type_count) + { + WARN("Invalid pointee type id %"PRIu64" for type %zu.\n", type_id, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.pointer.type = &sm6->types[type_id]; + type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : ADDRESS_SPACE_DEFAULT; + break; + + case TYPE_CODE_STRUCT_ANON: + case TYPE_CODE_STRUCT_NAMED: + if (!dxil_record_validate_operand_min_count(record, 2, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + if (record->code == TYPE_CODE_STRUCT_NAMED && !struct_name) + { + WARN("Missing struct name before struct type %zu.\n", type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + + type->class = TYPE_CLASS_STRUCT; + + count = record->operand_count - 1; + if (vkd3d_object_range_overflow(sizeof(type->u.struc), count, sizeof(type->u.struc->elem_types[0])) + || !(type->u.struc = vkd3d_malloc(offsetof(struct sm6_struct_info, elem_types[count])))) + { + ERR("Failed to allocate struct element types.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (record->operands[0]) + FIXME("Ignoring struct packed attribute.\n"); + + type->u.struc->elem_count = count; + for (j = 0; j < count; ++j) + { + if ((type_id = record->operands[j + 1]) >= type_count) + { + WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); + vkd3d_free(type->u.struc); + return VKD3D_ERROR_INVALID_SHADER; + } + type->u.struc->elem_types[j] = &sm6->types[type_id]; + } + + if (record->code == TYPE_CODE_STRUCT_ANON) + { + type->u.struc->name = NULL; + break; + } + + type->u.struc->name = struct_name; + struct_name = NULL; + break; + + case TYPE_CODE_STRUCT_NAME: + if (!(struct_name = dxil_record_to_string(record, 0))) + { + ERR("Failed to allocate struct name.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if (!struct_name[0]) + WARN("Struct name is empty for type %zu.\n", type_index); + continue; + + case TYPE_CODE_VOID: + dxil_record_validate_operand_max_count(record, 0, sm6); + type->class = TYPE_CLASS_VOID; + break; + + default: + FIXME("Unhandled type %u at index %zu.\n", record->code, type_index); + return VKD3D_ERROR_INVALID_SHADER; + } + ++sm6->type_count; + } + + assert(sm6->type_count == type_count); + + if (struct_name) + { + WARN("Unused struct name %s.\n", 
struct_name);
+        vkd3d_free(struct_name);
+    }
+
+    return VKD3D_OK;
+}
+
+/* Type class predicates. Each of these only inspects type->class. */
+static inline bool sm6_type_is_void(const struct sm6_type *type)
+{
+    return type->class == TYPE_CLASS_VOID;
+}
+
+static inline bool sm6_type_is_integer(const struct sm6_type *type)
+{
+    return type->class == TYPE_CLASS_INTEGER;
+}
+
+static inline bool sm6_type_is_floating_point(const struct sm6_type *type)
+{
+    return type->class == TYPE_CLASS_FLOAT;
+}
+
+static inline bool sm6_type_is_numeric(const struct sm6_type *type)
+{
+    return type->class == TYPE_CLASS_INTEGER || type->class == TYPE_CLASS_FLOAT;
+}
+
+static inline bool sm6_type_is_pointer(const struct sm6_type *type)
+{
+    return type->class == TYPE_CLASS_POINTER;
+}
+
+/* Return true for an array or vector whose element type is numeric, or for a
+ * struct whose direct members are all numeric. */
+static bool sm6_type_is_numeric_aggregate(const struct sm6_type *type)
+{
+    unsigned int i;
+
+    switch (type->class)
+    {
+        case TYPE_CLASS_ARRAY:
+        case TYPE_CLASS_VECTOR:
+            return sm6_type_is_numeric(type->u.array.elem_type);
+
+        case TYPE_CLASS_STRUCT:
+            /* Do not handle nested structs. Support can be added if they show up. */
+            for (i = 0; i < type->u.struc->elem_count; ++i)
+                if (!sm6_type_is_numeric(type->u.struc->elem_types[i]))
+                    return false;
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+static inline bool sm6_type_is_struct(const struct sm6_type *type)
+{
+    return type->class == TYPE_CLASS_STRUCT;
+}
+
+static inline bool sm6_type_is_function(const struct sm6_type *type)
+{
+    return type->class == TYPE_CLASS_FUNCTION;
+}
+
+/* Return true if the type is a pointer whose pointee is a function type. */
+static inline bool sm6_type_is_function_pointer(const struct sm6_type *type)
+{
+    return sm6_type_is_pointer(type) && sm6_type_is_function(type->u.pointer.type);
+}
+
+/* Return true if the type is the named struct "dx.types.Handle".
+ * Anonymous structs are stored with a NULL name, so the name must be checked
+ * before it is handed to strcmp(); an anonymous struct is never a handle. */
+static inline bool sm6_type_is_handle(const struct sm6_type *type)
+{
+    return sm6_type_is_struct(type) && type->u.struc->name
+            && !strcmp(type->u.struc->name, "dx.types.Handle");
+}
+
+static inline const struct sm6_type *sm6_type_get_element_type(const struct sm6_type *type)
+{
+    return (type->class == TYPE_CLASS_ARRAY || type->class == TYPE_CLASS_VECTOR) ?
type->u.array.elem_type : type; +} + +static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type *type, + enum bitcode_address_space addr_space, struct sm6_parser *sm6) +{ + size_t i, start = type - sm6->types; + const struct sm6_type *pointer_type; + + /* DXC seems usually to place the pointer type immediately after its pointee. */ + for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) + { + pointer_type = &sm6->types[i]; + if (sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type + && pointer_type->u.pointer.addr_space == addr_space) + return pointer_type; + } + + return NULL; +} + +static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) +{ + if (type_id >= sm6->type_count) + { + WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, + "DXIL type id %"PRIu64" is invalid.", type_id); + return NULL; + } + return &sm6->types[type_id]; +} + +static int global_symbol_compare(const void *a, const void *b) +{ + return vkd3d_u32_compare(((const struct sm6_symbol *)a)->id, ((const struct sm6_symbol *)b)->id); +} + +static enum vkd3d_result sm6_parser_symtab_init(struct sm6_parser *sm6) +{ + const struct dxil_record *record; + const struct dxil_block *block; + struct sm6_symbol *symbol; + size_t i, count; + bool is_unique; + + sm6->p.location.line = 0; + sm6->p.location.column = 0; + + if (!(block = sm6_parser_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) + { + /* There should always be at least one symbol: the name of the entry point function. 
*/
+        WARN("No value symtab block found.\n");
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+    if (!is_unique)
+        FIXME("Ignoring extra value symtab block(s).\n");
+
+    sm6->p.location.line = block->id;
+
+    for (i = 0, count = 0; i < block->record_count; ++i)
+        count += block->records[i]->code == VST_CODE_ENTRY;
+
+    if (!(sm6->global_symbols = vkd3d_calloc(count, sizeof(*sm6->global_symbols))))
+    {
+        ERR("Failed to allocate global symbols.\n");
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+
+    for (i = 0; i < block->record_count; ++i)
+    {
+        sm6->p.location.column = i;
+        record = block->records[i];
+
+        if (record->code != VST_CODE_ENTRY)
+        {
+            FIXME("Unhandled symtab code %u.\n", record->code);
+            continue;
+        }
+        if (!dxil_record_validate_operand_min_count(record, 1, sm6))
+            continue;
+
+        symbol = &sm6->global_symbols[sm6->global_symbol_count];
+        symbol->id = record->operands[0];
+        if (!(symbol->name = dxil_record_to_string(record, 1)))
+        {
+            ERR("Failed to allocate symbol name.\n");
+            return VKD3D_ERROR_OUT_OF_MEMORY;
+        }
+        ++sm6->global_symbol_count;
+    }
+
+    sm6->p.location.column = block->record_count;
+
+    qsort(sm6->global_symbols, sm6->global_symbol_count, sizeof(*sm6->global_symbols), global_symbol_compare);
+    for (i = 1; i < sm6->global_symbol_count; ++i)
+    {
+        if (sm6->global_symbols[i].id == sm6->global_symbols[i - 1].id)
+        {
+            WARN("Invalid duplicate symbol id %u.\n", sm6->global_symbols[i].id);
+            return VKD3D_ERROR_INVALID_SHADER;
+        }
+    }
+
+    return VKD3D_OK;
+}
+
+/* Return the name recorded for global symbol 'id', or NULL if the symbol
+ * table has no entry with that id. The returned string is owned by the
+ * symbol table. */
+static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm6, size_t id)
+{
+    size_t i, start;
+
+    /* The table is empty if the symtab block contained no usable
+     * VST_CODE_ENTRY record; the modulo below would then divide by zero. */
+    if (!sm6->global_symbol_count)
+        return NULL;
+
+    /* id == array index is normally true */
+    i = start = id % sm6->global_symbol_count;
+    do
+    {
+        if (sm6->global_symbols[i].id == id)
+            return sm6->global_symbols[i].name;
+        i = (i + 1) % sm6->global_symbol_count;
+    } while (i != start);
+
+    return NULL;
+}
+
+static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn)
+{
+    assert(fn->value_type == VALUE_TYPE_FUNCTION);
+
return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); +} + +static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) +{ + assert(sm6->value_count < sm6->value_capacity); + return &sm6->values[sm6->value_count]; +} + +static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) +{ + if (type->class == TYPE_CLASS_INTEGER) + { + switch (type->u.width) + { + case 8: + return VKD3D_DATA_UINT8; + case 32: + return VKD3D_DATA_UINT; + default: + FIXME("Unhandled width %u.\n", type->u.width); + return VKD3D_DATA_UINT; + } + } + else if (type->class == TYPE_CLASS_FLOAT) + { + switch (type->u.width) + { + case 32: + return VKD3D_DATA_FLOAT; + case 64: + return VKD3D_DATA_DOUBLE; + default: + FIXME("Unhandled width %u.\n", type->u.width); + return VKD3D_DATA_FLOAT; + } + } + + FIXME("Unhandled type %u.\n", type->class); + return VKD3D_DATA_UINT; +} + +/* Recurse through the block tree while maintaining a current value count. The current + * count is the sum of the global count plus all declarations within the current function. + * Store into value_capacity the highest count seen. */ +static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, + const struct dxil_block *block, size_t value_count) +{ + size_t i, old_value_count = value_count; + + if (block->id == MODULE_BLOCK) + value_count = size_add_with_overflow_check(value_count, dxil_block_compute_module_decl_count(block)); + + for (i = 0; i < block->child_block_count; ++i) + value_count = sm6_parser_compute_max_value_count(sm6, block->child_blocks[i], value_count); + + switch (block->id) + { + case CONSTANTS_BLOCK: + /* Function local constants are contained in a child block of the function block. */ + value_count = size_add_with_overflow_check(value_count, dxil_block_compute_constants_count(block)); + break; + case FUNCTION_BLOCK: + /* A function must start with a block count, which emits no value. 
This formula is likely to
+             * overestimate the value count somewhat, but this should be no problem. */
+            value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1);
+            sm6->value_capacity = max(sm6->value_capacity, value_count);
+            /* The value count returns to its previous value after handling a function. */
+            if (value_count < SIZE_MAX)
+                value_count = old_value_count;
+            break;
+        default:
+            break;
+    }
+
+    return value_count;
+}
+
+/* Fill the current value slot from a MODULE_CODE_FUNCTION record.
+ * operands[0] is the function type id, operands[1] the calling convention,
+ * operands[2] the is-prototype flag, operands[3] the linkage and operands[4]
+ * the 1-based attributes id. The value's type becomes the default address
+ * space pointer to the function type. Returns false if the record is
+ * invalid. */
+static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record)
+{
+    const unsigned int max_count = 15;
+    const struct sm6_type *func_type, *ret_type;
+    struct sm6_value *fn;
+    unsigned int i, j;
+
+    if (!dxil_record_validate_operand_count(record, 8, max_count, sm6))
+        return false;
+
+    fn = sm6_parser_get_current_value(sm6);
+    fn->value_type = VALUE_TYPE_FUNCTION;
+    if (!(fn->u.function.name = sm6_parser_get_global_symbol_name(sm6, sm6->value_count)))
+    {
+        WARN("Missing symbol name for function %zu.\n", sm6->value_count);
+        fn->u.function.name = "";
+    }
+
+    if (!(func_type = sm6_parser_get_type(sm6, record->operands[0])))
+        return false;
+    if (!sm6_type_is_function(func_type))
+    {
+        WARN("Type is not a function.\n");
+        return false;
+    }
+    ret_type = func_type->u.function->ret_type;
+
+    /* Keep the function type in its own variable: fn->type is NULL when the
+     * pointer lookup fails, so it must not be dereferenced in the warning. */
+    if (!(fn->type = sm6_type_get_pointer_to_type(func_type, ADDRESS_SPACE_DEFAULT, sm6)))
+    {
+        WARN("Failed to get pointer type for type %u.\n", func_type->class);
+        return false;
+    }
+
+    if (record->operands[1])
+        WARN("Ignoring calling convention %#"PRIx64".\n", record->operands[1]);
+
+    fn->u.function.is_prototype = !!record->operands[2];
+
+    if (record->operands[3])
+        WARN("Ignoring linkage %#"PRIx64".\n", record->operands[3]);
+
+    if (record->operands[4] > UINT_MAX)
+        WARN("Invalid attributes id %#"PRIx64".\n", record->operands[4]);
+    /* 1-based index. */
+    if ((fn->u.function.attribs_id = record->operands[4]))
+        TRACE("Ignoring function attributes.\n");
+
+    /* These always seem to be zero.
*/ + for (i = 5, j = 0; i < min(record->operand_count, max_count); ++i) + j += !!record->operands[i]; + if (j) + WARN("Ignoring %u operands.\n", j); + + if (sm6_value_is_dx_intrinsic_dcl(fn) && !sm6_type_is_void(ret_type) && !sm6_type_is_numeric(ret_type) + && !sm6_type_is_numeric_aggregate(ret_type) && !sm6_type_is_handle(ret_type)) + { + WARN("Unexpected return type for dx intrinsic function '%s'.\n", fn->u.function.name); + } + + ++sm6->value_count; + + return true; +} + +static inline uint64_t decode_rotated_signed_value(uint64_t value) +{ + if (value != 1) + { + bool neg = value & 1; + value >>= 1; + return neg ? -value : value; + } + return value << 63; +} + +static inline float bitcast_uint64_to_float(uint64_t value) +{ + union + { + uint32_t uint32_value; + float float_value; + } u; + + u.uint32_value = value; + return u.float_value; +} + +static inline double bitcast_uint64_to_double(uint64_t value) +{ + union + { + uint64_t uint64_value; + double double_value; + } u; + + u.uint64_value = value; + return u.double_value; +} + +static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) +{ + enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; + const struct sm6_type *type, *elem_type; + enum vkd3d_data_type reg_data_type; + const struct dxil_record *record; + struct sm6_value *dst; + size_t i, value_idx; + uint64_t value; + + for (i = 0, type = NULL; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; + value_idx = sm6->value_count; + + if (record->code == CST_CODE_SETTYPE) + { + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + if (!(type = sm6_parser_get_type(sm6, record->operands[0]))) + return VKD3D_ERROR_INVALID_SHADER; + + elem_type = sm6_type_get_element_type(type); + if (sm6_type_is_numeric(elem_type)) + { + reg_data_type = vkd3d_data_type_from_sm6_type(elem_type); + reg_type = elem_type->u.width > 32 ? 
VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST; + } + else + { + reg_data_type = VKD3D_DATA_UNUSED; + reg_type = VKD3DSPR_INVALID; + } + + if (i == block->record_count - 1) + WARN("Unused SETTYPE record.\n"); + + continue; + } + + if (!type) + { + WARN("Constant record %zu has no type.\n", value_idx); + return VKD3D_ERROR_INVALID_SHADER; + } + + dst = sm6_parser_get_current_value(sm6); + dst->type = type; + dst->value_type = VALUE_TYPE_REG; + dst->u.reg.type = reg_type; + dst->u.reg.immconst_type = VKD3D_IMMCONST_SCALAR; + dst->u.reg.data_type = reg_data_type; + + switch (record->code) + { + case CST_CODE_NULL: + /* Register constant data is already zero-filled. */ + break; + + case CST_CODE_INTEGER: + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + if (!sm6_type_is_integer(type)) + { + WARN("Invalid integer of non-integer type %u at constant idx %zu.\n", type->class, value_idx); + return VKD3D_ERROR_INVALID_SHADER; + } + + value = decode_rotated_signed_value(record->operands[0]); + if (type->u.width <= 32) + dst->u.reg.u.immconst_uint[0] = value & ((1ull << type->u.width) - 1); + else + dst->u.reg.u.immconst_uint64[0] = value; + + break; + + case CST_CODE_FLOAT: + if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + if (!sm6_type_is_floating_point(type)) + { + WARN("Invalid float of non-fp type %u at constant idx %zu.\n", type->class, value_idx); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (type->u.width == 16) + FIXME("Half float type is not supported yet.\n"); + else if (type->u.width == 32) + dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); + else if (type->u.width == 64) + dst->u.reg.u.immconst_double[0] = bitcast_uint64_to_double(record->operands[0]); + else + vkd3d_unreachable(); + + break; + + case CST_CODE_DATA: + WARN("Unhandled constant array.\n"); + break; + + case CST_CODE_UNDEF: + 
dxil_record_validate_operand_max_count(record, 0, sm6); + dst->u.reg.type = VKD3DSPR_UNDEF; + /* Mark as explicitly undefined, not the result of a missing constant code or instruction. */ + dst->is_undefined = true; + break; + + default: + FIXME("Unhandled constant code %u.\n", record->code); + dst->u.reg.type = VKD3DSPR_UNDEF; + break; + } + + ++sm6->value_count; + } + + return VKD3D_OK; +} + +static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) +{ + if (!shader_instruction_array_reserve(&sm6->p.instructions, sm6->p.instructions.count + extra)) + { + ERR("Failed to allocate instruction.\n"); + return NULL; + } + return &sm6->p.instructions.elements[sm6->p.instructions.count]; +} + +/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ +static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, + enum vkd3d_shader_opcode handler_idx) +{ + struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); + assert(ins); + shader_instruction_init(ins, handler_idx); + ++sm6->p.instructions.count; + return ins; +} + +static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) +{ + const struct dxil_block *block = &sm6->root_block; + const struct dxil_record *record; + uint64_t version; + size_t i; + + sm6->p.location.line = block->id; + sm6->p.location.column = 0; + + for (i = 0; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; + switch (record->code) + { + case MODULE_CODE_FUNCTION: + if (!sm6_parser_declare_function(sm6, record)) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL, + "A DXIL function declaration is invalid."); + return VKD3D_ERROR_INVALID_SHADER; + } + break; + + case MODULE_CODE_GLOBALVAR: + FIXME("Global variables are not implemented yet.\n"); + break; + + case MODULE_CODE_VERSION: + if 
(!dxil_record_validate_operand_count(record, 1, 1, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + if ((version = record->operands[0]) != 1) + { + FIXME("Unsupported format version %#"PRIx64".\n", version); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, + "Bitcode format version %#"PRIx64" is unsupported.", version); + return VKD3D_ERROR_INVALID_SHADER; + } + break; + + default: + break; + } + } + + return VKD3D_OK; +} + +static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) +{ + size_t i, count = sm6->function_count; + + for (i = 0; i < sm6->value_count; ++i) + { + if (sm6_type_is_function_pointer(sm6->values[i].type) && !sm6->values[i].u.function.is_prototype && !count--) + break; + } + if (i == sm6->value_count) + return NULL; + + ++sm6->function_count; + return &sm6->values[i]; +} + +static struct sm6_block *sm6_block_create() +{ + struct sm6_block *block = vkd3d_calloc(1, sizeof(*block)); + return block; +} + +static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, + struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) +{ + if (!dxil_record_validate_operand_count(record, 0, 1, sm6)) + return; + + if (record->operand_count) + FIXME("Non-void return is not implemented.\n"); + + ins->handler_idx = VKD3DSIH_NOP; +} + +static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, + struct sm6_function *function) +{ + struct vkd3d_shader_instruction *ins; + const struct dxil_record *record; + struct sm6_block *code_block; + struct sm6_value *dst; + size_t i, block_idx; + bool ret_found; + enum + { + RESULT_VALUE, + RESULT_TERMINATE, + } result_type; + + if (sm6->function_count) + { + FIXME("Multiple functions are not supported yet.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + if (!(function->declaration = sm6_parser_next_function_definition(sm6))) + { + WARN("Failed to find definition to match 
function body.\n");
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+
+    if (block->record_count < 2)
+    {
+        /* It should contain at least a block count and a RET instruction. */
+        WARN("Invalid function block record count %zu.\n", block->record_count);
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+    if (block->records[0]->code != FUNC_CODE_DECLAREBLOCKS || !block->records[0]->operand_count
+            || block->records[0]->operands[0] > UINT_MAX)
+    {
+        WARN("Block count declaration not found or invalid.\n");
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+
+    if (!(function->block_count = block->records[0]->operands[0]))
+    {
+        WARN("Function contains no blocks.\n");
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+    /* sm6_parser_next_function_definition() has already counted this function,
+     * so sm6_functions_cleanup() will destroy blocks[0 .. block_count) for it.
+     * Reset block_count on the failure paths below, where no block exists. */
+    if (function->block_count > 1)
+    {
+        FIXME("Branched shaders are not supported yet.\n");
+        function->block_count = 0;
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+
+    if (!(function->blocks[0] = sm6_block_create()))
+    {
+        ERR("Failed to allocate code block.\n");
+        function->block_count = 0;
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+    code_block = function->blocks[0];
+
+    for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i)
+    {
+        sm6->p.location.column = i;
+
+        /* block->record_count - 1 is the instruction count, but some instructions
+         * can emit >1 IR instruction, so extra may be used.
*/ + if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, + max(code_block->instruction_count + 1, block->record_count), sizeof(*code_block->instructions))) + { + ERR("Failed to allocate instructions.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + ins = &code_block->instructions[code_block->instruction_count]; + ins->handler_idx = VKD3DSIH_INVALID; + + dst = sm6_parser_get_current_value(sm6); + dst->type = NULL; + dst->value_type = VALUE_TYPE_REG; + result_type = RESULT_VALUE; + + record = block->records[i]; + switch (record->code) + { + case FUNC_CODE_INST_RET: + sm6_parser_emit_ret(sm6, record, code_block, ins); + result_type = RESULT_TERMINATE; + ret_found = true; + break; + default: + FIXME("Unhandled dxil instruction %u.\n", record->code); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (result_type == RESULT_TERMINATE) + { + ++block_idx; + code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; + } + if (code_block) + code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; + else + assert(ins->handler_idx == VKD3DSIH_NOP); + sm6->value_count += !!dst->type; + } + + if (!ret_found) + { + WARN("Function contains no RET instruction.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + + return VKD3D_OK; +} + +static bool sm6_block_emit_instructions(struct sm6_block *block, struct sm6_parser *sm6) +{ + struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, block->instruction_count + 1); + + if (!ins) + return false; + + memcpy(ins, block->instructions, block->instruction_count * sizeof(*block->instructions)); + sm6->p.instructions.count += block->instruction_count; + + sm6_parser_add_instruction(sm6, VKD3DSIH_RET); + + return true; +} + +static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, + unsigned int level) +{ + size_t i, old_value_count = sm6->value_count; + struct sm6_function *function; + enum vkd3d_result ret; 
+ + for (i = 0; i < block->child_block_count; ++i) + { + if ((ret = sm6_parser_module_init(sm6, block->child_blocks[i], level + 1)) < 0) + return ret; + } + + sm6->p.location.line = block->id; + sm6->p.location.column = 0; + + switch (block->id) + { + case CONSTANTS_BLOCK: + return sm6_parser_constants_init(sm6, block); + + case FUNCTION_BLOCK: + function = &sm6->functions[sm6->function_count]; + if ((ret = sm6_parser_function_init(sm6, block, function)) < 0) + return ret; + /* The value index returns to its previous value after handling a function. It's usually nonzero + * at the start because of global constants/variables/function declarations. Function constants + * occur in a child block, so value_count is already saved before they are emitted. */ + memset(&sm6->values[old_value_count], 0, (sm6->value_count - old_value_count) * sizeof(*sm6->values)); + sm6->value_count = old_value_count; + break; + + case BLOCKINFO_BLOCK: + case MODULE_BLOCK: + case PARAMATTR_BLOCK: + case PARAMATTR_GROUP_BLOCK: + case VALUE_SYMTAB_BLOCK: + case METADATA_BLOCK: + case METADATA_ATTACHMENT_BLOCK: + case TYPE_BLOCK: + break; + + default: + FIXME("Unhandled block id %u.\n", block->id); + break; + } + + return VKD3D_OK; +} + +static void sm6_type_table_cleanup(struct sm6_type *types, size_t count) +{ + size_t i; + + if (!types) + return; + + for (i = 0; i < count; ++i) + { + switch (types[i].class) + { + case TYPE_CLASS_STRUCT: + vkd3d_free((void *)types[i].u.struc->name); + vkd3d_free(types[i].u.struc); + break; + case TYPE_CLASS_FUNCTION: + vkd3d_free(types[i].u.function); + break; + default: + break; + } + } + + vkd3d_free(types); +} + +static void sm6_symtab_cleanup(struct sm6_symbol *symbols, size_t count) +{ + size_t i; + + for (i = 0; i < count; ++i) + vkd3d_free((void *)symbols[i].name); + vkd3d_free(symbols); +} + +static void sm6_block_destroy(struct sm6_block *block) +{ + vkd3d_free(block->instructions); + vkd3d_free(block); +} + +static void sm6_functions_cleanup(struct 
sm6_function *functions, size_t count) +{ + size_t i, j; + + for (i = 0; i < count; ++i) + { + for (j = 0; j < functions[i].block_count; ++j) + sm6_block_destroy(functions[i].blocks[j]); + } + vkd3d_free(functions); +} + +static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) +{ + struct sm6_parser *sm6 = sm6_parser(parser); + + dxil_block_destroy(&sm6->root_block); + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); + shader_instruction_array_destroy(&parser->instructions); + sm6_type_table_cleanup(sm6->types, sm6->type_count); + sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); + sm6_functions_cleanup(sm6->functions, sm6->function_count); + vkd3d_free(sm6->values); + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm6); +} + +static const struct vkd3d_shader_parser_ops sm6_parser_ops = +{ + .parser_destroy = sm6_parser_destroy, +}; + +static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, + const char *source_name, struct vkd3d_shader_message_context *message_context) +{ + const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; + unsigned int chunk_offset, chunk_size; + size_t count, length, function_count; + enum bitcode_block_abbreviation abbr; + struct vkd3d_shader_version version; + struct dxil_block *block; + enum vkd3d_result ret; + unsigned int i; + + count = byte_code_size / sizeof(*byte_code); + if (count < 6) + { + WARN("Invalid data size %zu.\n", byte_code_size); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, + "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); + return VKD3D_ERROR_INVALID_SHADER; + } + + version_token = byte_code[0]; + TRACE("Compiler version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 6 || count < 
token_count) + { + WARN("Invalid token count %u (word count %zu).\n", token_count, count); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, + "DXIL chunk token count %#x is invalid (word count %zu).", token_count, count); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (byte_code[2] != TAG_DXIL) + WARN("Unknown magic number 0x%08x.\n", byte_code[2]); + + dxil_version = byte_code[3]; + if (dxil_version > 0x102) + WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); + else + TRACE("DXIL version: 0x%08x.\n", dxil_version); + + chunk_offset = byte_code[4]; + if (chunk_offset < 16 || chunk_offset >= byte_code_size) + { + WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, + "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); + return VKD3D_ERROR_INVALID_SHADER; + } + chunk_size = byte_code[5]; + if (chunk_size > byte_code_size - chunk_offset) + { + WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", + chunk_size, byte_code_size, chunk_offset); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, + "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", + chunk_size, byte_code_size, chunk_offset); + return VKD3D_ERROR_INVALID_SHADER; + } + + sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); + if ((magic = sm6->start[0]) != BITCODE_MAGIC) + { + WARN("Unknown magic number 0x%08x.\n", magic); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, + "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); + } + + sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; + + if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) + { + 
FIXME("Unknown shader type %#x.\n", version.type); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, + "Unknown shader type %#x.", version.type); + } + + version.major = VKD3D_SM6_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM6_VERSION_MINOR(version_token); + + if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) + { + WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, + "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); + return VKD3D_ERROR_INVALID_SHADER; + } + + /* Estimate instruction count to avoid reallocation in most shaders. */ + count = max(token_count, 400) - 400; + vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, + (count + (count >> 2)) / 2u + 10); + sm6->ptr = &sm6->start[1]; + sm6->bitpos = 2; + + block = &sm6->root_block; + if ((ret = dxil_block_init(block, NULL, sm6)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL bitcode chunk."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, + "DXIL bitcode chunk has invalid bitcode."); + else + vkd3d_unreachable(); + return ret; + } + + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); + sm6->abbrevs = NULL; + sm6->abbrev_count = 0; + + length = sm6->ptr - sm6->start - block->start; + if (length != block->length) + { + WARN("Invalid block length %zu; expected %u.\n", length, block->length); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, + "Root block ends with length %zu but indicated length is %u.", length, block->length); + } + if (sm6->ptr != sm6->end) + { + size_t expected_length = sm6->end - sm6->start; + length = sm6->ptr - sm6->start; + 
WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, + "Module ends with length %zu but indicated length is %zu.", length, expected_length); + } + + if ((ret = sm6_parser_type_table_init(sm6)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL type table."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE, + "DXIL type table is invalid."); + else + vkd3d_unreachable(); + return ret; + } + + if ((ret = sm6_parser_symtab_init(sm6)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL value symbol table."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB, + "DXIL value symbol table is invalid."); + else + vkd3d_unreachable(); + return ret; + } + + function_count = dxil_block_compute_function_count(&sm6->root_block); + if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) + { + ERR("Failed to allocate function array.\n"); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating DXIL function array."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) + { + WARN("Value array count overflowed.\n"); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Overflow occurred in the DXIL module value count."); + return VKD3D_ERROR_INVALID_SHADER; + } + if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) + { + ERR("Failed to allocate value 
array.\n"); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating DXIL value array."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = sm6_parser_globals_init(sm6)) < 0) + { + WARN("Failed to load global declarations.\n"); + return ret; + } + + if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL module."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "DXIL module is invalid."); + else + vkd3d_unreachable(); + return ret; + } + + for (i = 0; i < sm6->function_count; ++i) + { + if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) + { + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory emitting shader instructions."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + } + + dxil_block_destroy(&sm6->root_block); + + return VKD3D_OK; +} + +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_desc *shader_desc; + uint32_t *byte_code = NULL; + struct sm6_parser *sm6; + int ret; + + if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_desc = &sm6->p.shader_desc; + shader_desc->is_dxil = true; + if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, + shader_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm6); + return ret; + } + + sm6->p.shader_desc = *shader_desc; + shader_desc = &sm6->p.shader_desc; + + if (((uintptr_t)shader_desc->byte_code & 
(VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) + { + /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC + * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ + if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) + ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); + else + memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); + } + + ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, message_context); + vkd3d_free(byte_code); + + if (ret < 0) + { + WARN("Failed to initialise shader parser.\n"); + sm6_parser_destroy(&sm6->p); + return ret; + } + + *parser = &sm6->p; + + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index ba5bcfbfaf0..8b706e1e667 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -430,6 +430,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl return type; }
+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index) +{ + struct hlsl_type *next_type; + unsigned int offset = 0; + unsigned int idx; + + while (!type_is_single_component(type)) + { + next_type = type; + idx = traverse_path_from_component_index(ctx, &next_type, &index); + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx; + break; + + case HLSL_CLASS_STRUCT: + offset += type->e.record.fields[idx].reg_offset[regset]; + break; + + case HLSL_CLASS_ARRAY: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx * align(type->e.array.type->reg_size[regset], 4); + else + offset += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_OBJECT: + assert(idx == 0); + break; + + default: + vkd3d_unreachable(); + } + type = next_type; + } + + return offset; +} + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) { @@ -524,7 +569,9 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de unsigned int i;
assert(deref); - assert(!deref->offset.node); + + if (deref->offset.node) + return deref->data_type;
type = deref->var->data_type; for (i = 0; i < deref->path_len; ++i) @@ -626,6 +673,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba type->e.array.type = basic_type; type->dimx = basic_type->dimx; type->dimy = basic_type->dimy; + type->sampler_dim = basic_type->sampler_dim; hlsl_type_calculate_reg_size(ctx, type);
list_add_tail(&ctx->types, &type->entry); @@ -992,20 +1040,31 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem struct vkd3d_string_buffer *string; struct hlsl_ir_var *var; static LONG counter; - const char *name;
if (!(string = hlsl_get_string_buffer(ctx))) return NULL; vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); - if (!(name = hlsl_strdup(ctx, string->buffer))) - { - hlsl_release_string_buffer(ctx, string); - return NULL; - } - var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); + var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); hlsl_release_string_buffer(ctx, string); + return var; +} + +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope) +{ + struct hlsl_ir_var *var; + const char *name_copy; + + if (!(name_copy = hlsl_strdup(ctx, name))) + return NULL; + var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); if (var) - list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); + { + if (dummy_scope) + list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); + else + list_add_tail(&ctx->globals->vars, &var->scope_entry); + } return var; }
@@ -1432,7 +1491,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v }
struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, - const struct vkd3d_shader_location *loc) + struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump;
@@ -1440,6 +1499,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return NULL; init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = type; + hlsl_src_from_node(&jump->condition, condition); return &jump->node; }
@@ -1484,7 +1544,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, hlsl_block_cleanup(dst_block); return false; } - list_add_tail(&dst_block->instrs, &dst->entry); + hlsl_block_add_instr(dst_block, dst);
if (!list_empty(&src->uses)) { @@ -1585,9 +1645,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma return dst; }
-static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) +static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) { - return hlsl_new_jump(ctx, src->type, &src->node.loc); + return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); }
static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) @@ -1728,7 +1788,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, return clone_index(ctx, map, hlsl_ir_index(instr));
case HLSL_IR_JUMP: - return clone_jump(ctx, hlsl_ir_jump(instr)); + return clone_jump(ctx, map, hlsl_ir_jump(instr));
case HLSL_IR_LOAD: return clone_load(ctx, map, hlsl_ir_load(instr)); @@ -2065,6 +2125,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } }
+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index) +{ + struct hlsl_type *type = var->data_type, *current_type; + struct vkd3d_string_buffer *buffer; + unsigned int element_index; + + if (!(buffer = hlsl_get_string_buffer(ctx))) + return NULL; + + vkd3d_string_buffer_printf(buffer, "%s", var->name); + + while (!type_is_single_component(type)) + { + current_type = type; + element_index = traverse_path_from_component_index(ctx, &type, &index); + if (current_type->class == HLSL_CLASS_STRUCT) + vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); + else + vkd3d_string_buffer_printf(buffer, "[%u]", element_index); + } + + return buffer; +} + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) { struct vkd3d_string_buffer *string; @@ -2123,18 +2208,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) { static const char * const names[] = { - "HLSL_IR_CALL", - "HLSL_IR_CONSTANT", - "HLSL_IR_EXPR", - "HLSL_IR_IF", - "HLSL_IR_INDEX", - "HLSL_IR_LOAD", - "HLSL_IR_LOOP", - "HLSL_IR_JUMP", - "HLSL_IR_RESOURCE_LOAD", - "HLSL_IR_RESOURCE_STORE", - "HLSL_IR_STORE", - "HLSL_IR_SWIZZLE", + [HLSL_IR_CALL ] = "HLSL_IR_CALL", + [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", + [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", + [HLSL_IR_IF ] = "HLSL_IR_IF", + [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", + [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", + [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", + [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", + [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", + [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", + [HLSL_IR_STORE ] = "HLSL_IR_STORE", + [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", };
if (type >= ARRAY_SIZE(names)) @@ -2146,10 +2231,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) { static const char * const names[] = { - "HLSL_IR_JUMP_BREAK", - "HLSL_IR_JUMP_CONTINUE", - "HLSL_IR_JUMP_DISCARD", - "HLSL_IR_JUMP_RETURN", + [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", + [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", + [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", + [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", + [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", };
assert(type < ARRAY_SIZE(names)); @@ -2158,11 +2244,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type)
static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr);
-static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) +static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) { struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { dump_instr(ctx, buffer, instr); vkd3d_string_buffer_printf(buffer, "\n"); @@ -2337,7 +2423,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS] = "cos", [HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", + [HLSL_OP1_DSX_COARSE] = "dsx_coarse", + [HLSL_OP1_DSX_FINE] = "dsx_fine", [HLSL_OP1_DSY] = "dsy", + [HLSL_OP1_DSY_COARSE] = "dsy_coarse", + [HLSL_OP1_DSY_FINE] = "dsy_fine", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", @@ -2400,9 +2490,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "if ("); dump_src(buffer, &if_node->condition); vkd3d_string_buffer_printf(buffer, ") {\n"); - dump_instr_list(ctx, buffer, &if_node->then_block.instrs); + dump_block(ctx, buffer, &if_node->then_block); vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); - dump_instr_list(ctx, buffer, &if_node->else_block.instrs); + dump_block(ctx, buffer, &if_node->else_block); vkd3d_string_buffer_printf(buffer, " %10s }", ""); }
@@ -2418,8 +2508,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i vkd3d_string_buffer_printf(buffer, "continue"); break;
- case HLSL_IR_JUMP_DISCARD: - vkd3d_string_buffer_printf(buffer, "discard"); + case HLSL_IR_JUMP_DISCARD_NEG: + vkd3d_string_buffer_printf(buffer, "discard_neg"); + break; + + case HLSL_IR_JUMP_DISCARD_NZ: + vkd3d_string_buffer_printf(buffer, "discard_nz"); break;
case HLSL_IR_JUMP_RETURN: @@ -2431,7 +2525,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) { vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); - dump_instr_list(ctx, buffer, &loop->body.instrs); + dump_block(ctx, buffer, &loop->body); vkd3d_string_buffer_printf(buffer, " %10s }", ""); }
@@ -2450,6 +2544,8 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", + [HLSL_RESOURCE_SAMPLE_INFO] = "sample_info", + [HLSL_RESOURCE_RESINFO] = "resinfo", };
assert(load->load_type < ARRAY_SIZE(type_names)); @@ -2457,8 +2553,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru dump_deref(buffer, &load->resource); vkd3d_string_buffer_printf(buffer, ", sampler = "); dump_deref(buffer, &load->sampler); - vkd3d_string_buffer_printf(buffer, ", coords = "); - dump_src(buffer, &load->coords); + if (load->coords.node) + { + vkd3d_string_buffer_printf(buffer, ", coords = "); + dump_src(buffer, &load->coords); + } if (load->sample_index.node) { vkd3d_string_buffer_printf(buffer, ", sample index = "); @@ -2614,7 +2713,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl vkd3d_string_buffer_printf(&buffer, "\n"); } if (func->has_body) - dump_instr_list(ctx, &buffer, &func->body.instrs); + dump_block(ctx, &buffer, &func->body);
vkd3d_string_buffer_trace(&buffer); vkd3d_string_buffer_cleanup(&buffer); @@ -2703,6 +2802,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node)
static void free_ir_jump(struct hlsl_ir_jump *jump) { + hlsl_src_remove(&jump->condition); vkd3d_free(jump); }
@@ -2822,7 +2922,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr)
for (i = 0; i < attr->args_count; ++i) hlsl_src_remove(&attr->args[i]); - hlsl_free_instr_list(&attr->instrs); + hlsl_block_cleanup(&attr->instrs); vkd3d_free((void *)attr->name); vkd3d_free(attr); } @@ -3127,8 +3227,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx)
for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) { - unsigned int n_variants = 0; const char *const *variants; + unsigned int n_variants;
switch (bt) { @@ -3148,6 +3248,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) break;
default: + n_variants = 0; + variants = NULL; break; }
@@ -3199,9 +3301,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) } }
-static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, +static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) { + unsigned int i; + memset(ctx, 0, sizeof(*ctx));
ctx->profile = profile; @@ -3210,7 +3314,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name,
if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) return false; - if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : "<anonymous>"))) + if (!(ctx->source_files[0] = hlsl_strdup(ctx, compile_info->source_name ? compile_info->source_name : "<anonymous>"))) { vkd3d_free(ctx->source_files); return false; @@ -3249,6 +3353,19 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, return false; ctx->cur_buffer = ctx->globals_buffer;
+ for (i = 0; i < compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + + if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) + { + if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) + ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; + else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) + ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; + } + } + return true; }
@@ -3260,6 +3377,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) struct hlsl_type *type, *next_type; unsigned int i;
+ hlsl_block_cleanup(&ctx->static_initializers); + for (i = 0; i < ctx->source_files_count; ++i) vkd3d_free((void *)ctx->source_files[i]); vkd3d_free(ctx->source_files); @@ -3283,6 +3402,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) vkd3d_free((void *)buffer->name); vkd3d_free(buffer); } + + vkd3d_free(ctx->constant_defs.regs); }
int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, @@ -3324,7 +3445,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_INVALID_ARGUMENT; }
- if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) + if (!hlsl_ctx_init(&ctx, compile_info, profile, message_context)) return VKD3D_ERROR_OUT_OF_MEMORY;
if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index cd1ad37a542..8c21bd10801 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -257,7 +257,7 @@ struct hlsl_reg /* Number of registers to be allocated. * Unlike the variable's type's regsize, it is not expressed in register components, but rather * in whole registers, and may depend on which components are used within the shader. */ - uint32_t bind_count; + uint32_t allocation_size; /* For numeric registers, a writemask can be provided to indicate the reservation of only some * of the 4 components. */ unsigned int writemask; @@ -337,7 +337,7 @@ struct hlsl_src struct hlsl_attribute { const char *name; - struct list instrs; + struct hlsl_block instrs; struct vkd3d_shader_location loc; unsigned int args_count; struct hlsl_src args[]; @@ -356,6 +356,7 @@ struct hlsl_attribute #define HLSL_MODIFIER_COLUMN_MAJOR 0x00000400 #define HLSL_STORAGE_IN 0x00000800 #define HLSL_STORAGE_OUT 0x00001000 +#define HLSL_MODIFIER_INLINE 0x00002000
#define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ @@ -417,11 +418,15 @@ struct hlsl_ir_var enum hlsl_sampler_dim sampler_dim; struct vkd3d_shader_location first_sampler_dim_loc; } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + /* Minimum number of binds required to include all object components actually used in the shader. + * It may be less than the allocation size, e.g. for texture arrays. */ + unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1];
uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; uint32_t is_param : 1; + uint32_t is_separated_resource : 1; };
/* Sized array of variables representing a function's parameters. */ @@ -502,7 +507,11 @@ enum hlsl_ir_expr_op HLSL_OP1_COS, HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_DSX, + HLSL_OP1_DSX_COARSE, + HLSL_OP1_DSX_FINE, HLSL_OP1_DSY, + HLSL_OP1_DSY_COARSE, + HLSL_OP1_DSY_FINE, HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, @@ -558,7 +567,8 @@ enum hlsl_ir_jump_type { HLSL_IR_JUMP_BREAK, HLSL_IR_JUMP_CONTINUE, - HLSL_IR_JUMP_DISCARD, + HLSL_IR_JUMP_DISCARD_NEG, + HLSL_IR_JUMP_DISCARD_NZ, HLSL_IR_JUMP_RETURN, };
@@ -566,6 +576,8 @@ struct hlsl_ir_jump { struct hlsl_ir_node node; enum hlsl_ir_jump_type type; + /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ + struct hlsl_src condition; };
struct hlsl_ir_swizzle @@ -600,9 +612,11 @@ struct hlsl_deref * components, within the pertaining regset), from the start of the variable, of the part * referenced. * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- - * before writing the bytecode. */ + * before writing the bytecode. + * Since the type information can no longer be retrieved from the offset alone, the type is + * stored in the data_type field. */ struct hlsl_src offset; - enum hlsl_regset offset_regset; + struct hlsl_type *data_type; };
struct hlsl_ir_load @@ -624,6 +638,8 @@ enum hlsl_resource_load_type HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, HLSL_RESOURCE_GATHER_ALPHA, + HLSL_RESOURCE_SAMPLE_INFO, + HLSL_RESOURCE_RESINFO, };
struct hlsl_ir_resource_load @@ -803,7 +819,11 @@ struct hlsl_ctx * Only used for SM1 profiles. */ struct hlsl_constant_defs { - struct hlsl_vec4 *values; + struct hlsl_constant_register + { + uint32_t index; + struct hlsl_vec4 value; + } *regs; size_t count, size; } constant_defs; /* Number of temp. registers required for the shader to run, i.e. the largest temp register @@ -1055,10 +1075,12 @@ const char *debug_hlsl_writemask(unsigned int writemask); const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count);
struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); +struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index); struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
-struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); @@ -1120,7 +1142,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, - enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
@@ -1132,6 +1154,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, @@ -1156,6 +1180,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); @@ -1187,6 +1213,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, unsigned int index); +unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index); bool hlsl_type_is_row_major(const struct hlsl_type *type); unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); @@ -1227,7 +1255,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type 
*swizzle_type, bool *has_idx); + bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);
int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 0e07fe578e1..43ea4b4d038 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -53,7 +53,7 @@ struct parse_initializer { struct hlsl_ir_node **args; unsigned int args_count; - struct list *instrs; + struct hlsl_block *instrs; bool braces; };
@@ -73,6 +73,10 @@ struct parse_variable_def struct hlsl_semantic semantic; struct hlsl_reg_reservation reg_reservation; struct parse_initializer initializer; + + struct hlsl_type *basic_type; + unsigned int modifiers; + struct vkd3d_shader_location modifiers_loc; };
struct parse_function @@ -85,8 +89,8 @@ struct parse_function
struct parse_if_body { - struct list *then_block; - struct list *else_block; + struct hlsl_block *then_block; + struct hlsl_block *else_block; };
enum parse_assign_op @@ -129,9 +133,18 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", s); }
-static struct hlsl_ir_node *node_from_list(struct list *list) +static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) +{ + return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); +} + +static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) { - return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); + struct hlsl_block *block; + + if ((block = hlsl_alloc(ctx, sizeof(*block)))) + hlsl_block_init(block); + return block; }
static struct list *make_empty_list(struct hlsl_ctx *ctx) @@ -143,10 +156,10 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) return list; }
-static void destroy_instr_list(struct list *list) +static void destroy_block(struct hlsl_block *block) { - hlsl_free_instr_list(list); - vkd3d_free(list); + hlsl_block_cleanup(block); + vkd3d_free(block); }
static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, @@ -273,10 +286,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); }
-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc); - -static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; @@ -313,7 +323,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { struct hlsl_ir_node *component_load; struct hlsl_type *dst_comp_type; - struct hlsl_block block; + struct hlsl_block store_block; unsigned int src_idx;
if (broadcast) @@ -333,21 +343,21 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs,
dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) + if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) return NULL;
if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) return NULL; - list_add_tail(instrs, &cast->entry); + hlsl_block_add_instr(block, cast);
- if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) + if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) return NULL; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(block, &store_block); }
if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node);
return &load->node; } @@ -355,12 +365,12 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) return NULL; - list_add_tail(instrs, &cast->entry); + hlsl_block_add_instr(block, cast); return cast; } }
-static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; @@ -386,7 +396,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix");
- return add_cast(ctx, instrs, node, dst_type, loc); + return add_cast(ctx, block, node, dst_type, loc); }
static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, @@ -405,29 +415,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, return modifiers | mod; }
-static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) +static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) { struct hlsl_ir_node *condition, *not, *iff, *jump; struct hlsl_block then_block;
/* E.g. "for (i = 0; ; ++i)". */ - if (list_empty(cond_list)) + if (list_empty(&cond_block->instrs)) return true;
- condition = node_from_list(cond_list); + condition = node_from_block(cond_block); if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) return false; - list_add_tail(cond_list, &not->entry); + hlsl_block_add_instr(cond_block, not);
hlsl_block_init(&then_block);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) return false; hlsl_block_add_instr(&then_block, jump);
if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; - list_add_tail(cond_list, &iff->entry); + hlsl_block_add_instr(cond_block, iff); return true; }
@@ -454,10 +464,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att return false; }
-static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, - struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) +static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, + struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { - struct hlsl_block body_block; struct hlsl_ir_node *loop; unsigned int i;
@@ -476,53 +486,49 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const } else { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); } } else if (!strcmp(attr->name, "loop") || !strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) { - hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); + hlsl_fixme(ctx, loc, "Unhandled attribute '%s'.", attr->name); } else { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); } }
- if (!init && !(init = make_empty_list(ctx))) + if (!init && !(init = make_empty_block(ctx))) goto oom;
if (!append_conditional_break(ctx, cond)) goto oom;
- hlsl_block_init(&body_block); - - if (type != LOOP_DO_WHILE) - list_move_tail(&body_block.instrs, cond); - - list_move_tail(&body_block.instrs, body); - if (iter) - list_move_tail(&body_block.instrs, iter); + hlsl_block_add_block(body, iter);
if (type == LOOP_DO_WHILE) - list_move_tail(&body_block.instrs, cond); + list_move_tail(&body->instrs, &cond->instrs); + else + list_move_head(&body->instrs, &cond->instrs);
- if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) + if (!(loop = hlsl_new_loop(ctx, body, loc))) goto oom; - list_add_tail(init, &loop->entry); + hlsl_block_add_instr(init, loop);
- vkd3d_free(cond); - vkd3d_free(body); + destroy_block(cond); + destroy_block(body); + destroy_block(iter); return init;
oom: - destroy_instr_list(init); - destroy_instr_list(cond); - destroy_instr_list(iter); - destroy_instr_list(body); + destroy_block(init); + destroy_block(cond); + destroy_block(iter); + destroy_block(body); return NULL; }
@@ -539,7 +545,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer
static void free_parse_initializer(struct parse_initializer *initializer) { - destroy_instr_list(initializer->instrs); + destroy_block(initializer->instrs); vkd3d_free(initializer->args); }
@@ -625,7 +631,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod return NULL; }
-static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, +static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; @@ -637,7 +643,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, { struct hlsl_ir_node *store;
- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) + if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) return false;
if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) @@ -656,18 +662,18 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); }
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) return false; - list_add_tail(instrs, &jump->entry); + hlsl_block_add_instr(block, jump);
return true; }
-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *load, *store; - struct hlsl_block block; + struct hlsl_block load_block; struct hlsl_ir_var *var; struct hlsl_deref src;
@@ -676,17 +682,17 @@ static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list
if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) return NULL; - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store);
hlsl_init_simple_deref_from_var(&src, var); - if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) + if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) return NULL; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(block, &load_block);
return load; }
-static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, +static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *record, unsigned int idx, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *index, *c; @@ -695,20 +701,20 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct
if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) return false; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(index = hlsl_new_index(ctx, record, c, loc))) return false; - list_add_tail(instrs, &index->entry); + hlsl_block_add_instr(block, index);
return true; }
-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc);
-static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, +static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *array, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; @@ -731,13 +737,13 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h return false; }
- if (!(index = add_implicit_conversion(ctx, instrs, index, + if (!(index = add_implicit_conversion(ctx, block, index, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) return false;
if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; - list_add_tail(instrs, &return_index->entry); + hlsl_block_add_instr(block, return_index);
return true; } @@ -750,7 +756,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h
if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) return false; - list_add_tail(instrs, &cast->entry); + hlsl_block_add_instr(block, cast); index = cast;
if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) @@ -764,7 +770,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h
if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; - list_add_tail(instrs, &return_index->entry); + hlsl_block_add_instr(block, return_index);
return true; } @@ -830,6 +836,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; }
+static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); +} + +static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return !shader_profile_version_ge(ctx, major, minor); +} + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct hlsl_type *type, unsigned int modifiers, struct list *defs) { @@ -1020,7 +1036,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const struct hlsl_reg_reservation reservation = {0}; char *endptr;
- if (ctx->profile->major_version < 4) + if (shader_profile_version_lt(ctx, 4, 0)) return reservation;
reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); @@ -1079,17 +1095,17 @@ static struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, return NULL; }
-static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) { - struct list *list; + struct hlsl_block *block;
- if (!(list = make_empty_list(ctx))) + if (!(block = make_empty_block(ctx))) { - hlsl_free_instr(node); + hlsl_free_instr(instr); return NULL; } - list_add_tail(list, &node->entry); - return list; + hlsl_block_add_instr(block, instr); + return block; }
static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1097,20 +1113,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str { struct hlsl_ir_constant *constant; struct hlsl_ir_node *node; + struct hlsl_block expr; unsigned int ret = 0; bool progress;
- if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + switch (node->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_SWIZZLE: + case HLSL_IR_LOAD: + case HLSL_IR_INDEX: + continue; + case HLSL_IR_CALL: + case HLSL_IR_IF: + case HLSL_IR_LOOP: + case HLSL_IR_JUMP: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_STORE: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + } + } + + if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) + return 0; + hlsl_block_add_block(&expr, block); + + if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) + { + hlsl_block_cleanup(&expr); return 0; + }
do { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); - progress |= hlsl_copy_propagation_execute(ctx, block); + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); + progress |= hlsl_copy_propagation_execute(ctx, &expr); } while (progress);
- node = node_from_list(&block->instrs); + node = node_from_block(&expr); if (node->type == HLSL_IR_CONSTANT) { constant = hlsl_ir_constant(node); @@ -1119,9 +1165,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str else { hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Failed to evaluate constant expression %d.", node->type); + "Failed to evaluate constant expression."); }
+ hlsl_block_cleanup(&expr); + return ret; }
@@ -1253,7 +1301,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return true; }
-static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *type, const struct vkd3d_shader_location *loc) { @@ -1277,38 +1325,38 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, for (i = 0; i < type->dimy * type->dimx; ++i) { struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; - struct hlsl_block block; + struct hlsl_block store_block; unsigned int j;
for (j = 0; j < HLSL_MAX_OPERANDS; j++) { if (operands[j]) { - if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) + if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) return NULL;
cell_operands[j] = load; } }
- if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) + if (!(value = add_expr(ctx, block, op, cell_operands, scalar_type, loc))) return NULL;
- if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) + if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, value)) return NULL; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(block, &store_block); }
if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &var_load->node.entry); + hlsl_block_add_instr(block, &var_load->node);
return &var_load->node; }
if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) return NULL; - list_add_tail(instrs, &expr->entry); + hlsl_block_add_instr(block, expr);
return expr; } @@ -1334,23 +1382,23 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * } }
-static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg};
- return add_expr(ctx, instrs, op, args, arg->data_type, loc); + return add_expr(ctx, block, op, args, arg->data_type, loc); }
-static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { check_integer_type(ctx, arg);
- return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); + return add_unary_arithmetic_expr(ctx, block, op, arg, loc); }
-static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; @@ -1359,10 +1407,10 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, bool_type, loc); + return add_expr(ctx, block, op, args, bool_type, loc); }
static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, @@ -1378,7 +1426,7 @@ static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); }
-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1387,49 +1435,26 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str
common_type = get_common_numeric_type(ctx, arg1, arg2, loc);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, common_type, loc); + return add_expr(ctx, block, op, args, common_type, loc); }
-static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); - return list1; -} - -static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { check_integer_type(ctx, arg1); check_integer_type(ctx, arg2);
- return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); + return add_binary_arithmetic_expr(ctx, block, op, arg1, arg2, loc); }
-static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - -static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1445,27 +1470,16 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, return_type, loc); -} - -static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); - return list1; + return add_expr(ctx, block, op, args, return_type, loc); }
-static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1479,28 +1493,16 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct
common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, common_type, loc); + return add_expr(ctx, block, op, args, common_type, loc); }
-static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - -static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1522,28 +1524,16 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) return NULL;
- return add_expr(ctx, instrs, op, args, return_type, loc); + return add_expr(ctx, block, op, args, return_type, loc); }
-static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - -static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); @@ -1557,8 +1547,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, arg1->data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid type %s.\n", string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); hlsl_release_string_buffer(ctx, string); return NULL; } @@ -1568,8 +1557,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, arg2->data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid type %s.\n", string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); hlsl_release_string_buffer(ctx, string); return NULL; } @@ -1598,6 +1586,53 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return add_expr(ctx, instrs, op, args, ret_type, loc); }
+static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, + struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); + + hlsl_block_add_block(block1, block2); + destroy_block(block2); + + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_DIV: + case HLSL_OP2_MOD: + case HLSL_OP2_MUL: + add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: + add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LESS: + case HLSL_OP2_GEQUAL: + case HLSL_OP2_EQUAL: + case HLSL_OP2_NEQUAL: + add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LSHIFT: + case HLSL_OP2_RSHIFT: + add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); + break; + + default: + vkd3d_unreachable(); + } + + return block1; +} + static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) { static const enum hlsl_ir_expr_op ops[] = @@ -1654,7 +1689,7 @@ static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsig return true; }
-static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, +static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; @@ -1663,7 +1698,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in
if (assign_op == ASSIGN_OP_SUB) { - if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) + if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) return NULL; assign_op = ASSIGN_OP_ADD; } @@ -1672,14 +1707,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum hlsl_ir_expr_op op = op_from_assignment(assign_op);
assert(op); - if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) + if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) return NULL; }
if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) writemask = (1 << lhs_type->dimx) - 1;
- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) + if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) return NULL;
while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) @@ -1708,7 +1743,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { return NULL; } - list_add_tail(instrs, &new_swizzle->entry); + hlsl_block_add_instr(block, new_swizzle);
lhs = swizzle->val.node; rhs = new_swizzle; @@ -1754,7 +1789,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_cleanup_deref(&resource_deref); return NULL; } - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); } else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) @@ -1773,13 +1808,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in
if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) return NULL; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) return NULL; - list_add_tail(instrs, &cell->entry); + hlsl_block_add_instr(block, cell);
- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) return NULL;
if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) @@ -1790,7 +1825,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_cleanup_deref(&deref); return NULL; } - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); } } @@ -1807,7 +1842,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_cleanup_deref(&deref); return NULL; } - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); }
@@ -1816,14 +1851,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in * the last instruction in the list, we do need to copy. */ if (!(copy = hlsl_new_copy(ctx, rhs))) return NULL; - list_add_tail(instrs, &copy->entry); + hlsl_block_add_instr(block, copy); return copy; }
-static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, +static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *lhs = node_from_list(instrs); + struct hlsl_ir_node *lhs = node_from_block(block); struct hlsl_ir_node *one;
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) @@ -1832,9 +1867,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem
if (!(one = hlsl_new_int_constant(ctx, 1, loc))) return false; - list_add_tail(instrs, &one->entry); + hlsl_block_add_instr(block, one);
- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) + if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) return false;
if (post) @@ -1843,7 +1878,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem
if (!(copy = hlsl_new_copy(ctx, lhs))) return false; - list_add_tail(instrs, &copy->entry); + hlsl_block_add_instr(block, copy);
/* Post increment/decrement expressions are considered const. */ if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) @@ -1853,7 +1888,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem return true; }
-static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); @@ -1868,7 +1903,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_type *dst_comp_type; struct hlsl_block block;
- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) + if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) return;
dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); @@ -1878,7 +1913,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) return; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(instrs, &block);
++*store_index; } @@ -1924,211 +1959,231 @@ static bool type_has_numeric_components(struct hlsl_type *type) return false; }
-static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, - unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) +static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, + const struct vkd3d_shader_location *loc) { - struct parse_variable_def *v, *v_next; + modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); + if (modifiers) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } +} + +static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +{ + struct hlsl_type *basic_type = v->basic_type; struct hlsl_ir_function_decl *func; - unsigned int invalid_modifiers; - struct list *statements_list; + struct hlsl_semantic new_semantic; + uint32_t modifiers = v->modifiers; + bool unbounded_res_array = false; struct hlsl_ir_var *var; struct hlsl_type *type; bool local = true; + char *var_name; + unsigned int i; + + assert(basic_type);
if (basic_type->class == HLSL_CLASS_MATRIX) assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
- if (!(statements_list = make_empty_list(ctx))) - { - LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) - free_parse_variable_def(v); - vkd3d_free(var_list); - return NULL; - } - - if (!var_list) - return statements_list; + type = basic_type;
- invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); - if (invalid_modifiers) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) - hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); - hlsl_release_string_buffer(ctx, string); + for (i = 0; i < v->arrays.count; ++i) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); }
- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + if (unbounded_res_array) { - bool unbounded_res_array = false; - unsigned int i; - - type = basic_type; - - if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + if (v->arrays.count == 1) { - for (i = 0; i < v->arrays.count; ++i) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); + hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); + return; } - - if (unbounded_res_array) + else { - if (v->arrays.count == 1) - { - hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); - free_parse_variable_def(v); - continue; - } - else - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Unbounded resource arrays cannot be multi-dimensional."); - } + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Unbounded resource arrays cannot be multi-dimensional."); } - else + } + else + { + for (i = 0; i < v->arrays.count; ++i) { - for (i = 0; i < v->arrays.count; ++i) + if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) { - if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - { - unsigned int size = initializer_size(&v->initializer); - unsigned int elem_components = hlsl_type_component_count(type); - - if (i < v->arrays.count - 1) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Only innermost array size can be implicit."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else if (elem_components == 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Cannot declare an implicit size array of a size 0 type."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else if (size == 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays need to be initialized."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; + unsigned int size = 
initializer_size(&v->initializer); + unsigned int elem_components = hlsl_type_component_count(type);
- } - else if (size % elem_components != 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Cannot initialize implicit size array with %u components, expected a multiple of %u.", - size, elem_components); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else - { - v->arrays.sizes[i] = size / elem_components; - } + if (i < v->arrays.count - 1) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Only innermost array size can be implicit."); + v->initializer.args_count = 0; + } + else if (elem_components == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot declare an implicit size array of a size 0 type."); + v->initializer.args_count = 0; + } + else if (size == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Implicit size arrays need to be initialized."); + v->initializer.args_count = 0; + } + else if (size % elem_components != 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Cannot initialize implicit size array with %u components, expected a multiple of %u.", + size, elem_components); + v->initializer.args_count = 0; + } + else + { + v->arrays.sizes[i] = size / elem_components; } - type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); } + type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); } - vkd3d_free(v->arrays.sizes); + } + + if (!(var_name = vkd3d_strdup(v->name))) + return;
- if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) + new_semantic = v->semantic; + if (v->semantic.name) + { + if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) { - free_parse_variable_def(v); - continue; + vkd3d_free(var_name); + return; } + }
- var->buffer = ctx->cur_buffer; + if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) + { + hlsl_cleanup_semantic(&new_semantic); + vkd3d_free(var_name); + return; + }
- if (var->buffer == ctx->globals_buffer) - { - if (var->reg_reservation.offset_type) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() is only allowed inside constant buffer declarations."); - } + var->buffer = ctx->cur_buffer;
- if (ctx->cur_scope == ctx->globals) - { - local = false; + if (var->buffer == ctx->globals_buffer) + { + if (var->reg_reservation.offset_type) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is only allowed inside constant buffer declarations."); + }
- if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Variable '%s' is declared as both "uniform" and "static".", var->name); + if (ctx->cur_scope == ctx->globals) + { + local = false;
- /* Mark it as uniform. We need to do this here since synthetic - * variables also get put in the global scope, but shouldn't be - * considered uniforms, and we have no way of telling otherwise. */ - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Variable '%s' is declared as both "uniform" and "static".", var->name);
- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && - type_has_object_components(var->data_type, true)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support objects as struct members in uniform variables.\n"); - } + /* Mark it as uniform. We need to do this here since synthetic + * variables also get put in the global scope, but shouldn't be + * considered uniforms, and we have no way of telling otherwise. */ + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->storage_modifiers |= HLSL_STORAGE_UNIFORM;
- if ((func = hlsl_get_func_decl(ctx, var->name))) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "'%s' is already defined as a function.", var->name); - hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, - "'%s' was previously defined here.", var->name); - } - } - else + if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && + type_has_object_components(var->data_type, true)) { - static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED - | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; - - if (modifiers & invalid) - { - struct vkd3d_string_buffer *string; + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support objects as struct members in uniform variables."); + }
- if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers '%s' are not allowed on local variables.", string->buffer); - hlsl_release_string_buffer(ctx, string); - } - if (var->semantic.name) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Semantics are not allowed on local variables."); + if ((func = hlsl_get_func_decl(ctx, var->name))) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "'%s' is already defined as a function.", var->name); + hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, + "'%s' was previously defined here.", var->name); } + } + else + { + static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED + | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM;
- if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) - && type_has_object_components(var->data_type, false)) + if (modifiers & invalid) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Static variables cannot have both numeric and resource components."); + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on local variables.", string->buffer); + hlsl_release_string_buffer(ctx, string); } + if (var->semantic.name) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Semantics are not allowed on local variables.");
- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count - && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) + if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, - "Const variable "%s" is missing an initializer.", var->name); - hlsl_free_var(var); - free_parse_initializer(&v->initializer); - vkd3d_free(v); - continue; + "Const variable "%s" is missing an initializer.", var->name); } + } + + if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) + && type_has_object_components(var->data_type, false)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Static variables cannot have both numeric and resource components."); + }
- if (!hlsl_add_var(ctx, var, local)) + if (!hlsl_add_var(ctx, var, local)) + { + struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); + + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Variable "%s" was already declared in this scope.", var->name); + hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, ""%s" was previously declared here.", old->name); + hlsl_free_var(var); + return; + } +} + +static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) +{ + struct parse_variable_def *v, *v_next; + struct hlsl_block *initializers; + struct hlsl_ir_var *var; + struct hlsl_type *type; + + if (!(initializers = make_empty_block(ctx))) + { + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); + free_parse_variable_def(v); + } + vkd3d_free(var_list); + return NULL; + }
- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Variable "%s" was already declared in this scope.", var->name); - hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, ""%s" was previously declared here.", old->name); - hlsl_free_var(var); - free_parse_initializer(&v->initializer); - vkd3d_free(v); + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + { + /* If this fails, the variable failed to be declared. */ + if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) + { + free_parse_variable_def(v); continue; } + type = var->data_type;
if (v->initializer.args_count) { @@ -2143,8 +2198,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, "Expected %u components in initializer, but got %u.", hlsl_type_component_count(type), size); - free_parse_initializer(&v->initializer); - vkd3d_free(v); + free_parse_variable_def(v); continue; }
@@ -2159,16 +2213,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc);
assert(v->initializer.args_count == 1); - list_add_tail(v->initializer.instrs, &load->node.entry); + hlsl_block_add_instr(v->initializer.instrs, &load->node); add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); }
- if (modifiers & HLSL_STORAGE_STATIC) - list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); + if (var->storage_modifiers & HLSL_STORAGE_STATIC) + hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); else - list_move_tail(statements_list, v->initializer.instrs); - vkd3d_free(v->initializer.args); - vkd3d_free(v->initializer.instrs); + hlsl_block_add_block(initializers, v->initializer.instrs); } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -2178,34 +2230,35 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t
if (type_has_object_components(var->data_type, false)) { - vkd3d_free(v); + free_parse_variable_def(v); continue; }
if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } hlsl_block_add_instr(&ctx->static_initializers, zero);
- if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) + if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; }
if (!(store = hlsl_new_simple_store(ctx, var, cast))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } hlsl_block_add_instr(&ctx->static_initializers, store); } - vkd3d_free(v); + free_parse_variable_def(v); } + vkd3d_free(var_list); - return statements_list; + return initializers; }
struct find_function_call_args @@ -2394,18 +2447,18 @@ static bool intrinsic_all(struct hlsl_ctx *ctx,
if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; - list_add_tail(params->instrs, &one->entry); + hlsl_block_add_instr(params->instrs, one);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
mul = one;
count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) @@ -2431,7 +2484,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, { if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) return false; @@ -2442,14 +2495,14 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, { if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) return false; - list_add_tail(params->instrs, &bfalse->entry); + hlsl_block_add_instr(params->instrs, bfalse);
or = bfalse;
count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false;
if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) @@ -2544,6 +2597,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); }
+static bool intrinsic_clip(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *condition, *jump; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + + condition = params->args[0]; + + if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, condition->data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Argument type cannot exceed 4 components, got type "%s".", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) + return false; + hlsl_block_add_instr(params->instrs, jump); + + return true; +} + static bool intrinsic_cos(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2579,26 +2660,26 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl1->entry); + hlsl_block_add_instr(params->instrs, arg1_swzl1);
if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl1->entry); + hlsl_block_add_instr(params->instrs, arg2_swzl1);
if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) return false;
if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) return false; - list_add_tail(params->instrs, &mul1_neg->entry); + hlsl_block_add_instr(params->instrs, mul1_neg);
if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl2->entry); + hlsl_block_add_instr(params->instrs, arg1_swzl2);
if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl2->entry); + hlsl_block_add_instr(params->instrs, arg2_swzl2);
if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false; @@ -2617,6 +2698,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); }
+static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); +} + +static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); +} + static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2628,6 +2731,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); }
+static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); +} + +static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2668,7 +2793,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; - list_add_tail(params->instrs, &coeff->entry); + hlsl_block_add_instr(params->instrs, coeff);
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false; @@ -2715,7 +2840,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer
if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) return false; @@ -2806,7 +2931,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, }
static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, - struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *log, *mul; @@ -2861,15 +2986,15 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, init_value.u[3].f = 1.0f; if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) return false; - list_add_tail(params->instrs, &init->entry); + hlsl_block_add_instr(params->instrs, init);
if (!(store = hlsl_new_simple_store(ctx, var, init))) return false; - list_add_tail(params->instrs, &store->entry); + hlsl_block_add_instr(params->instrs, store);
if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
/* Diffuse component. */ if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) @@ -2877,7 +3002,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx,
if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block);
/* Specular component. */ if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) @@ -2897,11 +3022,11 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx,
if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block);
if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &var_load->node.entry); + hlsl_block_add_instr(params->instrs, &var_load->node);
return true; } @@ -3034,10 +3159,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, { struct hlsl_ir_node *value1, *value2, *mul;
- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) + if (!(value1 = hlsl_add_load_component(ctx, params->instrs, + cast1, j * cast1->data_type->dimx + k, loc))) return false;
- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) + if (!(value2 = hlsl_add_load_component(ctx, params->instrs, + cast2, k * cast2->data_type->dimx + i, loc))) return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) @@ -3056,13 +3183,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx,
if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); } }
if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + hlsl_block_add_instr(params->instrs, &load->node);
return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); } @@ -3169,7 +3296,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx,
if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero);
/* Check if 0 < arg, cast bool to int */
@@ -3229,7 +3356,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; - list_add_tail(params->instrs, &one->entry); + hlsl_block_add_instr(params->instrs, one);
if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) return false; @@ -3242,11 +3369,11 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; - list_add_tail(params->instrs, &minus_two->entry); + hlsl_block_add_instr(params->instrs, minus_two);
if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) return false; - list_add_tail(params->instrs, &three->entry); + hlsl_block_add_instr(params->instrs, three);
if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) return false; @@ -3308,7 +3435,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
if (params->args_count == 4) { - hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); + hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); }
sampler_type = params->args[0]->data_type; @@ -3335,7 +3462,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(params->instrs, &load->entry); + hlsl_block_add_instr(params->instrs, load); return true; }
@@ -3369,7 +3496,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx,
if ((string = hlsl_type_to_string(ctx, arg_type))) hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", + "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", string->buffer); hlsl_release_string_buffer(ctx, string); return false; @@ -3377,7 +3504,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx,
if (arg_type->class == HLSL_CLASS_SCALAR) { - list_add_tail(params->instrs, &arg->entry); + hlsl_block_add_instr(params->instrs, arg); return true; }
@@ -3393,18 +3520,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_block block;
- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) return false;
if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); } }
if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &var_load->node.entry); + hlsl_block_add_instr(params->instrs, &var_load->node);
return true; } @@ -3444,13 +3571,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx,
if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) return false; - list_add_tail(params->instrs, &c->entry); + hlsl_block_add_instr(params->instrs, c);
if (arg_type->class == HLSL_CLASS_VECTOR) { if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) return false; - list_add_tail(params->instrs, &swizzle->entry); + hlsl_block_add_instr(params->instrs, swizzle);
arg = swizzle; } @@ -3458,7 +3585,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) return false;
- if (ctx->profile->major_version >= 4) + if (shader_profile_version_ge(ctx, 4, 0)) return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc);
return true; @@ -3482,10 +3609,15 @@ intrinsic_functions[] = {"asfloat", 1, true, intrinsic_asfloat}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, + {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, + {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, + {"ddx_fine", 1, true, intrinsic_ddx_fine}, {"ddy", 1, true, intrinsic_ddy}, + {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, + {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, @@ -3527,7 +3659,14 @@ static int intrinsic_function_name_compare(const void *a, const void *b) return strcmp(a, func->name); }
-static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + + return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); +} + +static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, struct parse_initializer *args, const struct vkd3d_shader_location *loc) { struct intrinsic_function *intrinsic; @@ -3561,13 +3700,13 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (!(store = hlsl_new_simple_store(ctx, param, arg))) goto fail; - list_add_tail(args->instrs, &store->entry); + hlsl_block_add_instr(args->instrs, store); } }
if (!(call = hlsl_new_call(ctx, decl, loc))) goto fail; - list_add_tail(args->instrs, &call->entry); + hlsl_block_add_instr(args->instrs, call);
for (i = 0; i < decl->parameters.count; ++i) { @@ -3584,7 +3723,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) goto fail; - list_add_tail(args->instrs, &load->node.entry); + hlsl_block_add_instr(args->instrs, &load->node);
if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) goto fail; @@ -3597,16 +3736,15 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name,
if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) goto fail; - list_add_tail(args->instrs, &load->node.entry); + hlsl_block_add_instr(args->instrs, &load->node); } else { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; struct hlsl_ir_node *expr;
- if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) + if (!(expr = hlsl_new_void_expr(ctx, loc))) goto fail; - list_add_tail(args->instrs, &expr->entry); + hlsl_block_add_instr(args->instrs, expr); } } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), @@ -3662,7 +3800,7 @@ fail: return NULL; }
-static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, +static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_load *load; @@ -3692,7 +3830,7 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type
if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(params->instrs, &load->node.entry); + hlsl_block_add_instr(params->instrs, &load->node);
vkd3d_free(params->args); return params->instrs; @@ -3733,7 +3871,7 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct return false; }
-static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3761,7 +3899,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru } if (multisampled) { - if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) return false; } @@ -3769,7 +3907,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru assert(offset_dim); if (params->args_count > 1 + multisampled) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -3779,7 +3917,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru }
/* +1 for the mipmap level for non-multisampled textures */ - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) return false;
@@ -3788,11 +3926,11 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); return true; }
-static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3829,13 +3967,13 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
if (offset_dim && params->args_count > 2) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -3851,12 +3989,12 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load);
return true; }
-static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3899,17 +4037,17 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
- if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) load_params.cmp = params->args[2];
if (offset_dim && params->args_count > 3) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -3925,12 +4063,12 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load);
return true; }
-static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -3997,7 +4135,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st } else if (offset_dim && params->args_count > 2) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -4022,7 +4160,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
@@ -4032,11 +4170,187 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); + return true; +} + +static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, + struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *load; + + if (!dest) + return true; + + if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) + return false; + + if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) + return false; + return true; }
-static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + bool uint_resinfo, has_uint_arg, has_float_arg; + struct hlsl_resource_load_params load_params; + struct hlsl_ir_node *sample_info, *res_info; + struct hlsl_ir_node *zero = NULL, *void_ret; + struct hlsl_type *uint_type, *float_type; + unsigned int i, j; + enum func_argument + { + ARG_MIP_LEVEL, + ARG_WIDTH, + ARG_HEIGHT, + ARG_ELEMENT_COUNT, + ARG_LEVEL_COUNT, + ARG_SAMPLE_COUNT, + ARG_MAX_ARGS, + }; + struct hlsl_ir_node *args[ARG_MAX_ARGS] = { 0 }; + static const struct overload + { + enum hlsl_sampler_dim sampler_dim; + unsigned int args_count; + enum func_argument args[ARG_MAX_ARGS]; + } + overloads[] = + { + { HLSL_SAMPLER_DIM_1D, 1, { ARG_WIDTH } }, + { HLSL_SAMPLER_DIM_1D, 3, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_1DARRAY, 2, { ARG_WIDTH, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_1DARRAY, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_2D, 2, { ARG_WIDTH, ARG_HEIGHT } }, + { HLSL_SAMPLER_DIM_2D, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_2DARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_2DARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_3D, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_3D, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_CUBE, 2, { ARG_WIDTH, ARG_HEIGHT } }, + { HLSL_SAMPLER_DIM_CUBE, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, + { 
HLSL_SAMPLER_DIM_CUBEARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, + { HLSL_SAMPLER_DIM_CUBEARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, + { HLSL_SAMPLER_DIM_2DMS, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_SAMPLE_COUNT } }, + { HLSL_SAMPLER_DIM_2DMSARRAY, 4, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_SAMPLE_COUNT } }, + }; + const struct overload *o = NULL; + + if (object_type->sampler_dim > HLSL_SAMPLER_DIM_LAST_TEXTURE) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "GetDimensions() is not defined for this type."); + } + + uint_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); + float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); + has_uint_arg = has_float_arg = false; + for (i = 0; i < ARRAY_SIZE(overloads); ++i) + { + const struct overload *iter = &overloads[i]; + + if (iter->sampler_dim == object_type->sampler_dim && iter->args_count == params->args_count) + { + for (j = 0; j < params->args_count; ++j) + { + args[iter->args[j]] = params->args[j]; + + /* Input parameter. 
*/ + if (iter->args[j] == ARG_MIP_LEVEL) + { + if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) + { + return false; + } + + continue; + } + + has_float_arg |= hlsl_types_are_equal(params->args[j]->data_type, float_type); + has_uint_arg |= hlsl_types_are_equal(params->args[j]->data_type, uint_type); + + if (params->args[j]->data_type->class != HLSL_CLASS_SCALAR) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected scalar arguments."); + break; + } + } + o = iter; + break; + } + } + uint_resinfo = !has_float_arg && has_uint_arg; + + if (!o) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, object_type))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Unexpected number of arguments %u for %s.%s().", params->args_count, string->buffer, name); + hlsl_release_string_buffer(ctx, string); + } + } + + if (!args[ARG_MIP_LEVEL]) + { + if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) + return false; + hlsl_block_add_instr(block, zero); + args[ARG_MIP_LEVEL] = zero; + } + + memset(&load_params, 0, sizeof(load_params)); + load_params.type = HLSL_RESOURCE_RESINFO; + load_params.resource = object; + load_params.lod = args[ARG_MIP_LEVEL]; + load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); + + if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + hlsl_block_add_instr(block, res_info); + + if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) + return false; + + if (!add_assignment_from_component(ctx, block, args[ARG_HEIGHT], res_info, 1, loc)) + return false; + + if (!add_assignment_from_component(ctx, block, args[ARG_ELEMENT_COUNT], res_info, + object_type->sampler_dim == HLSL_SAMPLER_DIM_1DARRAY ? 
1 : 2, loc)) + { + return false; + } + + if (!add_assignment_from_component(ctx, block, args[ARG_LEVEL_COUNT], res_info, 3, loc)) + return false; + + if (args[ARG_SAMPLE_COUNT]) + { + memset(&load_params, 0, sizeof(load_params)); + load_params.type = HLSL_RESOURCE_SAMPLE_INFO; + load_params.resource = object; + load_params.format = args[ARG_SAMPLE_COUNT]->data_type; + if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + hlsl_block_add_instr(block, sample_info); + + if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) + return false; + } + + if (!(void_ret = hlsl_new_void_expr(ctx, loc))) + return false; + hlsl_block_add_instr(block, void_ret); + + return true; +} + +static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -4078,17 +4392,17 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.coords = params->args[1];
- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) load_params.lod = params->args[2];
if (offset_dim && params->args_count > 3) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -4102,11 +4416,11 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); return true; }
-static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -4145,21 +4459,21 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr return false; }
- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.coords = params->args[1];
- if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.ddx = params->args[2];
- if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], + if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) load_params.ddy = params->args[3];
if (offset_dim && params->args_count > 4) { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; } @@ -4173,14 +4487,14 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(instrs, &load->entry); + hlsl_block_add_instr(block, load); return true; }
static const struct method_function { const char *name; - bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); } object_methods[] = @@ -4191,6 +4505,8 @@ object_methods[] = { "GatherGreen", add_gather_method_call }, { "GatherRed", add_gather_method_call },
+ { "GetDimensions", add_getdimensions_method_call }, + { "Load", add_load_method_call },
{ "Sample", add_sample_method_call }, @@ -4208,7 +4524,7 @@ static int object_method_function_name_compare(const void *a, const void *b) return strcmp(a, func->name); }
-static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, +static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { const struct hlsl_type *object_type = object->data_type; @@ -4229,7 +4545,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), sizeof(*method), object_method_function_name_compare))) { - return method->handler(ctx, instrs, object, name, params, loc); + return method->handler(ctx, block, object, name, params, loc); } else { @@ -4272,6 +4588,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type char *name; DWORD modifiers; struct hlsl_ir_node *instr; + struct hlsl_block *block; struct list *list; struct parse_fields fields; struct parse_function function; @@ -4399,38 +4716,9 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token <intval> C_INTEGER %token <intval> PRE_LINE
-%type <list> add_expr -%type <list> assignment_expr -%type <list> bitand_expr -%type <list> bitor_expr -%type <list> bitxor_expr -%type <list> compound_statement -%type <list> conditional_expr -%type <list> declaration -%type <list> declaration_statement -%type <list> discard_statement -%type <list> equality_expr -%type <list> expr -%type <list> expr_optional -%type <list> expr_statement -%type <list> initializer_expr -%type <list> jump_statement -%type <list> logicand_expr -%type <list> logicor_expr -%type <list> loop_statement -%type <list> mul_expr -%type <list> postfix_expr -%type <list> primary_expr -%type <list> relational_expr -%type <list> selection_statement -%type <list> shift_expr -%type <list> statement -%type <list> statement_list -%type <list> struct_declaration %type <list> type_specs -%type <list> unary_expr %type <list> variables_def -%type <list> variables_def_optional +%type <list> variables_def_typed
%token <name> VAR_IDENTIFIER %token <name> NEW_IDENTIFIER @@ -4446,6 +4734,35 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <attr_list> attribute_list %type <attr_list> attribute_list_optional
+%type <block> add_expr +%type <block> assignment_expr +%type <block> bitand_expr +%type <block> bitor_expr +%type <block> bitxor_expr +%type <block> compound_statement +%type <block> conditional_expr +%type <block> declaration +%type <block> declaration_statement +%type <block> equality_expr +%type <block> expr +%type <block> expr_optional +%type <block> expr_statement +%type <block> initializer_expr +%type <block> jump_statement +%type <block> logicand_expr +%type <block> logicor_expr +%type <block> loop_statement +%type <block> mul_expr +%type <block> postfix_expr +%type <block> primary_expr +%type <block> relational_expr +%type <block> shift_expr +%type <block> selection_statement +%type <block> statement +%type <block> statement_list +%type <block> struct_declaration_without_vars +%type <block> unary_expr + %type <boolval> boolean
%type <buffer_type> buffer_type @@ -4493,6 +4810,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type <variable_def> type_spec %type <variable_def> variable_decl %type <variable_def> variable_def +%type <variable_def> variable_def_typed
%%
@@ -4502,9 +4820,9 @@ hlsl_prog: | hlsl_prog buffer_declaration buffer_body | hlsl_prog declaration_statement { - if (!list_empty($2)) + if (!list_empty(&$2->instrs)) hlsl_fixme(ctx, &@2, "Uniform initializer."); - destroy_instr_list($2); + destroy_block($2); } | hlsl_prog preproc_directive | hlsl_prog ';' @@ -4561,25 +4879,19 @@ preproc_directive: } }
-struct_declaration: - var_modifiers struct_spec variables_def_optional ';' +struct_declaration_without_vars: + var_modifiers struct_spec ';' { - struct hlsl_type *type; - unsigned int modifiers = $1; + if (!$2->name) + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Anonymous struct type must declare a variable.");
- if (!$3) - { - if (!$2->name) - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Anonymous struct type must declare a variable."); - if (modifiers) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers are not allowed on struct type declarations."); - } + if ($1) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on struct type declarations.");
- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + if (!($$ = make_empty_block(ctx))) YYABORT; - $$ = declare_vars(ctx, type, modifiers, &@1, $3); }
struct_spec: @@ -4686,7 +4998,7 @@ attribute: YYABORT; } $$->name = $2; - list_init(&$$->instrs); + hlsl_block_init(&$$->instrs); $$->loc = @$; $$->args_count = 0; } @@ -4701,8 +5013,8 @@ attribute: YYABORT; } $$->name = $2; - list_init(&$$->instrs); - list_move_tail(&$$->instrs, $4.instrs); + hlsl_block_init(&$$->instrs); + hlsl_block_add_block(&$$->instrs, $4.instrs); vkd3d_free($4.instrs); $$->loc = @$; $$->args_count = $4.args_count; @@ -4758,15 +5070,15 @@ func_declaration: "Function "%s" is already defined.", decl->func->name); hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, ""%s" was previously defined here.", decl->func->name); - hlsl_free_instr_list($2); + destroy_block($2); } else { size_t i;
decl->has_body = true; - list_move_tail(&decl->body.instrs, $2); - vkd3d_free($2); + hlsl_block_add_block(&decl->body, $2); + destroy_block($2);
/* Semantics are taken from whichever definition has a body. * We can't just replace the hlsl_ir_var pointers, though: if @@ -4817,6 +5129,9 @@ func_prototype_no_attrs: struct hlsl_ir_var *var; struct hlsl_type *type;
+ /* Functions are unconditionally inlined. */ + modifiers &= ~HLSL_MODIFIER_INLINE; + if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Only majority modifiers are allowed on functions."); @@ -4943,7 +5258,7 @@ func_prototype: compound_statement: '{' '}' { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; } | '{' scope_start statement_list '}' @@ -5261,7 +5576,12 @@ type_no_void: { validate_texture_format_type(ctx, $3, &@3);
- /* TODO: unspecified sample count is not allowed for all targets */ + if (shader_profile_version_lt(ctx, 4, 1)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); + } + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); } | texture_ms_type '<' type ',' shift_expr '>' @@ -5270,7 +5590,7 @@ type_no_void: struct hlsl_block block;
hlsl_block_init(&block); - list_move_tail(&block.instrs, $5); + hlsl_block_add_block(&block, $5);
sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5);
@@ -5325,7 +5645,7 @@ type_no_void: $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); if ($$->is_minimum_precision) { - if (ctx->profile->major_version < 4) + if (shader_profile_version_lt(ctx, 4, 0)) { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Target profile doesn't support minimum-precision types."); @@ -5354,10 +5674,10 @@ type:
declaration_statement: declaration - | struct_declaration + | struct_declaration_without_vars | typedef { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; }
@@ -5416,22 +5736,11 @@ type_spec: }
declaration: - var_modifiers type variables_def ';' + variables_def_typed ';' { - struct hlsl_type *type; - unsigned int modifiers = $1; - - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + if (!($$ = initialize_vars(ctx, $1))) YYABORT; - $$ = declare_vars(ctx, type, modifiers, &@1, $3); - } - -variables_def_optional: - %empty - { - $$ = NULL; } - | variables_def
variables_def: variable_def @@ -5446,6 +5755,33 @@ variables_def: list_add_tail($$, &$3->entry); }
+variables_def_typed: + variable_def_typed + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + list_add_head($$, &$1->entry); + + declare_var(ctx, $1); + } + | variables_def_typed ',' variable_def + { + struct parse_variable_def *head_def; + + assert(!list_empty($1)); + head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); + + assert(head_def->basic_type); + $3->basic_type = head_def->basic_type; + $3->modifiers = head_def->modifiers; + $3->modifiers_loc = head_def->modifiers_loc; + + declare_var(ctx, $3); + + $$ = $1; + list_add_tail($$, &$3->entry); + } + variable_decl: any_identifier arrays colon_attribute { @@ -5461,7 +5797,7 @@ state: any_identifier '=' expr ';' { vkd3d_free($1); - hlsl_free_instr_list($3); + destroy_block($3); }
state_block_start: @@ -5487,6 +5823,38 @@ variable_def: ctx->in_state_block = 0; }
+variable_def_typed: + var_modifiers struct_spec variable_def + { + unsigned int modifiers = $1; + struct hlsl_type *type; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + + check_invalid_in_out_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; + $$->modifiers = modifiers; + $$->modifiers_loc = @1; + } + | var_modifiers type variable_def + { + unsigned int modifiers = $1; + struct hlsl_type *type; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + + check_invalid_in_out_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; + $$->modifiers = modifiers; + $$->modifiers_loc = @1; + } + arrays: %empty { @@ -5495,17 +5863,12 @@ arrays: } | '[' expr ']' arrays { - struct hlsl_block block; uint32_t *new_array; unsigned int size;
- hlsl_clone_block(ctx, &block, &ctx->static_initializers); - list_move_tail(&block.instrs, $2); + size = evaluate_static_expression_as_uint(ctx, $2, &@2);
- size = evaluate_static_expression_as_uint(ctx, &block, &@2); - - hlsl_block_cleanup(&block); - vkd3d_free($2); + destroy_block($2);
$$ = $4;
@@ -5610,6 +5973,10 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); } + | KW_INLINE var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); + }
complex_initializer: @@ -5618,10 +5985,10 @@ complex_initializer: $$.args_count = 1; if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } - $$.args[0] = node_from_list($1); + $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; } @@ -5653,7 +6020,7 @@ complex_initializer_list: $$.args = new_args; for (i = 0; i < $3.args_count; ++i) $$.args[$$.args_count++] = $3.args[i]; - list_move_tail($$.instrs, $3.instrs); + hlsl_block_add_block($$.instrs, $3.instrs); free_parse_initializer(&$3); }
@@ -5666,10 +6033,10 @@ initializer_expr_list: $$.args_count = 1; if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } - $$.args[0] = node_from_list($1); + $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; } @@ -5681,13 +6048,13 @@ initializer_expr_list: if (!(new_args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) { free_parse_initializer(&$$); - destroy_instr_list($3); + destroy_block($3); YYABORT; } $$.args = new_args; - $$.args[$$.args_count++] = node_from_list($3); - list_move_tail($$.instrs, $3); - vkd3d_free($3); + $$.args[$$.args_count++] = node_from_block($3); + hlsl_block_add_block($$.instrs, $3); + destroy_block($3); }
boolean: @@ -5705,15 +6072,14 @@ statement_list: | statement_list statement { $$ = $1; - list_move_tail($$, $2); - vkd3d_free($2); + hlsl_block_add_block($$, $2); + destroy_block($2); }
statement: declaration_statement | expr_statement | compound_statement - | discard_statement | jump_statement | selection_statement | loop_statement @@ -5721,47 +6087,67 @@ statement: jump_statement: KW_RETURN expr ';' { - if (!add_return(ctx, $2, node_from_list($2), &@1)) - YYABORT; $$ = $2; + if (!add_return(ctx, $$, node_from_block($$), &@1)) + YYABORT; } | KW_RETURN ';' { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; if (!add_return(ctx, $$, NULL, &@1)) YYABORT; } - -discard_statement: - KW_DISCARD ';' + | KW_DISCARD ';' { - struct hlsl_ir_node *discard; + struct hlsl_ir_node *discard, *c;
- if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; - if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) + + if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) + return false; + hlsl_block_add_instr($$, c); + + if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) return false; - list_add_tail($$, &discard->entry); + hlsl_block_add_instr($$, discard); }
selection_statement: - KW_IF '(' expr ')' if_body + attribute_list_optional KW_IF '(' expr ')' if_body { - struct hlsl_ir_node *condition = node_from_list($3); - struct hlsl_block then_block, else_block; + struct hlsl_ir_node *condition = node_from_block($4); + const struct parse_attribute_list *attributes = &$1; struct hlsl_ir_node *instr; + unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); + + for (i = 0; i < attributes->count; ++i) + { + const struct hlsl_attribute *attr = attributes->attrs[i];
- hlsl_block_init(&then_block); - list_move_tail(&then_block.instrs, $5.then_block); - hlsl_block_init(&else_block); - if ($5.else_block) - list_move_tail(&else_block.instrs, $5.else_block); - vkd3d_free($5.then_block); - vkd3d_free($5.else_block); + if (!strcmp(attr->name, "branch") + || !strcmp(attr->name, "flatten")) + { + hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); + } + else + { + hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); + } + }
- if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) + if (!(instr = hlsl_new_if(ctx, condition, $6.then_block, $6.else_block, &@2))) + { + destroy_block($6.then_block); + destroy_block($6.else_block); YYABORT; + } + destroy_block($6.then_block); + destroy_block($6.else_block); if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string; @@ -5771,8 +6157,8 @@ selection_statement: "if condition type %s is not scalar.", string->buffer); hlsl_release_string_buffer(ctx, string); } - $$ = $3; - list_add_tail($$, &instr->entry); + $$ = $4; + hlsl_block_add_instr($$, instr); }
if_body: @@ -5810,7 +6196,7 @@ loop_statement: expr_optional: %empty { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; } | expr @@ -5826,7 +6212,7 @@ func_arguments: { $$.args = NULL; $$.args_count = 0; - if (!($$.instrs = make_empty_list(ctx))) + if (!($$.instrs = make_empty_block(ctx))) YYABORT; $$.braces = false; } @@ -5839,7 +6225,7 @@ primary_expr:
if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, c))) + if (!($$ = make_block(ctx, c))) YYABORT; } | C_INTEGER @@ -5848,7 +6234,7 @@ primary_expr:
if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, c))) + if (!($$ = make_block(ctx, c))) YYABORT; } | boolean @@ -5857,7 +6243,7 @@ primary_expr:
if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, c))) + if (!($$ = make_block(ctx, c))) { hlsl_free_instr(c); YYABORT; @@ -5875,7 +6261,7 @@ primary_expr: } if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; - if (!($$ = make_list(ctx, &load->node))) + if (!($$ = make_block(ctx, &load->node))) YYABORT; } | '(' expr ')' @@ -5903,7 +6289,7 @@ primary_expr: YYABORT; if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; - if (!($$ = make_list(ctx, &load->node))) + if (!($$ = make_block(ctx, &load->node))) YYABORT; } else @@ -5919,7 +6305,7 @@ postfix_expr: { if (!add_increment(ctx, $1, false, true, &@2)) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } $$ = $1; @@ -5928,14 +6314,14 @@ postfix_expr: { if (!add_increment(ctx, $1, true, true, &@2)) { - destroy_instr_list($1); + destroy_block($1); YYABORT; } $$ = $1; } | postfix_expr '.' any_identifier { - struct hlsl_ir_node *node = node_from_list($1); + struct hlsl_ir_node *node = node_from_block($1);
if (node->data_type->class == HLSL_CLASS_STRUCT) { @@ -5963,7 +6349,7 @@ postfix_expr: hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle "%s".", $3); YYABORT; } - list_add_tail($1, &swizzle->entry); + hlsl_block_add_instr($1, swizzle); $$ = $1; } else @@ -5974,17 +6360,17 @@ postfix_expr: } | postfix_expr '[' expr ']' { - struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); + struct hlsl_ir_node *array = node_from_block($1), *index = node_from_block($3);
- list_move_head($1, $3); - vkd3d_free($3); + hlsl_block_add_block($3, $1); + destroy_block($1);
- if (!add_array_access(ctx, $1, array, index, &@2)) + if (!add_array_access(ctx, $3, array, index, &@2)) { - destroy_instr_list($1); + destroy_block($3); YYABORT; } - $$ = $1; + $$ = $3; }
/* var_modifiers is necessary to avoid shift/reduce conflicts. */ @@ -6025,14 +6411,14 @@ postfix_expr: } | postfix_expr '.' any_identifier '(' func_arguments ')' { - struct hlsl_ir_node *object = node_from_list($1); + struct hlsl_ir_node *object = node_from_block($1);
- list_move_tail($1, $5.instrs); + hlsl_block_add_block($1, $5.instrs); vkd3d_free($5.instrs);
if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) { - hlsl_free_instr_list($1); + destroy_block($1); vkd3d_free($5.args); YYABORT; } @@ -6046,7 +6432,7 @@ unary_expr: { if (!add_increment(ctx, $2, false, false, &@1)) { - destroy_instr_list($2); + destroy_block($2); YYABORT; } $$ = $2; @@ -6055,7 +6441,7 @@ unary_expr: { if (!add_increment(ctx, $2, true, false, &@1)) { - destroy_instr_list($2); + destroy_block($2); YYABORT; } $$ = $2; @@ -6066,23 +6452,23 @@ unary_expr: } | '-' unary_expr { - add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); + add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_block($2), &@1); $$ = $2; } | '~' unary_expr { - add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); + add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_block($2), &@1); $$ = $2; } | '!' unary_expr { - add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); + add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_block($2), &@1); $$ = $2; } /* var_modifiers is necessary to avoid shift/reduce conflicts. */ | '(' var_modifiers type arrays ')' unary_expr { - struct hlsl_type *src_type = node_from_list($6)->data_type; + struct hlsl_type *src_type = node_from_block($6)->data_type; struct hlsl_type *dst_type; unsigned int i;
@@ -6118,9 +6504,9 @@ unary_expr: YYABORT; }
- if (!add_cast(ctx, $6, node_from_list($6), dst_type, &@3)) + if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) { - hlsl_free_instr_list($6); + destroy_block($6); YYABORT; } $$ = $6; @@ -6130,120 +6516,121 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); } | mul_expr '/' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); } | mul_expr '%' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); }
add_expr: mul_expr | add_expr '+' mul_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } | add_expr '-' mul_expr { struct hlsl_ir_node *neg;
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) + if (!(neg = add_unary_arithmetic_expr(ctx, $3, HLSL_OP1_NEG, node_from_block($3), &@2))) YYABORT; - list_add_tail($3, &neg->entry); - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); }
shift_expr: add_expr | shift_expr OP_LEFTSHIFT add_expr { - $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); } | shift_expr OP_RIGHTSHIFT add_expr { - $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); }
relational_expr: shift_expr | relational_expr '<' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); } | relational_expr '>' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); + $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); } | relational_expr OP_LE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); } | relational_expr OP_GE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); }
equality_expr: relational_expr | equality_expr OP_EQ relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); } | equality_expr OP_NE relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); }
bitand_expr: equality_expr | bitand_expr '&' equality_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); }
bitxor_expr: bitand_expr | bitxor_expr '^' bitand_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); }
bitor_expr: bitxor_expr | bitor_expr '|' bitxor_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); }
logicand_expr: bitor_expr | logicand_expr OP_AND bitor_expr { - $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); }
logicor_expr: logicand_expr | logicor_expr OP_OR logicand_expr { - $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); }
conditional_expr: logicor_expr | logicor_expr '?' expr ':' assignment_expr { - struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); + struct hlsl_ir_node *cond = node_from_block($1); + struct hlsl_ir_node *first = node_from_block($3); + struct hlsl_ir_node *second = node_from_block($5); struct hlsl_type *common_type;
- list_move_tail($1, $3); - list_move_tail($1, $5); - vkd3d_free($3); - vkd3d_free($5); + hlsl_block_add_block($1, $3); + hlsl_block_add_block($1, $5); + destroy_block($3); + destroy_block($5);
if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) YYABORT; @@ -6264,15 +6651,15 @@ assignment_expr: conditional_expr | unary_expr assign_op assignment_expr { - struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); + struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3);
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); YYABORT; } - list_move_tail($3, $1); - vkd3d_free($1); + hlsl_block_add_block($3, $1); + destroy_block($1); if (!add_assignment(ctx, $3, lhs, $2, rhs)) YYABORT; $$ = $3; @@ -6329,6 +6716,6 @@ expr: | expr ',' assignment_expr { $$ = $1; - list_move_tail($$, $3); - vkd3d_free($3); + hlsl_block_add_block($$, $3); + destroy_block($3); } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 765b1907426..bae8e5f9a5f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -97,6 +97,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) { + enum hlsl_regset regset = hlsl_type_get_regset(deref->data_type); struct hlsl_ir_node *offset = NULL; struct hlsl_type *type; unsigned int i; @@ -111,7 +112,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st struct hlsl_block idx_block;
if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, - deref->offset_regset, loc))) + regset, loc))) return NULL;
hlsl_block_add_block(block, &idx_block); @@ -126,7 +127,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { - const struct hlsl_type *type; + struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block;
@@ -145,7 +146,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der return true; }
- deref->offset_regset = hlsl_type_get_regset(type); + deref->data_type = type;
if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) return false; @@ -160,7 +161,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der /* Split uniforms into two variables representing the constant and temp * registers, and copy the former to the latter, so that writes to uniforms * work. */ -static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) +static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) { struct vkd3d_string_buffer *name; struct hlsl_ir_var *uniform; @@ -187,7 +188,7 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru
if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) return; - list_add_head(instrs, &load->node.entry); + list_add_head(&block->instrs, &load->node.entry);
if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) return; @@ -300,7 +301,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir return ext_var; }
-static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, +static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; @@ -320,9 +321,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct if (!semantic->name) return;
- vector_type_src = hlsl_get_vector_type(ctx, type->base_type, - (ctx->profile->major_version < 4) ? 4 : hlsl_type_minor_size(type)); vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + vector_type_src = vector_type_dst; + if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4);
for (i = 0; i < hlsl_type_major_size(type); ++i) { @@ -363,7 +365,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct } }
-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &lhs->node.loc; @@ -405,30 +407,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs return; list_add_after(&c->entry, &element_load->node.entry);
- prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); } } else { - prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); + prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); } }
/* Split inputs into two variables representing the semantic and temp registers, * and copy the former to the latter, so that writes to input variables work. */ -static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) +static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) { struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_head(instrs, &load->node.entry); + list_add_head(&block->instrs, &load->node.entry);
- prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); + prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
-static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, +static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; @@ -463,11 +465,11 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node); } else { @@ -475,16 +477,16 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct
if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node); }
if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - list_add_tail(instrs, &store->entry); + hlsl_block_add_instr(block, store); } }
-static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &rhs->node.loc; @@ -519,34 +521,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs
if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->entry); + hlsl_block_add_instr(block, c);
if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) return; - list_add_tail(instrs, &element_load->node.entry); + hlsl_block_add_instr(block, &element_load->node);
- append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); } } else { - append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); + append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); } }
/* Split outputs into two variables representing the temp and semantic * registers, and copy the former to the latter, so that reads from output * variables work. */ -static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) +static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) { struct hlsl_ir_load *load;
/* This redundant load is expected to be deleted later by DCE. */ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node);
- append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); + append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); }
bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -573,6 +575,37 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, return progress; }
+typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); + +static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + PFN_lower_func func = context; + struct hlsl_block block; + + hlsl_block_init(&block); + if (func(ctx, instr, &block)) + { + struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); + + list_move_before(&instr->entry, &block.instrs); + hlsl_replace_node(instr, replacement); + return true; + } + else + { + hlsl_block_cleanup(&block); + return false; + } +} + +/* Specific form of transform_ir() for passes which convert a single instruction + * to a block of one or more instructions. This helper takes care of setting up + * the block and calling hlsl_replace_node_with_block(). */ +static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) +{ + return hlsl_transform_ir(ctx, call_lower_func, block, func); +} + static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { bool res; @@ -666,7 +699,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, return; list_add_after(&cf_instr->entry, &load->node.entry);
- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) return; hlsl_block_add_instr(&then_block, jump);
@@ -1689,7 +1722,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Loaded resource must have a single uniform source."); @@ -1704,7 +1737,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_
if (load->sampler.var) { - if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->sampler.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Resource load sampler must have a single uniform source."); @@ -1722,7 +1755,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Accessed resource must have a single uniform source."); @@ -1889,7 +1922,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (rhs->type != HLSL_IR_LOAD) { - hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); + hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); return false; }
@@ -2066,6 +2099,137 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir return false; }
+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. + * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ +static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *var; + unsigned int i; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + case HLSL_RESOURCE_GATHER_RED: + case HLSL_RESOURCE_GATHER_GREEN: + case HLSL_RESOURCE_GATHER_BLUE: + case HLSL_RESOURCE_GATHER_ALPHA: + case HLSL_RESOURCE_RESINFO: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_GRAD: + case HLSL_RESOURCE_SAMPLE_INFO: + return false; + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + break; + } + if (load->sampler.var) + return false; + + if (!hlsl_type_is_resource(load->resource.var->data_type)) + { + hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); + return false; + } + + assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); + + if (!(name = hlsl_get_string_buffer(ctx))) + return false; + vkd3d_string_buffer_printf(name, "<resource>%s", load->resource.var->name); + + TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); + + if (!(var = hlsl_get_var(ctx->globals, name->buffer))) + { + struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); + + /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. 
*/ + struct hlsl_type *arr_type = load->resource.var->data_type; + for (i = 0; i < load->resource.path_len; ++i) + { + assert(arr_type->class == HLSL_CLASS_ARRAY); + texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); + arr_type = arr_type->e.array.type; + } + + if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false))) + { + hlsl_release_string_buffer(ctx, name); + return false; + } + var->is_uniform = 1; + var->is_separated_resource = true; + + list_add_tail(&ctx->extern_vars, &var->extern_entry); + } + hlsl_release_string_buffer(ctx, name); + + if (load->sampling_dim != var->data_type->sampler_dim) + { + hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Cannot split combined samplers from "%s" if they have different usage dimensions.", + load->resource.var->name); + hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here."); + return false; + + } + + hlsl_copy_deref(ctx, &load->sampler, &load->resource); + load->resource.var = var; + assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); + assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); + + return true; +} + +static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, + enum hlsl_regset regset) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) + { + if (var->bind_count[regset] < to_add->bind_count[regset]) + { + list_add_before(&var->extern_entry, &to_add->extern_entry); + return; + } + } + + list_add_tail(list, &to_add->extern_entry); +} + +static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) +{ + struct list separated_resources; + struct hlsl_ir_var *var, *next; + + list_init(&separated_resources); + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) 
+ { + if (var->is_separated_resource) + { + list_remove(&var->extern_entry); + insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); + } + } + + list_move_head(&ctx->extern_vars, &separated_resources); + + return false; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -2264,7 +2428,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; }
-struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { struct hlsl_block then_block, else_block; @@ -2290,18 +2454,18 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *ins
if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) return NULL; - list_add_tail(instrs, &iff->entry); + hlsl_block_add_instr(instrs, iff);
if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) return NULL; - list_add_tail(instrs, &load->node.entry); + hlsl_block_add_instr(instrs, &load->node);
return &load->node; }
-static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; + struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = instr->data_type, *utype; struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; @@ -2322,56 +2486,52 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) return false; - list_add_before(&instr->entry, &xor->entry); + hlsl_block_add_instr(block, xor);
for (i = 0; i < type->dimx; ++i) high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; - list_add_before(&instr->entry, &high_bit->entry); + hlsl_block_add_instr(block, high_bit);
if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) return false; - list_add_before(&instr->entry, &and->entry); + hlsl_block_add_instr(block, and);
if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; - list_add_before(&instr->entry, &abs1->entry); + hlsl_block_add_instr(block, abs1);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->entry); + hlsl_block_add_instr(block, cast1);
if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &abs2->entry); + hlsl_block_add_instr(block, abs2);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->entry); + hlsl_block_add_instr(block, cast2);
if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) return false; - list_add_before(&instr->entry, &div->entry); + hlsl_block_add_instr(block, div);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->entry); + hlsl_block_add_instr(block, cast3);
if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; - list_add_before(&instr->entry, &neg->entry); + hlsl_block_add_instr(block, neg);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) - return false; - hlsl_replace_node(instr, cond); - - return true; + return hlsl_add_conditional(ctx, block, and, neg, cast3); }
-static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; + struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = instr->data_type, *utype; struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; @@ -2394,45 +2554,41 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; - list_add_before(&instr->entry, &high_bit->entry); + hlsl_block_add_instr(block, high_bit);
if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) return false; - list_add_before(&instr->entry, &and->entry); + hlsl_block_add_instr(block, and);
if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; - list_add_before(&instr->entry, &abs1->entry); + hlsl_block_add_instr(block, abs1);
if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->entry); + hlsl_block_add_instr(block, cast1);
if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &abs2->entry); + hlsl_block_add_instr(block, abs2);
if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->entry); + hlsl_block_add_instr(block, cast2);
if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) return false; - list_add_before(&instr->entry, &div->entry); + hlsl_block_add_instr(block, div);
if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->entry); + hlsl_block_add_instr(block, cast3);
if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; - list_add_before(&instr->entry, &neg->entry); + hlsl_block_add_instr(block, neg);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) - return false; - hlsl_replace_node(instr, cond); - - return true; + return hlsl_add_conditional(ctx, block, and, neg, cast3); }
static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -2516,9 +2672,9 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void return false; }
-static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; + struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one, *mul3; struct hlsl_type *type = instr->data_type, *btype; struct hlsl_constant_value one_value; struct hlsl_ir_expr *expr; @@ -2539,47 +2695,100 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; - list_add_before(&instr->entry, &mul1->entry); + hlsl_block_add_instr(block, mul1);
if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) return false; - list_add_before(&instr->entry, &neg1->entry); + hlsl_block_add_instr(block, neg1);
if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) return false; ge->data_type = btype; - list_add_before(&instr->entry, &ge->entry); + hlsl_block_add_instr(block, ge);
if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &neg2->entry); + hlsl_block_add_instr(block, neg2);
- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) + if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) return false;
for (i = 0; i < type->dimx; ++i) one_value.u[i].f = 1.0f; if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) return false; - list_add_before(&instr->entry, &one->entry); + hlsl_block_add_instr(block, one);
if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) return false; - list_add_before(&instr->entry, &div->entry); + hlsl_block_add_instr(block, div);
if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, div, arg1))) return false; - list_add_before(&instr->entry, &mul2->entry); + hlsl_block_add_instr(block, mul2);
if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) return false; - list_add_before(&instr->entry, &frc->entry); + hlsl_block_add_instr(block, frc);
- expr->op = HLSL_OP2_MUL; - hlsl_src_remove(&expr->operands[0]); - hlsl_src_remove(&expr->operands[1]); - hlsl_src_from_node(&expr->operands[0], frc); - hlsl_src_from_node(&expr->operands[1], cond); + if (!(mul3 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, frc, cond))) + return false; + hlsl_block_add_instr(block, mul3); + + return true; +} + +static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; + static const struct hlsl_constant_value zero_value; + struct hlsl_type *arg_type, *cmp_type; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + unsigned int i, count; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + hlsl_block_init(&block); + + arg_type = jump->condition.node->data_type; + if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(&block, zero); + + operands[0] = jump->condition.node; + operands[1] = zero; + cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); + if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) + return false; + hlsl_block_add_instr(&block, cmp); + + if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(&block, bool_false); + + or = bool_false; + + count = hlsl_type_component_count(cmp_type); + for (i = 0; i < count; ++i) + { + if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) + return false; + + if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) + return false; + hlsl_block_add_instr(&block, or); + } + + list_move_tail(&instr->entry, &block.instrs); + hlsl_src_remove(&jump->condition); + 
hlsl_src_from_node(&jump->condition, or); + jump->type = HLSL_IR_JUMP_DISCARD_NZ;
return true; } @@ -2698,7 +2907,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) continue; regset = hlsl_type_get_regset(var->data_type);
- if (var->reg_reservation.reg_type && var->regs[regset].bind_count) + if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) { if (var->reg_reservation.reg_type != get_regset_name(regset)) { @@ -2716,7 +2925,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) var->regs[regset].id = var->reg_reservation.reg_index; TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, var->reg_reservation.reg_index, var->reg_reservation.reg_type, - var->reg_reservation.reg_index + var->regs[regset].bind_count); + var->reg_reservation.reg_index + var->regs[regset].allocation_size); } } } @@ -2806,7 +3015,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop load->sampler.offset.node->last_read = last_read; }
- load->coords.node->last_read = last_read; + if (load->coords.node) + load->coords.node->last_read = last_read; if (load->texel_offset.node) load->texel_offset.node->last_read = last_read; if (load->lod.node) @@ -2848,8 +3058,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop index->idx.node->last_read = last_read; break; } - case HLSL_IR_CONSTANT: case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + if (jump->condition.node) + jump->condition.node->last_read = last_read; + break; + } + case HLSL_IR_CONSTANT: break; } } @@ -2966,7 +3183,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read);
ret.id = reg_idx; - ret.bind_count = 1; + ret.allocation_size = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; return ret; @@ -3002,7 +3219,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read);
ret.id = reg_idx; - ret.bind_count = align(reg_size, 4) / 4; + ret.allocation_size = align(reg_size, 4) / 4; ret.allocated = true; return ret; } @@ -3034,7 +3251,7 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
-static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_resource_load *load; struct hlsl_ir_var *var; @@ -3046,15 +3263,16 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n
load = hlsl_ir_resource_load(instr); var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false;
if (regset == HLSL_REGSET_SAMPLERS) { enum hlsl_sampler_dim dim;
assert(!load->sampler.var); - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false;
dim = var->objects_usage[regset][index].sampler_dim; if (dim != load->sampling_dim) @@ -3072,25 +3290,39 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n return false; } } - var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; } - else - { - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim;
- var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + return false; +}
- if (load->sampler.var) - { - var = load->sampler.var; - if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) - return false; +static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index;
- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; - } + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + var->bind_count[regset] = max(var->bind_count[regset], index + 1); + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); }
return false; @@ -3100,7 +3332,7 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) { struct hlsl_ir_var *var; struct hlsl_type *type; - unsigned int i, k; + unsigned int k;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -3108,12 +3340,10 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx)
for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) { - for (i = 0; i < type->reg_size[k]; ++i) - { - /* Samplers are only allocated until the last used one. */ - if (var->objects_usage[k][i].used) - var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; - } + bool is_separated = var->is_separated_resource; + + if (var->bind_count[k] > 0) + var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? var->bind_count[k] : type->reg_size[k]; } } } @@ -3192,10 +3422,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } }
+static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) +{ + struct hlsl_constant_defs *defs = &ctx->constant_defs; + struct hlsl_constant_register *reg; + size_t i; + + for (i = 0; i < defs->count; ++i) + { + reg = &defs->regs[i]; + if (reg->index == (component_index / 4)) + { + reg->value.f[component_index % 4] = f; + return; + } + } + + if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) + return; + reg = &defs->regs[defs->count++]; + memset(reg, 0, sizeof(*reg)); + reg->index = component_index / 4; + reg->value.f[component_index % 4] = f; +} + static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct register_allocator *allocator) { - struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_ir_node *instr;
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) @@ -3206,66 +3459,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, { struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); const struct hlsl_type *type = instr->data_type; - unsigned int x, y, i, writemask, end_reg; - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int x, i;
constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
- if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, - constant->reg.id + reg_size / 4, sizeof(*defs->values))) - return; - end_reg = constant->reg.id + reg_size / 4; - if (end_reg > defs->count) - { - memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); - defs->count = end_reg; - } - assert(type->class <= HLSL_CLASS_LAST_NUMERIC); + assert(type->dimy == 1); + assert(constant->reg.writemask);
- if (!(writemask = constant->reg.writemask)) - writemask = (1u << type->dimx) - 1; - - for (y = 0; y < type->dimy; ++y) + for (x = 0, i = 0; x < 4; ++x) { - for (x = 0, i = 0; x < 4; ++x) + const union hlsl_constant_value_component *value; + float f; + + if (!(constant->reg.writemask & (1u << x))) + continue; + value = &constant->value.u[i++]; + + switch (type->base_type) { - const union hlsl_constant_value_component *value; - float f; - - if (!(writemask & (1u << x))) - continue; - value = &constant->value.u[i++]; - - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - f = !!value->u; - break; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - f = value->f; - break; - - case HLSL_TYPE_INT: - f = value->i; - break; - - case HLSL_TYPE_UINT: - f = value->u; - break; - - case HLSL_TYPE_DOUBLE: - FIXME("Double constant.\n"); - return; - - default: - vkd3d_unreachable(); - } - defs->values[constant->reg.id + y].f[x] = f; + case HLSL_TYPE_BOOL: + f = !!value->u; + break; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + f = value->f; + break; + + case HLSL_TYPE_INT: + f = value->i; + break; + + case HLSL_TYPE_UINT: + f = value->u; + break; + + case HLSL_TYPE_DOUBLE: + FIXME("Double constant.\n"); + return; + + default: + vkd3d_unreachable(); } + + record_constant(ctx, constant->reg.id * 4 + x, f); }
break; @@ -3297,8 +3536,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var;
- allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform && var->last_read) @@ -3315,6 +3552,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi } }
+ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); + vkd3d_free(allocator.allocations); }
@@ -3410,7 +3649,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; - var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; + var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); @@ -3497,7 +3736,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) continue;
buffer = var1->buffer; @@ -3508,7 +3747,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) { unsigned int var1_reg_size, var2_reg_size;
- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) continue;
if (var1 == var2 || var1->buffer != var2->buffer) @@ -3558,7 +3797,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) { if (var->is_param) var->buffer = ctx->params_buffer; @@ -3589,7 +3828,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) }
buffer->reg.id = buffer->reservation.reg_index; - buffer->reg.bind_count = 1; + buffer->reg.allocation_size = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } @@ -3599,7 +3838,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) ++index;
buffer->reg.id = index; - buffer->reg.bind_count = 1; + buffer->reg.allocation_size = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; @@ -3618,7 +3857,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) }
static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, - uint32_t index) + uint32_t index, bool allocated_only) { const struct hlsl_ir_var *var; unsigned int start, count; @@ -3632,11 +3871,14 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum * bound there even if the reserved vars aren't used. */ start = var->reg_reservation.reg_index; count = var->data_type->reg_size[regset]; + + if (!var->regs[regset].allocated && allocated_only) + continue; } else if (var->regs[regset].allocated) { start = var->regs[regset].id; - count = var->regs[regset].bind_count; + count = var->regs[regset].allocation_size; } else { @@ -3667,11 +3909,12 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - unsigned int count = var->regs[regset].bind_count; + unsigned int count = var->regs[regset].allocation_size;
if (count == 0) continue;
+ /* The variable was already allocated if it has a reservation. */ if (var->regs[regset].allocated) { const struct hlsl_ir_var *reserved_object, *last_reported = NULL; @@ -3690,7 +3933,10 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) { index = var->regs[regset].id + i;
- reserved_object = get_allocated_object(ctx, regset, index); + /* get_allocated_object() may return "var" itself, but we + * actually want that, otherwise we'll end up reporting the + * same conflict between the same two variables twice. */ + reserved_object = get_allocated_object(ctx, regset, index, true); if (reserved_object && reserved_object != var && reserved_object != last_reported) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, @@ -3709,7 +3955,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset)
while (available < count) { - if (get_allocated_object(ctx, regset, index)) + if (get_allocated_object(ctx, regset, index, false)) available = 0; else ++available; @@ -3853,6 +4099,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; + enum hlsl_regset regset; unsigned int size;
if (!offset_node) @@ -3869,8 +4116,9 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref return false;
*offset = hlsl_ir_constant(offset_node)->value.u[0].u; + regset = hlsl_type_get_regset(deref->data_type);
- size = deref->var->data_type->reg_size[deref->offset_regset]; + size = deref->var->data_type->reg_size[regset]; if (*offset >= size) { hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, @@ -3900,7 +4148,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
- assert(deref->offset_regset == HLSL_REGSET_NUMERIC); + assert(deref->data_type); + assert(deref->data_type->class <= HLSL_CLASS_LAST_NUMERIC);
ret.id += offset / 4;
@@ -4008,7 +4257,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) - prepend_uniform_copy(ctx, &body->instrs, var); + prepend_uniform_copy(ctx, body, var); }
for (i = 0; i < entry_func->parameters.count; ++i) @@ -4017,7 +4266,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { - prepend_uniform_copy(ctx, &body->instrs, var); + prepend_uniform_copy(ctx, body, var); } else { @@ -4033,9 +4282,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
if (var->storage_modifiers & HLSL_STORAGE_IN) - prepend_input_var_copy(ctx, &body->instrs, var); + prepend_input_var_copy(ctx, body, var); if (var->storage_modifiers & HLSL_STORAGE_OUT) - append_output_var_copy(ctx, &body->instrs, var); + append_output_var_copy(ctx, body, var); } } if (entry_func->return_var) @@ -4044,7 +4293,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point "%s" is missing a return value semantic.", entry_func->func->name);
- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); + append_output_var_copy(ctx, body, entry_func->return_var); }
for (i = 0; i < entry_func->attr_count; ++i) @@ -4062,6 +4311,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point "%s" is missing a [numthreads] attribute.", entry_func->func->name);
+ if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do @@ -4075,10 +4328,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); hlsl_transform_ir(ctx, lower_int_dot, body, NULL); - hlsl_transform_ir(ctx, lower_int_division, body, NULL); - hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); + lower_ir(ctx, lower_int_division, body); + lower_ir(ctx, lower_int_modulus, body); hlsl_transform_ir(ctx, lower_int_abs, body, NULL); - hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); + lower_ir(ctx, lower_float_modulus, body); hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do { @@ -4094,6 +4347,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); hlsl_transform_ir(ctx, lower_int_dot, body, NULL);
+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + sort_synthetic_separated_samplers_first(ctx); + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); @@ -4107,9 +4367,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_abs, body, NULL); }
- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); - /* TODO: move forward, remove when no longer needed */ transform_derefs(ctx, replace_deref_path_with_offset, body); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 301113c8477..41a72ab6c0d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -80,7 +80,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return false; }
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (src->node.data_type->base_type) { @@ -152,6 +152,51 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; }
+static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, + "Indefinite logarithm result."); + } + dst->u[k].f = log2f(src->value.u[k].f); + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: + if (src->value.u[k].d < 0.0) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, + "Indefinite logarithm result."); + } + dst->u[k].d = log2(src->value.u[k].d); + break; + + default: + FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { @@ -160,7 +205,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
assert(type == src->node.data_type->base_type);
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { @@ -186,6 +231,96 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; }
+static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } + dst->u[k].f = 1.0f / src->value.u[k].f; + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: + if (src->value.u[k].d == 0.0) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } + dst->u[k].d = 1.0 / src->value.u[k].d; + break; + + default: + FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + +static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, + "Imaginary square root result."); + } + dst->u[k].f = sqrtf(src->value.u[k].f); + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { + hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: + if (src->value.u[k].d < 0.0) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, + "Imaginary square root result."); + } + dst->u[k].d = sqrt(src->value.u[k].d); + break; + + default: + FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { @@ -195,7 +330,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { @@ -223,7 +358,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { enum hlsl_base_type type = dst_type->base_type; @@ -232,65 +367,132 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type);
- for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; break;
- case HLSL_TYPE_DOUBLE: - dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; + default: + FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; break;
+ default: + FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; + dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; break;
default: - FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { + enum hlsl_base_type type = dst_type->base_type; unsigned int k;
- assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx);
- for (k = 0; k < 4; ++k) + dst->u[0].f = 0.0f; + for (k = 0; k < src1->node.data_type->dimx; ++k) { - switch (src1->node.data_type->base_type) + switch (type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; break; + default: + FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + }
- case HLSL_TYPE_DOUBLE: - dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; - break; + return true; +}
- case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; - break; +static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k;
+ assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + assert(type == src3->node.data_type->base_type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + assert(src3->node.data_type->dimx == 1); + + dst->u[0].f = src3->value.u[0].f; + for (k = 0; k < src1->node.data_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + break; default: - vkd3d_unreachable(); + FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; } - - dst->u[k].u *= ~0u; } + return true; }
@@ -363,45 +565,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; }
-static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) +static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; unsigned int k;
- assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type);
for (k = 0; k < dst_type->dimx; ++k) { - switch (type) + switch (src1->node.data_type->base_type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; + break; + case HLSL_TYPE_INT: - if (src2->value.u[k].i == 0) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } - if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) - dst->u[k].i = 0; - else - dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; + } + return true; +} + +static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + unsigned int k; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (src1->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; break;
case HLSL_TYPE_UINT: - if (src2->value.u[k].u == 0) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } - dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; break;
default: - FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; + vkd3d_unreachable(); } + + dst->u[k].u *= ~0u; + } + return true; +} + +static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + unsigned int k; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (src1->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; } return true; } @@ -419,6 +692,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); + break; + case HLSL_TYPE_INT: dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); break; @@ -448,6 +730,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); + break; + case HLSL_TYPE_INT: dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); break; @@ -464,8 +755,9 @@ static bool fold_min(struct hlsl_ctx 
*ctx, struct hlsl_constant_value *dst, cons return true; }
-static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { enum hlsl_base_type type = dst_type->base_type; unsigned int k; @@ -478,19 +770,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, switch (type) { case HLSL_TYPE_INT: + if (src2->value.u[k].i == 0) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); + return false; + } + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = 0; + else + dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; + break; + case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; + if (src2->value.u[k].u == 0) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); + return false; + } + dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; break;
default: - FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { enum hlsl_base_type type = dst_type->base_type; @@ -503,48 +811,67 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; + break; + case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; + dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; break;
default: - FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; }
-static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; unsigned int k;
- assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type);
for (k = 0; k < dst_type->dimx; ++k) { - switch (type) + switch (src1->node.data_type->base_type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; + break; + case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; break;
default: - FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; + vkd3d_unreachable(); } + + dst->u[k].u *= ~0u; } return true; }
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; struct hlsl_constant_value res = {0}; struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; @@ -572,6 +899,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node); + if (expr->operands[2].node) + arg3 = hlsl_ir_constant(expr->operands[2].node);
switch (expr->op) { @@ -583,28 +912,58 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_cast(ctx, &res, instr->data_type, arg1); break;
+ case HLSL_OP1_LOG2: + success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + case HLSL_OP1_NEG: success = fold_neg(ctx, &res, instr->data_type, arg1); break;
+ case HLSL_OP1_RCP: + success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + + case HLSL_OP1_SQRT: + success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + case HLSL_OP2_ADD: success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_MUL: - success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_BIT_AND: + case HLSL_OP2_LOGIC_AND: + success = fold_and(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_NEQUAL: - success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_BIT_OR: + case HLSL_OP2_LOGIC_OR: + success = fold_or(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_DOT: + success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_DIV: success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
- case HLSL_OP2_MOD: - success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + case HLSL_OP2_EQUAL: + success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_GEQUAL: + success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_LESS: + success = fold_less(ctx, &res, instr->data_type, arg1, arg2); break;
case HLSL_OP2_MAX: @@ -615,16 +974,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_min(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_BIT_XOR: - success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_MOD: + success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break;
- case HLSL_OP2_BIT_AND: - success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_MUL: + success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); break;
- case HLSL_OP2_BIT_OR: - success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_NEQUAL: + success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP3_DP2ADD: + success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); break;
default: diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 9eefb82c226..705905f7888 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -85,6 +85,72 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); }
+static const struct vkd3d_shader_varying_map *find_varying_map( + const struct vkd3d_shader_next_stage_info *next_stage, unsigned int signature_idx) +{ + unsigned int i; + + for (i = 0; i < next_stage->varying_count; ++i) + { + if (next_stage->varying_map[i].output_signature_index == signature_idx) + return &next_stage->varying_map[i]; + } + + return NULL; +} + +static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info) +{ + struct shader_signature *signature = &parser->shader_desc.output_signature; + const struct vkd3d_shader_next_stage_info *next_stage; + unsigned int i; + + if (!(next_stage = vkd3d_find_struct(compile_info->next, NEXT_STAGE_INFO))) + return VKD3D_OK; + + for (i = 0; i < signature->element_count; ++i) + { + const struct vkd3d_shader_varying_map *map = find_varying_map(next_stage, i); + struct signature_element *e = &signature->elements[i]; + + if (map) + { + unsigned int input_mask = map->input_mask; + + e->target_location = map->input_register_index; + + /* It is illegal in Vulkan if the next shader uses the same varying + * location with a different mask. 
*/ + if (input_mask && input_mask != e->mask) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "Output mask %#x does not match input mask %#x.", + e->mask, input_mask); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } + else + { + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; + } + } + + for (i = 0; i < next_stage->varying_count; ++i) + { + if (next_stage->varying_map[i].output_signature_index >= signature->element_count) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "The next stage consumes varyings not written by this stage."); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } + + return VKD3D_OK; +} + struct hull_flattener { struct vkd3d_shader_instruction_array instructions; @@ -247,13 +313,13 @@ static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_s reg->immconst_type = VKD3D_IMMCONST_SCALAR; }
-static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) +void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) { memset(ins, 0, sizeof(*ins)); ins->handler_idx = handler_idx; }
-enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) +static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) { struct hull_flattener flattener = {*src_instructions}; struct vkd3d_shader_instruction_array *instructions; @@ -388,7 +454,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p return VKD3D_OK; }
-enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( +static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) { struct vkd3d_shader_instruction_array *instructions; @@ -999,7 +1065,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi shader_instruction_init(ins, VKD3DSIH_NOP); }
-enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, +static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) { @@ -1070,3 +1136,159 @@ enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_i *instructions = normaliser.instructions; return VKD3D_OK; } + +struct flat_constant_def +{ + enum vkd3d_shader_d3dbc_constant_register set; + uint32_t index; + uint32_t value[4]; +}; + +struct flat_constants_normaliser +{ + struct vkd3d_shader_parser *parser; + struct flat_constant_def *defs; + size_t def_count, defs_capacity; +}; + +static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, + enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) +{ + static const struct + { + enum vkd3d_shader_register_type type; + enum vkd3d_shader_d3dbc_constant_register set; + uint32_t offset; + } + regs[] = + { + {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, + {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, + {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, + {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, + {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, + {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(regs); ++i) + { + if (reg->type == regs[i].type) + { + if (reg->idx[0].rel_addr) + { + FIXME("Unhandled relative address.\n"); + return false; + } + + *set = regs[i].set; + *index = regs[i].offset + reg->idx[0].offset; + return true; + } + } + + return false; +} + +static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param, + const 
struct flat_constants_normaliser *normaliser) +{ + enum vkd3d_shader_d3dbc_constant_register set; + uint32_t index; + size_t i, j; + + if (!get_flat_constant_register_type(&param->reg, &set, &index)) + return; + + for (i = 0; i < normaliser->def_count; ++i) + { + if (normaliser->defs[i].set == set && normaliser->defs[i].index == index) + { + param->reg.type = VKD3DSPR_IMMCONST; + param->reg.idx_count = 0; + param->reg.immconst_type = VKD3D_IMMCONST_VEC4; + for (j = 0; j < 4; ++j) + param->reg.u.immconst_uint[j] = normaliser->defs[i].value[j]; + return; + } + } + + param->reg.type = VKD3DSPR_CONSTBUFFER; + param->reg.idx[0].offset = set; /* register ID */ + param->reg.idx[1].offset = set; /* register index */ + param->reg.idx[2].offset = index; /* buffer index */ + param->reg.idx_count = 3; +} + +static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d_shader_parser *parser) +{ + struct flat_constants_normaliser normaliser = {.parser = parser}; + unsigned int i, j; + + for (i = 0; i < parser->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; + + if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) + { + struct flat_constant_def *def; + + if (!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity, + normaliser.def_count + 1, sizeof(*normaliser.defs))) + { + vkd3d_free(normaliser.defs); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + def = &normaliser.defs[normaliser.def_count++]; + + get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); + for (j = 0; j < 4; ++j) + def->value[j] = ins->src[0].reg.u.immconst_uint[j]; + + vkd3d_shader_instruction_make_nop(ins); + } + else + { + for (j = 0; j < ins->src_count; ++j) + shader_register_normalise_flat_constants((struct vkd3d_shader_src_param *)&ins->src[j], &normaliser); + } + } + + vkd3d_free(normaliser.defs); + return 
VKD3D_OK; +} + +enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info) +{ + struct vkd3d_shader_instruction_array *instructions = &parser->instructions; + enum vkd3d_result result = VKD3D_OK; + + if (parser->shader_desc.is_dxil) + return result; + + if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL + && (result = remap_output_signature(parser, compile_info)) < 0) + return result; + + if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL + && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) + { + result = instruction_array_normalise_hull_shader_control_point_io(instructions, + &parser->shader_desc.input_signature); + } + if (result >= 0) + result = instruction_array_normalise_io_registers(instructions, parser->shader_version.type, + &parser->shader_desc.input_signature, &parser->shader_desc.output_signature, + &parser->shader_desc.patch_constant_signature); + + if (result >= 0) + result = instruction_array_normalise_flat_constants(parser); + + if (result >= 0 && TRACE_ON()) + vkd3d_shader_trace(instructions, &parser->shader_version); + + return result; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 94079696280..6fb61eff6c3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -30,6 +30,13 @@
#define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner)
+static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) +{ + if (!ctx->expansion_count) + return NULL; + return ctx->expansion_stack[ctx->expansion_count - 1].macro; +} + static void update_location(struct preproc_ctx *ctx);
#define YY_USER_ACTION update_location(yyget_extra(yyscanner)); @@ -125,7 +132,20 @@ INT_SUFFIX [uUlL]{0,2} const char *p;
if (!ctx->last_was_newline) - return T_HASHSTRING; + { + struct preproc_macro *macro; + + /* Stringification is only done for function-like macro bodies. + * Anywhere else, we need to parse it as two separate tokens. + * We could use a state for this, but yyless() is easier and cheap. + */ + + if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) + return T_HASHSTRING; + + yyless(1); + return T_TEXT; + }
for (p = yytext + 1; strchr(" \t", *p); ++p) ; @@ -219,13 +239,6 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) return file->if_stack[file->if_count - 1].current_true; }
-static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) -{ - if (!ctx->expansion_count) - return NULL; - return ctx->expansion_stack[ctx->expansion_count - 1].macro; -} - /* Concatenation is not done for object-like macros, but is done for both * function-like macro bodies and their arguments. */ static bool should_concat(struct preproc_ctx *ctx) @@ -334,6 +347,43 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, return true; }
+static void preproc_stringify(struct preproc_ctx *ctx, struct vkd3d_string_buffer *buffer, const char *text) +{ + const struct preproc_text *expansion; + const char *p = text + 1; + unsigned int i; + + while (*p == ' ' || *p == '\t') + ++p; + + vkd3d_string_buffer_printf(buffer, "\""); + if ((expansion = find_arg_expansion(ctx, p))) + { + size_t len = expansion->text.content_size; + size_t start = 0; + + while (len && strchr(" \t\r\n", expansion->text.buffer[len - 1])) + --len; + + while (start < len && strchr(" \t\r\n", expansion->text.buffer[start])) + ++start; + + for (i = start; i < len; ++i) + { + char c = expansion->text.buffer[i]; + + if (c == '\\' || c == '"') + vkd3d_string_buffer_printf(buffer, "\\"); + vkd3d_string_buffer_printf(buffer, "%c", c); + } + } + else + { + vkd3d_string_buffer_printf(buffer, "%s", p); + } + vkd3d_string_buffer_printf(buffer, "\""); +} + int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) { struct preproc_ctx *ctx = yyget_extra(scanner); @@ -441,9 +491,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) switch (func_state->state) { case STATE_NONE: - { - struct preproc_macro *macro; - if (token == T_CONCAT && should_concat(ctx)) { while (ctx->buffer.content_size @@ -452,37 +499,17 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) break; }
- /* Stringification, however, is only done for function-like - * macro bodies. */ - if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) + if (token == T_HASHSTRING) { - const struct preproc_text *expansion; - const char *p = text + 1; - unsigned int i; + struct vkd3d_string_buffer buffer;
if (ctx->current_directive) return return_token(token, lval, text);
- while (*p == ' ' || *p == '\t') - ++p; - - vkd3d_string_buffer_printf(&ctx->buffer, "\""); - if ((expansion = find_arg_expansion(ctx, p))) - { - for (i = 0; i < expansion->text.content_size; ++i) - { - char c = expansion->text.buffer[i]; - - if (c == '\\' || c == '"') - vkd3d_string_buffer_printf(&ctx->buffer, "\\"); - vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); - } - } - else - { - vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); - } - vkd3d_string_buffer_printf(&ctx->buffer, "\""); + vkd3d_string_buffer_init(&buffer); + preproc_stringify(ctx, &buffer, text); + vkd3d_string_buffer_printf(&ctx->buffer, "%s", buffer.buffer); + vkd3d_string_buffer_cleanup(&buffer); break; }
@@ -586,7 +613,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) else vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); break; - }
case STATE_IDENTIFIER: if (token == '(') @@ -628,6 +654,41 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
switch (token) { + /* Most text gets left alone (e.g. if it contains macros, + * the macros should be evaluated later). + * Arguments are a special case, and are replaced with + * their values immediately. */ + case T_IDENTIFIER: + case T_IDENTIFIER_PAREN: + { + const struct preproc_text *expansion; + + if ((expansion = find_arg_expansion(ctx, text))) + { + preproc_push_expansion(ctx, expansion, NULL); + continue; + } + + if (current_arg) + preproc_text_add(current_arg, text); + break; + } + + /* Stringification is another special case. Unsurprisingly, + * we need to stringify if this is an argument. More + * surprisingly, we need to stringify even if it's not. */ + case T_HASHSTRING: + { + struct vkd3d_string_buffer buffer; + + vkd3d_string_buffer_init(&buffer); + preproc_stringify(ctx, &buffer, text); + if (current_arg) + preproc_text_add(current_arg, buffer.buffer); + vkd3d_string_buffer_cleanup(&buffer); + break; + } + case T_NEWLINE: if (current_arg) preproc_text_add(current_arg, " "); @@ -686,6 +747,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) if (current_arg) preproc_text_add(current_arg, text); } + + if (current_arg) + preproc_text_add(current_arg, " "); break; } } diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index bfe5272fd29..eb8125b0e55 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -199,6 +199,16 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } }
+static inline bool register_is_undef(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_UNDEF; +} + +static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) +{ + return register_is_constant(reg) || register_is_undef(reg); +} + #define VKD3D_SPIRV_VERSION 0x00010000 #define VKD3D_SPIRV_GENERATOR_ID 18 #define VKD3D_SPIRV_GENERATOR_VERSION 8 @@ -1746,6 +1756,38 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, } }
+static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, + enum vkd3d_data_type data_type, unsigned int component_count) +{ + uint32_t scalar_id; + + if (component_count == 1) + { + switch (data_type) + { + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_SNORM: + case VKD3D_DATA_UNORM: + return vkd3d_spirv_get_op_type_float(builder, 32); + break; + case VKD3D_DATA_INT: + case VKD3D_DATA_UINT: + return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); + break; + case VKD3D_DATA_DOUBLE: + return vkd3d_spirv_get_op_type_float(builder, 64); + default: + FIXME("Unhandled data type %#x.\n", data_type); + return 0; + } + } + else + { + scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); + return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); + } +} + static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) { vkd3d_spirv_stream_init(&builder->debug_stream); @@ -2263,7 +2305,7 @@ struct spirv_compiler
uint32_t binding_idx;
- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; unsigned int input_control_point_count; unsigned int output_control_point_count; bool use_vocp; @@ -2333,7 +2375,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler)
static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) { const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; @@ -2429,13 +2471,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve
compiler->shader_type = shader_version->type;
- compiler->input_signature = shader_desc->input_signature; - compiler->output_signature = shader_desc->output_signature; - compiler->patch_constant_signature = shader_desc->patch_constant_signature; - memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); - memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); - memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); @@ -2536,13 +2571,13 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * }
static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( - const struct spirv_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) + const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) { - unsigned int register_space = cb->range.space; - unsigned int reg_idx = cb->range.first; + unsigned int register_space = range->space; + unsigned int reg_idx = range->first; unsigned int i;
- if (cb->range.first != cb->range.last) + if (range->first != range->last) return NULL;
for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) @@ -2559,8 +2594,8 @@ static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_const return NULL; }
-static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *compiler, - const struct vkd3d_shader_resource *resource, const struct vkd3d_shader_sampler *sampler) +static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range) { const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; const struct vkd3d_shader_combined_resource_sampler *combined_sampler; @@ -2569,10 +2604,35 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com if (!shader_interface->combined_sampler_count) return false;
- if (resource && (resource->reg.reg.type == VKD3DSPR_UAV || resource->range.last != resource->range.first)) + if (range->last != range->first) + return false; + + for (i = 0; i < shader_interface->combined_sampler_count; ++i) + { + combined_sampler = &shader_interface->combined_samplers[i]; + + if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) + continue; + + if ((combined_sampler->resource_space == range->space + && combined_sampler->resource_index == range->first)) + return true; + } + + return false; +} + +static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range) +{ + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + const struct vkd3d_shader_combined_resource_sampler *combined_sampler; + unsigned int i; + + if (!shader_interface->combined_sampler_count) return false;
- if (sampler && sampler->range.first != sampler->range.last) + if (range->last != range->first) return false;
for (i = 0; i < shader_interface->combined_sampler_count; ++i) @@ -2582,10 +2642,8 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) continue;
- if ((!resource || (combined_sampler->resource_space == resource->range.space - && combined_sampler->resource_index == resource->range.first)) - && (!sampler || (combined_sampler->sampler_space == sampler->range.space - && combined_sampler->sampler_index == sampler->range.first))) + if (combined_sampler->sampler_space == range->space + && combined_sampler->sampler_index == range->first) return true; }
@@ -2603,6 +2661,16 @@ static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_error(struct spirv_compiler * compiler->failed = true; }
+static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_warning(struct spirv_compiler *compiler, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, format, args); + va_end(args); +} + static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) { @@ -3211,13 +3279,13 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil struct vkd3d_symbol reg_symbol, *symbol; struct rb_entry *entry;
- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + assert(!register_is_constant_or_undef(reg));
if (reg->type == VKD3DSPR_TEMP) { assert(reg->idx[0].offset < compiler->temp_count); register_info->id = compiler->temp_id + reg->idx[0].offset; - register_info->storage_class = SpvStorageClassFunction; + register_info->storage_class = SpvStorageClassPrivate; register_info->descriptor_array = NULL; register_info->member_idx = 0; register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; @@ -3553,6 +3621,19 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi vkd3d_component_type_from_data_type(reg->data_type), component_count, values); }
+static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD write_mask) +{ + unsigned int component_count = vkd3d_write_mask_component_count(write_mask); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id; + + assert(reg->type == VKD3DSPR_UNDEF); + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); + return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); +} + static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, const struct vkd3d_shader_register_info *reg_info) @@ -3563,7 +3644,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, enum vkd3d_shader_component_type component_type; unsigned int skipped_component_mask;
- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + assert(!register_is_constant_or_undef(reg)); assert(vkd3d_write_mask_component_count(write_mask) == 1);
component_idx = vkd3d_write_mask_get_component_idx(write_mask); @@ -3615,6 +3696,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); else if (reg->type == VKD3DSPR_IMMCONST64) return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); + else if (reg->type == VKD3DSPR_UNDEF) + return spirv_compiler_emit_load_undef(compiler, reg, write_mask);
component_count = vkd3d_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); @@ -3827,7 +3910,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, unsigned int src_write_mask = write_mask; uint32_t type_id;
- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); + assert(!register_is_constant_or_undef(reg));
if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) return; @@ -3998,6 +4081,11 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); + vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); + break; default: FIXME("Unhandled interpolation mode %#x.\n", mode); break; @@ -4542,7 +4630,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, } else { - unsigned int location = signature_element->register_index; + unsigned int location = signature_element->target_location;
input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, component_type, input_component_count, array_sizes, 2); @@ -4918,9 +5006,15 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); } + else if (signature_element->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + { + storage_class = SpvStorageClassPrivate; + id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, output_component_count, array_sizes, 2); + } else { - unsigned int location = signature_element->register_index; + unsigned int location = signature_element->target_location;
if (is_patch_constant) location += shader_signature_next_location(&compiler->output_signature); @@ -4929,10 +5023,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, storage_class, component_type, output_component_count, array_sizes, 2); vkd3d_spirv_add_iface_variable(builder, id);
- if (is_dual_source_blending(compiler) && signature_element->register_index < 2) + if (is_dual_source_blending(compiler) && location < 2) { vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, location); } else { @@ -5258,8 +5352,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler WARN("Unhandled global flags %#x.\n", flags); }
-static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; size_t function_location; @@ -5270,11 +5363,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, vkd3d_spirv_begin_function_stream_insertion(builder, function_location);
assert(!compiler->temp_count); - compiler->temp_count = instruction->declaration.count; + compiler->temp_count = count; for (i = 0; i < compiler->temp_count; ++i) { - id = spirv_compiler_emit_variable(compiler, &builder->function_stream, - SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + id = spirv_compiler_emit_variable(compiler, &builder->global_stream, + SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (!i) compiler->temp_id = id; assert(id == compiler->temp_id + i); @@ -5473,28 +5566,31 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * return var_id; }
-static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size) { - const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; const SpvStorageClass storage_class = SpvStorageClassUniform; - const struct vkd3d_shader_register *reg = &cb->src.reg; struct vkd3d_push_constant_buffer_binding *push_cb; struct vkd3d_descriptor_variable_info var_info; struct vkd3d_symbol reg_symbol;
- assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); + struct vkd3d_shader_register reg = + { + .type = VKD3DSPR_CONSTBUFFER, + .idx[0].offset = register_id, + .idx_count = 1, + };
- if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, cb))) + if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, range))) { /* Push constant buffers are handled in * spirv_compiler_emit_push_constant_buffers(). */ - unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); - push_cb->reg = *reg; - push_cb->size = cb->size; + unsigned int cb_size_in_bytes = size * VKD3D_VEC4_SIZE * sizeof(uint32_t); + push_cb->reg = reg; + push_cb->size = size; if (cb_size_in_bytes > push_cb->pc.size) { WARN("Constant buffer size %u exceeds push constant size %u.\n", @@ -5504,19 +5600,19 @@ static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compi }
vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - length_id = spirv_compiler_get_constant_uint(compiler, cb->size); + length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16);
struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); - vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); + vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size);
var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, - reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); + &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info);
- vkd3d_symbol_make_register(&reg_symbol, reg); + vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); reg_symbol.descriptor_array = var_info.array_symbol; @@ -5557,29 +5653,34 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi spirv_compiler_put_symbol(compiler, &reg_symbol); }
-static void spirv_compiler_emit_dcl_sampler(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range, unsigned int register_id) { - const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; const SpvStorageClass storage_class = SpvStorageClassUniformConstant; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_register *reg = &sampler->src.reg; struct vkd3d_descriptor_variable_info var_info; struct vkd3d_symbol reg_symbol; uint32_t type_id, var_id;
- vkd3d_symbol_make_sampler(&reg_symbol, reg); - reg_symbol.info.sampler.range = sampler->range; + const struct vkd3d_shader_register reg = + { + .type = VKD3DSPR_SAMPLER, + .idx[0].offset = register_id, + .idx_count = 1, + }; + + vkd3d_symbol_make_sampler(&reg_symbol, &reg); + reg_symbol.info.sampler.range = *range; spirv_compiler_put_symbol(compiler, &reg_symbol);
- if (spirv_compiler_has_combined_sampler(compiler, NULL, sampler)) + if (spirv_compiler_has_combined_sampler_for_sampler(compiler, range)) return;
type_id = vkd3d_spirv_get_op_type_sampler(builder); - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, - &sampler->range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, + range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info);
- vkd3d_symbol_make_register(&reg_symbol, reg); + vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); reg_symbol.descriptor_array = var_info.array_symbol; @@ -5624,13 +5725,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty } }
-static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( +static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range) { - const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; unsigned int register_last = (range->last == ~0u) ? range->first : range->last; - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d; unsigned int i;
for (i = 0; i < descriptor_info->descriptor_count; ++i) @@ -5650,7 +5751,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler bool raw_structured, uint32_t depth) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d; bool uav_read, uav_atomics; uint32_t sampled_type_id; SpvImageFormat format; @@ -5685,7 +5786,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi const struct vkd3d_shader_combined_resource_sampler *current; uint32_t image_type_id, type_id, ptr_type_id, var_id; enum vkd3d_shader_binding_flag resource_type_flag; - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d; struct vkd3d_symbol symbol; unsigned int i; bool depth; @@ -5761,20 +5862,30 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi }
static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, - enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw) + const struct vkd3d_shader_register_range *range, unsigned int register_id, + unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, + enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) { struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; SpvStorageClass storage_class = SpvStorageClassUniformConstant; uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; - const struct vkd3d_shader_register *reg = &resource->reg.reg; const struct vkd3d_spirv_resource_type *resource_type_info; enum vkd3d_shader_component_type sampled_type; struct vkd3d_symbol resource_symbol; - bool is_uav;
- is_uav = reg->type == VKD3DSPR_UAV; + struct vkd3d_shader_register reg = + { + .type = is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, + .idx[0].offset = register_id, + .idx_count = 1, + }; + + if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + if (!(resource_type_info = spirv_compiler_enable_resource_type(compiler, resource_type, is_uav))) { @@ -5782,11 +5893,11 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp return; }
- sampled_type = vkd3d_component_type_from_data_type(resource_data_type); + sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type);
- if (spirv_compiler_has_combined_sampler(compiler, resource, NULL)) + if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) { - spirv_compiler_emit_combined_sampler_declarations(compiler, reg, &resource->range, + spirv_compiler_emit_combined_sampler_declarations(compiler, &reg, range, resource_type, sampled_type, structure_stride, raw, resource_type_info); return; } @@ -5809,19 +5920,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } else { - type_id = spirv_compiler_get_image_type_id(compiler, reg, &resource->range, + type_id = spirv_compiler_get_image_type_id(compiler, &reg, range, resource_type_info, sampled_type, structure_stride || raw, 0); }
- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, - &resource->range, resource_type, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, + range, resource_type, false, &var_info);
if (is_uav) { - const struct vkd3d_shader_descriptor_info *d; + const struct vkd3d_shader_descriptor_info1 *d;
- d = spirv_compiler_get_descriptor_info(compiler, - VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); + d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range);
if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); @@ -5853,15 +5963,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp type_id = struct_id; }
- counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, - &resource->range, resource_type, true, &counter_var_info); + counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, + type_id, &reg, range, resource_type, true, &counter_var_info); } }
- vkd3d_symbol_make_resource(&resource_symbol, reg); + vkd3d_symbol_make_resource(&resource_symbol, &reg); resource_symbol.id = var_id; resource_symbol.descriptor_array = var_info.array_symbol; - resource_symbol.info.resource.range = resource->range; + resource_symbol.info.resource.range = *range; resource_symbol.info.resource.sampled_type = sampled_type; resource_symbol.info.resource.type_id = type_id; resource_symbol.info.resource.resource_type_info = resource_type_info; @@ -5874,58 +5984,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp spirv_compiler_put_symbol(compiler, &resource_symbol); }
-static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; - enum vkd3d_shader_resource_type resource_type = semantic->resource_type; - uint32_t flags = instruction->flags; - - /* We don't distinguish between APPEND and COUNTER UAVs. */ - flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; - if (flags) - FIXME("Unhandled UAV flags %#x.\n", flags); - - if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) - resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; - else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) - resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; - - spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, - resource_type, semantic->resource_data_type[0], 0, false); -} - -static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_raw_resource *resource = &instruction->declaration.raw_resource; - uint32_t flags = instruction->flags; - - /* We don't distinguish between APPEND and COUNTER UAVs. */ - flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; - if (flags) - FIXME("Unhandled UAV flags %#x.\n", flags); - - spirv_compiler_emit_resource_declaration(compiler, &resource->resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, 0, true); -} - -static void spirv_compiler_emit_dcl_resource_structured(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_structured_resource *resource = &instruction->declaration.structured_resource; - unsigned int stride = resource->byte_stride; - uint32_t flags = instruction->flags; - - /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ - flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; - if (flags) - FIXME("Unhandled UAV flags %#x.\n", flags); - - spirv_compiler_emit_resource_declaration(compiler, &resource->resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, stride / 4, false); -} - static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) { @@ -6236,9 +6294,6 @@ static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler)
vkd3d_spirv_build_op_function_end(builder);
- compiler->temp_id = 0; - compiler->temp_count = 0; - if (is_in_control_point_phase(compiler)) { if (compiler->epilogue_function_id) @@ -6640,7 +6695,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, uint32_t components[VKD3D_VEC4_SIZE]; unsigned int i, component_count;
- if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) + if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) goto general_implementation;
spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); @@ -7398,7 +7453,13 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c assert(compiler->control_flow_depth); assert(cf_info->current_block == VKD3D_BLOCK_SWITCH);
- assert(src->swizzle == VKD3D_SHADER_NO_SWIZZLE && src->reg.type == VKD3DSPR_IMMCONST); + if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) + { + WARN("Unexpected src swizzle %#x.\n", src->swizzle); + spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, + "The swizzle for a switch case value is not scalar."); + } + assert(src->reg.type == VKD3DSPR_IMMCONST); value = *src->reg.u.immconst_uint;
if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, @@ -9103,33 +9164,12 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_GLOBAL_FLAGS: spirv_compiler_emit_dcl_global_flags(compiler, instruction); break; - case VKD3DSIH_DCL_TEMPS: - spirv_compiler_emit_dcl_temps(compiler, instruction); - break; case VKD3DSIH_DCL_INDEXABLE_TEMP: spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); break; - case VKD3DSIH_DCL_CONSTANT_BUFFER: - spirv_compiler_emit_dcl_constant_buffer(compiler, instruction); - break; case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: spirv_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); break; - case VKD3DSIH_DCL_SAMPLER: - spirv_compiler_emit_dcl_sampler(compiler, instruction); - break; - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_UAV_TYPED: - spirv_compiler_emit_dcl_resource(compiler, instruction); - break; - case VKD3DSIH_DCL_RESOURCE_RAW: - case VKD3DSIH_DCL_UAV_RAW: - spirv_compiler_emit_dcl_resource_raw(compiler, instruction); - break; - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_UAV_STRUCTURED: - spirv_compiler_emit_dcl_resource_structured(compiler, instruction); - break; case VKD3DSIH_DCL_TGSM_RAW: spirv_compiler_emit_dcl_tgsm_raw(compiler, instruction); break; @@ -9425,7 +9465,16 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CUT_STREAM: spirv_compiler_emit_cut_stream(compiler, instruction); break; + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_CONSTANT_BUFFER: case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_UAV_RAW: + case VKD3DSIH_DCL_UAV_STRUCTURED: + case VKD3DSIH_DCL_UAV_TYPED: case VKD3DSIH_HS_DECLS: case VKD3DSIH_NOP: /* nothing to do */ @@ -9437,6 +9486,50 @@ static int spirv_compiler_handle_instruction(struct 
spirv_compiler *compiler, return ret; }
+static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) +{ + unsigned int i; + + for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; + struct vkd3d_shader_register_range range; + + range.first = descriptor->register_index; + if (descriptor->count == ~0u) + range.last = ~0u; + else + range.last = descriptor->register_index + descriptor->count - 1; + range.space = descriptor->register_space; + + switch (descriptor->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, + descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, + descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, + descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, + descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); + break; + + default: + vkd3d_unreachable(); + } + } +} + static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *spirv) @@ -9444,28 +9537,31 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct 
vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; struct vkd3d_shader_instruction_array instructions; enum vkd3d_result result = VKD3D_OK; unsigned int i;
+ if (parser->shader_desc.temp_count) + spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); + + spirv_compiler_emit_descriptor_declarations(compiler); + compiler->location.column = 0; compiler->location.line = 1;
+ if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) + return result; + instructions = parser->instructions; memset(&parser->instructions, 0, sizeof(parser->instructions));
- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL - && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) - { - result = instruction_array_normalise_hull_shader_control_point_io(&instructions, - &compiler->input_signature); - } - if (result >= 0) - result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, - &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); - - if (result >= 0 && TRACE_ON()) - vkd3d_shader_trace(&instructions, &parser->shader_version); + compiler->input_signature = shader_desc->input_signature; + compiler->output_signature = shader_desc->output_signature; + compiler->patch_constant_signature = shader_desc->patch_constant_signature; + memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); + memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); + memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature));
if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) spirv_compiler_emit_shader_signature_outputs(compiler); @@ -9541,7 +9637,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, }
int spirv_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index d066b13ee4e..7949be150bf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -430,6 +430,8 @@ enum vkd3d_sm4_register_type VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, + + VKD3D_SM4_REGISTER_TYPE_COUNT, };
enum vkd3d_sm4_extended_operand_type @@ -505,7 +507,7 @@ enum vkd3d_sm4_input_primitive_type
enum vkd3d_sm4_swizzle_type { - VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_NONE = 0x0, /* swizzle bitfield contains a mask */ VKD3D_SM4_SWIZZLE_VEC4 = 0x1, VKD3D_SM4_SWIZZLE_SCALAR = 0x2, }; @@ -571,6 +573,12 @@ struct sm4_index_range_array struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; };
+struct vkd3d_sm4_lookup_tables +{ + const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; + const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; +}; + struct vkd3d_shader_sm4_parser { const uint32_t *start, *end, *ptr; @@ -587,6 +595,8 @@ struct vkd3d_shader_sm4_parser struct sm4_index_range_array output_index_ranges; struct sm4_index_range_array patch_constant_index_ranges;
+ struct vkd3d_sm4_lookup_tables lookup; + struct vkd3d_shader_parser p; };
@@ -697,6 +707,19 @@ static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; }
+static void shader_sm4_read_case_condition(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, + (struct vkd3d_shader_src_param *)&ins->src[0]); + if (ins->src[0].reg.type != VKD3DSPR_IMMCONST) + { + FIXME("Switch case value is not a 32-bit constant.\n"); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE, + "Switch case value is not a 32-bit immediate constant register."); + } +} + static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) { @@ -989,6 +1012,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) { ins->declaration.count = *tokens; + if (opcode == VKD3D_SM4_OP_DCL_TEMPS) + priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); }
static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1203,7 +1228,8 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", + shader_sm4_read_case_condition}, {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", shader_sm4_read_conditional_op}, @@ -1466,50 +1492,10 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, };
-static const enum vkd3d_shader_register_type register_type_table[] = -{ - /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, - /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, - /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, - /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, - /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, - /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, - /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, - /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, - /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, - /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, - /* UNKNOWN */ ~0u, - /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, - /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, - /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, - /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, - /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, - /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, - /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, - /* UNKNOWN */ ~0u, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, - /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, - /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, - /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, - /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, - /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, - /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, - /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, - /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, - /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, - /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, - /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, - /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, - /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL 
*/ VKD3DSPR_DEPTHOUTGE, - /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, - /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, +struct vkd3d_sm4_register_type_info +{ + enum vkd3d_sm4_register_type sm4_type; + enum vkd3d_shader_register_type vkd3d_type; };
static const enum vkd3d_shader_register_precision register_precision_table[] = @@ -1522,18 +1508,104 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, };
+struct tpf_writer +{ + struct hlsl_ctx *ctx; + struct vkd3d_bytecode_buffer *buffer; + struct vkd3d_sm4_lookup_tables lookup; +}; + static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) { unsigned int i;
for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) { - if (opcode == opcode_table[i].opcode) return &opcode_table[i]; + if (opcode == opcode_table[i].opcode) + return &opcode_table[i]; }
return NULL; }
+static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +{ + const struct vkd3d_sm4_register_type_info *info; + unsigned int i; + + static const struct vkd3d_sm4_register_type_info register_type_table[] = + { + {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, + {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, + {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, + {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, + {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, + {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, + {VKD3D_SM4_RT_SAMPLER, VKD3DSPR_SAMPLER}, + {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, + {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, + {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, + {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, + {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT}, + {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, + {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, + {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, + {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, + {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, + {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, + {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, + {VKD3D_SM5_RT_FORK_INSTANCE_ID, VKD3DSPR_FORKINSTID}, + {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, + {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, + {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, + {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, + {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, + {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, + {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, + {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, + {VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, + {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, + {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, + {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, + {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, + {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, + 
{VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, + {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, + }; + + memset(lookup, 0, sizeof(*lookup)); + + for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) + { + info = &register_type_table[i]; + lookup->register_type_info_from_sm4[info->sm4_type] = info; + lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; + } +} + +static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + tpf->ctx = ctx; + tpf->buffer = buffer; + init_sm4_lookup_tables(&tpf->lookup); +} + +static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) +{ + if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) + return NULL; + return lookup->register_type_info_from_sm4[sm4_type]; +} + +static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) +{ + if (vkd3d_type >= VKD3DSPR_COUNT) + return NULL; + return lookup->register_type_info_from_vkd3d[vkd3d_type]; +} + static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) { switch (sm4->p.shader_version.type) @@ -1640,6 +1712,7 @@ static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_typ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) { + const struct vkd3d_sm4_register_type_info *register_type_info; enum vkd3d_sm4_register_precision precision; enum vkd3d_sm4_register_type register_type; enum vkd3d_sm4_extended_operand_type type; @@ -1654,15 +1727,15 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui token = *(*ptr)++;
register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; - if (register_type >= ARRAY_SIZE(register_type_table) - || register_type_table[register_type] == VKD3DSPR_INVALID) + register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); + if (!register_type_info) { FIXME("Unhandled register type %#x.\n", register_type); param->type = VKD3DSPR_TEMP; } else { - param->type = register_type_table[register_type]; + param->type = register_type_info->vkd3d_type; } param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; param->non_uniform = false; @@ -1953,6 +2026,7 @@ static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_pa static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) { + unsigned int dimension, mask; DWORD token;
if (*ptr >= end) @@ -1968,37 +2042,63 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons return false; }
- if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) + switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) { - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - } - else - { - enum vkd3d_sm4_swizzle_type swizzle_type = - (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + case VKD3D_SM4_DIMENSION_NONE: + case VKD3D_SM4_DIMENSION_SCALAR: + src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + break;
- switch (swizzle_type) + case VKD3D_SM4_DIMENSION_VEC4: { - case VKD3D_SM4_SWIZZLE_NONE: - if (shader_sm4_is_scalar_register(&src_param->reg)) - src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - else + enum vkd3d_sm4_swizzle_type swizzle_type = + (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + + switch (swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - break;
- case VKD3D_SM4_SWIZZLE_SCALAR: - src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; - src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; - break; + mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + /* Mask seems only to be used for vec4 constants and is always zero. */ + if (!register_is_constant(&src_param->reg)) + { + FIXME("Source mask %#x is not for a constant.\n", mask); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, + "Unhandled mask %#x for a non-constant source register.", mask); + } + else if (mask) + { + FIXME("Unhandled mask %#x.\n", mask); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, + "Unhandled source register mask %#x.", mask); + }
- case VKD3D_SM4_SWIZZLE_VEC4: - src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); - break; + break;
- default: - FIXME("Unhandled swizzle type %#x.\n", swizzle_type); - break; + case VKD3D_SM4_SWIZZLE_SCALAR: + src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; + src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); + break; + + default: + FIXME("Unhandled swizzle type %#x.\n", swizzle_type); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, + "Source register swizzle type %#x is invalid.", swizzle_type); + break; + } + break; } + + default: + FIXME("Unhandled dimension %#x.\n", dimension); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, + "Source register dimension %#x is invalid.", dimension); + break; }
if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, @@ -2011,7 +2111,9 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) { + enum vkd3d_sm4_swizzle_type swizzle_type; enum vkd3d_shader_src_modifier modifier; + unsigned int dimension, swizzle; DWORD token;
if (*ptr >= end) @@ -2033,10 +2135,53 @@ static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, cons return false; }
- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) + { + case VKD3D_SM4_DIMENSION_NONE: + dst_param->write_mask = 0; + break; + + case VKD3D_SM4_DIMENSION_SCALAR: + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + break; + + case VKD3D_SM4_DIMENSION_VEC4: + swizzle_type = (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + switch (swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: + dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); + if (swizzle != VKD3D_SHADER_NO_SWIZZLE) + { + FIXME("Unhandled swizzle %#x.\n", swizzle); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE, + "Unhandled destination register swizzle %#x.", swizzle); + } + dst_param->write_mask = VKD3DSP_WRITEMASK_ALL; + break; + + default: + FIXME("Unhandled swizzle type %#x.\n", swizzle_type); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, + "Destination register swizzle type %#x is invalid.", swizzle_type); + break; + } + break; + + default: + FIXME("Unhandled dimension %#x.\n", dimension); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, + "Destination register dimension %#x is invalid.", dimension); + break; + } + if (data_type == VKD3D_DATA_DOUBLE) dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); - /* Scalar registers are declared with no write mask in shader bytecode. */ + /* Some scalar registers are declared with no write mask in shader bytecode. 
*/ if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) dst_param->write_mask = VKD3DSP_WRITEMASK_0; dst_param->modifiers = 0; @@ -2362,6 +2507,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t sm4->output_map[e->register_index] = e->semantic_index; }
+ init_sm4_lookup_tables(&sm4->lookup); + return true; }
@@ -2442,6 +2589,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi }
shader_desc = &sm4->p.shader_desc; + shader_desc->is_dxil = false; if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, shader_desc)) < 0) { @@ -2499,7 +2647,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; }
-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); +static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block);
static bool type_is_integer(const struct hlsl_type *type) { @@ -2516,7 +2664,7 @@ static bool type_is_integer(const struct hlsl_type *type) }
bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) + bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) { unsigned int i;
@@ -2526,24 +2674,24 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem bool output; enum vkd3d_shader_type shader_type; enum vkd3d_sm4_swizzle_type swizzle_type; - enum vkd3d_sm4_register_type type; + enum vkd3d_shader_register_type type; bool has_idx; } register_table[] = { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_LOCALTHREADID, false},
- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false},
/* Put sv_target in this table, instead of letting it fall through to * default varying allocation, so that the register index matches the * usage index. */ - {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, };
for (i = 0; i < ARRAY_SIZE(register_table); ++i) @@ -2552,7 +2700,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem && output == register_table[i].output && ctx->profile->type == register_table[i].shader_type) { - *type = register_table[i].type; + if (type) + *type = register_table[i].type; if (swizzle_type) *swizzle_type = register_table[i].swizzle_type; *has_idx = register_table[i].has_idx; @@ -2624,7 +2773,8 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant return true; }
-static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) +static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + uint32_t tag, struct vkd3d_bytecode_buffer *buffer) { /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN * sections to be aligned. Without this, the sections themselves will be @@ -2632,6 +2782,9 @@ static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_byt size_t size = bytecode_align(buffer);
dxbc_writer_add_section(dxbc, tag, buffer->data, size); + + if (buffer->status < 0) + ctx->result = buffer->status; }
static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) @@ -2649,7 +2802,6 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - enum vkd3d_sm4_register_type type; uint32_t usage_idx, reg_idx; D3D_NAME usage; bool has_idx; @@ -2663,14 +2815,13 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, continue; usage_idx = var->semantic.index;
- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) { reg_idx = has_idx ? var->semantic.index : ~0u; } else { assert(var->regs[HLSL_REGSET_NUMERIC].allocated); - type = VKD3D_SM4_RT_INPUT; reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; }
@@ -2739,7 +2890,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc,
set_u32(&buffer, count_position, i);
- add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); + add_section(ctx, dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); }
static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) @@ -2827,6 +2978,22 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) return D3D_SVT_VERTEXSHADER; case HLSL_TYPE_VOID: return D3D_SVT_VOID; + case HLSL_TYPE_UAV: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SVT_RWTEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SVT_RWTEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SVT_RWTEXTURE3D; + case HLSL_SAMPLER_DIM_1DARRAY: + return D3D_SVT_RWTEXTURE1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return D3D_SVT_RWTEXTURE2DARRAY; + default: + vkd3d_unreachable(); + } default: vkd3d_unreachable(); } @@ -2967,47 +3134,154 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ } }
+struct extern_resource +{ + /* var is only not NULL if this resource is a whole variable, so it may be responsible for more + * than one component. */ + const struct hlsl_ir_var *var; + + char *name; + struct hlsl_type *data_type; + bool is_user_packed; + + enum hlsl_regset regset; + unsigned int id, bind_count; +}; + static int sm4_compare_extern_resources(const void *a, const void *b) { - const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; - const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; - enum hlsl_regset aa_regset, bb_regset; + const struct extern_resource *aa = (const struct extern_resource *)a; + const struct extern_resource *bb = (const struct extern_resource *)b; + int r;
- aa_regset = hlsl_type_get_regset(aa->data_type); - bb_regset = hlsl_type_get_regset(bb->data_type); + if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) + return r;
- if (aa_regset != bb_regset) - return aa_regset - bb_regset; + return vkd3d_u32_compare(aa->id, bb->id); +} + +static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +{ + unsigned int i;
- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; + for (i = 0; i < count; ++i) + vkd3d_free(extern_resources[i].name); + vkd3d_free(extern_resources); +} + +static const char *string_skip_tag(const char *string) +{ + if (!strncmp(string, "<resource>", strlen("<resource>"))) + return string + strlen("<resource>"); + return string; }
-static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { - const struct hlsl_ir_var **extern_resources = NULL; + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; + struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; enum hlsl_regset regset; size_t capacity = 0; + char *name;
*count = 0;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!hlsl_type_is_resource(var->data_type)) - continue; - regset = hlsl_type_get_regset(var->data_type); - if (!var->regs[regset].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) + if (separate_components) { - *count = 0; - return NULL; + unsigned int component_count = hlsl_type_component_count(var->data_type); + unsigned int k, regset_offset; + + for (k = 0; k < component_count; ++k) + { + struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); + struct vkd3d_string_buffer *name_buffer; + + if (!hlsl_type_is_resource(component_type)) + continue; + + regset = hlsl_type_get_regset(component_type); + regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); + + if (regset_offset > var->regs[regset].allocation_size) + continue; + + if (var->objects_usage[regset][regset_offset].used) + { + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + hlsl_release_string_buffer(ctx, name_buffer); + return NULL; + } + hlsl_release_string_buffer(ctx, name_buffer); + + extern_resources[*count].var = NULL; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = component_type; + extern_resources[*count].is_user_packed = false; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id + regset_offset; + extern_resources[*count].bind_count = 1; + + 
++*count; + } + } } + else + { + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + }
- extern_resources[*count] = var; - ++*count; + if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = var; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type; + extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].bind_count = var->bind_count[regset]; + + ++*count; + } }
qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); @@ -3020,8 +3294,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) size_t cbuffers_offset, resources_offset, creator_offset, string_offset; size_t cbuffer_position, resource_position, creator_position; const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var;
@@ -3075,18 +3349,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; + const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0;
- var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.reg_type) + if (resource->is_user_packed) flags |= D3D_SIF_USERPACKED;
put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(var->data_type)); - if (regset == HLSL_REGSET_SAMPLERS) + put_u32(&buffer, sm4_resource_type(resource->data_type)); + if (resource->regset == HLSL_REGSET_SAMPLERS) { put_u32(&buffer, 0); put_u32(&buffer, 0); @@ -3094,15 +3365,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) } else { - unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx;
- put_u32(&buffer, sm4_resource_format(var->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, sm4_resource_format(resource->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } - put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, var->regs[regset].bind_count); + put_u32(&buffer, resource->id); + put_u32(&buffer, resource->bind_count); put_u32(&buffer, flags); }
@@ -3128,9 +3399,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - var = extern_resources[i]; + const struct extern_resource *resource = &extern_resources[i];
- string_offset = put_string(&buffer, var->name); + string_offset = put_string(&buffer, resource->name); set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); }
@@ -3234,9 +3505,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); set_u32(&buffer, creator_position, creator_offset);
- add_section(dxbc, TAG_RDEF, &buffer); + add_section(ctx, dxbc, TAG_RDEF, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) @@ -3308,8 +3579,8 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod
struct sm4_register { - enum vkd3d_sm4_register_type type; - uint32_t idx[2]; + enum vkd3d_shader_register_type type; + struct vkd3d_shader_register_index idx[2]; unsigned int idx_count; enum vkd3d_sm4_dimension dim; uint32_t immconst_uint[4]; @@ -3346,8 +3617,9 @@ struct sm4_instruction
static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, - const struct hlsl_deref *deref, const struct hlsl_type *data_type) + const struct hlsl_deref *deref) { + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); const struct hlsl_ir_var *var = deref->var;
if (var->is_uniform) @@ -3356,37 +3628,37 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (regset == HLSL_REGSET_TEXTURES) { - reg->type = VKD3D_SM4_RT_RESOURCE; + reg->type = VKD3DSPR_RESOURCE; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); - assert(deref->offset_regset == HLSL_REGSET_TEXTURES); + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_TEXTURES); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_UAVS) { - reg->type = VKD3D_SM5_RT_UAV; + reg->type = VKD3DSPR_UAV; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); - assert(deref->offset_regset == HLSL_REGSET_UAVS); + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_UAVS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_SAMPLERS) { - reg->type = VKD3D_SM4_RT_SAMPLER; + reg->type = VKD3DSPR_SAMPLER; reg->dim = VKD3D_SM4_DIMENSION_NONE; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); - assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_SAMPLERS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -3395,12 +3667,12 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
assert(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3D_SM4_RT_CONSTBUFFER; + reg->type = VKD3DSPR_CONSTBUFFER; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->buffer->reg.id; - reg->idx[1] = offset / 4; + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = offset / 4; reg->idx_count = 2; *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); } @@ -3415,7 +3687,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (has_idx) { - reg->idx[0] = var->semantic.index + offset / 4; + reg->idx[0].offset = var->semantic.index + offset / 4; reg->idx_count = 1; }
@@ -3427,11 +3699,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_INPUT; + reg->type = VKD3DSPR_INPUT; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; + reg->idx[0].offset = hlsl_reg.id; reg->idx_count = 1; *writemask = hlsl_reg.writemask; } @@ -3446,11 +3718,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (has_idx) { - reg->idx[0] = var->semantic.index + offset / 4; + reg->idx[0].offset = var->semantic.index + offset / 4; reg->idx_count = 1; }
- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) + if (reg->type == VKD3DSPR_DEPTHOUT) reg->dim = VKD3D_SM4_DIMENSION_SCALAR; else reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -3461,9 +3733,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_OUTPUT; + reg->type = VKD3DSPR_OUTPUT; reg->dim = VKD3D_SM4_DIMENSION_VEC4; - reg->idx[0] = hlsl_reg.id; + reg->idx[0].offset = hlsl_reg.id; reg->idx_count = 1; *writemask = hlsl_reg.writemask; } @@ -3473,22 +3745,22 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; + reg->type = VKD3DSPR_TEMP; reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; + reg->idx[0].offset = hlsl_reg.id; reg->idx_count = 1; *writemask = hlsl_reg.writemask; } }
static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, - const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) + const struct hlsl_deref *deref, unsigned int map_writemask) { unsigned int writemask;
- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); + sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); } @@ -3497,10 +3769,10 @@ static void sm4_register_from_node(struct sm4_register *reg, unsigned int *write enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) { assert(instr->reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; + reg->type = VKD3DSPR_TEMP; reg->dim = VKD3D_SM4_DIMENSION_VEC4; *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = instr->reg.id; + reg->idx[0].offset = instr->reg.id; reg->idx_count = 1; *writemask = instr->reg.writemask; } @@ -3516,7 +3788,7 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) { src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - src->reg.type = VKD3D_SM4_RT_IMMCONST; + src->reg.type = VKD3DSPR_IMMCONST; if (width == 1) { src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; @@ -3529,8 +3801,10 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; for (i = 0; i < 4; ++i) { - if (map_writemask & (1u << i)) + if ((map_writemask & (1u << i)) && (j < width)) src->reg.immconst_uint[i] = value->u[j++].u; + else + src->reg.immconst_uint[i] = 0; } } } @@ -3553,17 +3827,100 @@ static void sm4_src_from_node(struct sm4_src_register *src, src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); }
-static uint32_t sm4_encode_register(const struct sm4_register *reg) +static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct sm4_dst_register *dst) { - return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) - | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) - | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); + const struct vkd3d_sm4_register_type_info *register_type_info; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t sm4_reg_type, reg_dim; + uint32_t token = 0; + unsigned int j; + + register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); + if (!register_type_info) + { + FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); + sm4_reg_type = VKD3D_SM4_RT_TEMP; + } + else + { + sm4_reg_type = register_type_info->sm4_type; + } + + reg_dim = dst->reg.dim; + + token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; + token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; + token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + token |= dst->writemask << VKD3D_SM4_WRITEMASK_SHIFT; + put_u32(buffer, token); + + for (j = 0; j < dst->reg.idx_count; ++j) + { + put_u32(buffer, dst->reg.idx[j].offset); + assert(!dst->reg.idx[j].rel_addr); + } +} + +static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) +{ + const struct vkd3d_sm4_register_type_info *register_type_info; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t sm4_reg_type, reg_dim; + uint32_t token = 0; + unsigned int j; + + register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); + if (!register_type_info) + { + FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); + sm4_reg_type = VKD3D_SM4_RT_TEMP; + } + else + { + sm4_reg_type = register_type_info->sm4_type; + } + + reg_dim = src->reg.dim; + + token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; + token |= src->reg.idx_count << 
VKD3D_SM4_REGISTER_ORDER_SHIFT; + token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + { + token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; + } + if (src->reg.mod) + token |= VKD3D_SM4_EXTENDED_OPERAND; + put_u32(buffer, token); + + if (src->reg.mod) + put_u32(buffer, (src->reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); + + for (j = 0; j < src->reg.idx_count; ++j) + { + put_u32(buffer, src->reg.idx[j].offset); + assert(!src->reg.idx[j].rel_addr); + } + + if (src->reg.type == VKD3DSPR_IMMCONST) + { + put_u32(buffer, src->reg.immconst_uint[0]); + if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + { + put_u32(buffer, src->reg.immconst_uint[1]); + put_u32(buffer, src->reg.immconst_uint[2]); + put_u32(buffer, src->reg.immconst_uint[3]); + } + } }
static uint32_t sm4_register_order(const struct sm4_register *reg) { uint32_t order = 1; - if (reg->type == VKD3D_SM4_RT_IMMCONST) + if (reg->type == VKD3DSPR_IMMCONST) order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; order += reg->idx_count; if (reg->mod) @@ -3571,8 +3928,9 @@ static uint32_t sm4_register_order(const struct sm4_register *reg) return order; }
-static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) +static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) { + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = instr->opcode; unsigned int size = 1, i, j;
@@ -3600,43 +3958,10 @@ static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const st }
for (i = 0; i < instr->dst_count; ++i) - { - token = sm4_encode_register(&instr->dsts[i].reg); - if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; - put_u32(buffer, token); - - for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) - put_u32(buffer, instr->dsts[i].reg.idx[j]); - } + sm4_write_dst_register(tpf, &instr->dsts[i]);
for (i = 0; i < instr->src_count; ++i) - { - token = sm4_encode_register(&instr->srcs[i].reg); - token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; - if (instr->srcs[i].reg.mod) - token |= VKD3D_SM4_EXTENDED_OPERAND; - put_u32(buffer, token); - - if (instr->srcs[i].reg.mod) - put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); - - for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) - put_u32(buffer, instr->srcs[i].reg.idx[j]); - - if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); - if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); - } - } - } + sm4_write_src_register(tpf, &instr->srcs[i]);
if (instr->byte_stride) put_u32(buffer, instr->byte_stride); @@ -3672,67 +3997,75 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, return true; }
-static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) +static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) { const struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER,
.srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, - .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, + .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, + .srcs[0].reg.idx[0].offset = cbuffer->reg.id, + .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, .srcs[0].reg.idx_count = 2, .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), .src_count = 1, }; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) { - unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct hlsl_type *component_type; + unsigned int i; struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.type = VKD3DSPR_SAMPLER, .dsts[0].reg.idx_count = 1, .dst_count = 1, };
- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT;
- for (i = 0; i < count; ++i) + assert(resource->regset == HLSL_REGSET_SAMPLERS); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) continue;
- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; - write_sm4_instruction(buffer, &instr); + instr.dsts[0].reg.idx[0].offset = resource->id + i; + write_sm4_instruction(tpf, &instr); } }
-static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool uav) +static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, + bool uav) { enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - unsigned int i, count = var->data_type->reg_size[regset]; struct hlsl_type *component_type; struct sm4_instruction instr; + unsigned int i;
- component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + assert(resource->regset == regset);
- for (i = 0; i < count; ++i) + component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[regset][i].used) + if (resource->var && !resource->var->objects_usage[regset][i].used) continue;
instr = (struct sm4_instruction) { - .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.type = uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, + .dsts[0].reg.idx[0].offset = resource->id + i, .dsts[0].reg.idx_count = 1, .dst_count = 1,
@@ -3742,11 +4075,11 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
if (uav) { - switch (var->data_type->sampler_dim) + switch (resource->data_type->sampler_dim) { case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; - instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; + instr.byte_stride = resource->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; break; default: instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; @@ -3765,13 +4098,13 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); } }
-static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) { - const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_profile_info *profile = tpf->ctx->profile; const bool output = var->is_output_semantic; D3D_NAME usage; bool has_idx; @@ -3782,11 +4115,11 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b .dst_count = 1, };
- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) { if (has_idx) { - instr.dsts[0].reg.idx[0] = var->semantic.index; + instr.dsts[0].reg.idx[0].offset = var->semantic.index; instr.dsts[0].reg.idx_count = 1; } else @@ -3797,16 +4130,16 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b } else { - instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; - instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; instr.dsts[0].reg.idx_count = 1; instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; }
- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) + if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); if (usage == ~0u) usage = D3D_NAME_UNDEFINED;
@@ -3866,10 +4199,10 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b break; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) +static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) { struct sm4_instruction instr = { @@ -3879,33 +4212,35 @@ static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t t .idx_count = 1, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) +static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) { struct sm4_instruction instr = { .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
- .idx = {thread_count[0], thread_count[1], thread_count[2]}, + .idx[0] = thread_count[0], + .idx[1] = thread_count[1], + .idx[2] = thread_count[2], .idx_count = 3, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) +static void write_sm4_ret(const struct tpf_writer *tpf) { struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_RET, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) { struct sm4_instruction instr; @@ -3920,12 +4255,11 @@ static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_ instr.srcs[0].reg.mod = src_mod; instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src) +static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) { struct sm4_instruction instr;
@@ -3935,7 +4269,7 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe assert(dst_idx < ARRAY_SIZE(instr.dsts)); sm4_dst_from_node(&instr.dsts[dst_idx], dst); assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; instr.dsts[1 - dst_idx].reg.idx_count = 0; instr.dst_count = 2; @@ -3943,10 +4277,10 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -3961,11 +4295,11 @@ static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
/* dp# instructions don't map the swizzle. */ -static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -3980,10 +4314,10 @@ static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum v sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, +static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { @@ -3995,7 +4329,7 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff assert(dst_idx < ARRAY_SIZE(instr.dsts)); sm4_dst_from_node(&instr.dsts[dst_idx], dst); assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; instr.dsts[1 - dst_idx].reg.idx_count = 0; instr.dst_count = 2; @@ -4004,15 +4338,15 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, +static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) { + const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); @@ -4029,7 +4363,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7."); return; } @@ -4052,7 +4386,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf
sm4_src_from_node(&instr.srcs[0], coords, coords_writemask);
- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask);
instr.src_count = 2;
@@ -4067,13 +4401,13 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf
memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->type = VKD3D_SM4_RT_IMMCONST; + reg->type = VKD3DSPR_IMMCONST; reg->dim = VKD3D_SM4_DIMENSION_SCALAR; reg->immconst_uint[0] = index->value.u[0].u; } - else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) + else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) { - hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); } else { @@ -4083,13 +4417,11 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf ++instr.src_count; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_resource_load *load) +static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; const struct hlsl_deref *resource = &load->resource; @@ -4132,7 +4464,7 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7."); return; } @@ -4142,8 +4474,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); instr.src_count = 3;
if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD @@ -4165,7 +4497,52 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer ++instr.src_count; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); +} + +static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + + assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; + if (dst->data_type->base_type == HLSL_TYPE_UINT) + instr.opcode |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(tpf, &instr); +} + +static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + + assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_RESINFO; + if (dst->data_type->base_type == HLSL_TYPE_UINT) + instr.opcode |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + instr.src_count = 2; + + write_sm4_instruction(tpf, &instr); }
static bool type_is_float(const struct hlsl_type *type) @@ -4173,8 +4550,7 @@ static bool type_is_float(const struct hlsl_type *type) return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; }
-static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, +static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, const struct hlsl_ir_node *arg, uint32_t mask) { struct sm4_instruction instr; @@ -4187,16 +4563,15 @@ static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx,
sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; + instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; instr.srcs[1].reg.immconst_uint[0] = mask; instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_cast(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) { static const union { @@ -4218,23 +4593,23 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_TYPE_INT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); break;
case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); break;
case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); + write_sm4_cast_from_bool(tpf, expr, arg1, one.u); break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); break;
default: @@ -4247,20 +4622,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + write_sm4_cast_from_bool(tpf, expr, arg1, 1); break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); break;
default: @@ -4273,20 +4648,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + write_sm4_cast_from_bool(tpf, expr, arg1, 1); break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); break;
default: @@ -4295,7 +4670,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, break;
case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); break;
case HLSL_TYPE_BOOL: @@ -4305,26 +4680,25 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, } }
-static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, + const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) { struct sm4_instruction instr;
memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); instr.src_count = 2;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_expr(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) { const struct hlsl_ir_node *arg1 = expr->operands[0].node; const struct hlsl_ir_node *arg2 = expr->operands[1].node; @@ -4333,7 +4707,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
assert(expr->node.reg.allocated);
- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) + if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) return;
switch (expr->op) @@ -4342,161 +4716,181 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); } break;
case HLSL_OP1_BIT_NOT: assert(type_is_integer(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); break;
case HLSL_OP1_CAST: - write_sm4_cast(ctx, buffer, expr); + write_sm4_cast(tpf, expr); break;
case HLSL_OP1_COS: assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); break;
case HLSL_OP1_DSX: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); break;
case HLSL_OP1_DSY: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); break;
case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); break;
case HLSL_OP1_FLOOR: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); break;
case HLSL_OP1_FRACT: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); break;
case HLSL_OP1_LOG2: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); break;
case HLSL_OP1_LOGIC_NOT: assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); break;
case HLSL_OP1_NEG: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); } break;
case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break;
case HLSL_OP1_ROUND: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); break;
case HLSL_OP1_RSQ: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); break;
case HLSL_OP1_SAT: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), &expr->node, arg1, 0); break;
case HLSL_OP1_SIN: assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); break;
case HLSL_OP1_SQRT: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); break;
case HLSL_OP1_TRUNC: assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); break;
case HLSL_OP2_ADD: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); } break;
case HLSL_OP2_BIT_AND: assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); break;
case HLSL_OP2_BIT_OR: assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); break;
case HLSL_OP2_BIT_XOR: assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); break;
case HLSL_OP2_DIV: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); break;
case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); + write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); } break;
@@ -4507,15 +4901,15 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (arg1->data_type->dimx) { case 4: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); + write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); break;
case 3: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); + write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); break;
case 2: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); + write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); break;
case 1: @@ -4525,7 +4919,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); } break;
@@ -4538,18 +4932,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between "%s" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between "%s" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4564,21 +4958,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between "%s" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between "%s" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4593,21 +4987,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between "%s" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between "%s" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4615,37 +5009,37 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
case HLSL_OP2_LOGIC_AND: assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); break;
case HLSL_OP2_LOGIC_OR: assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); break;
case HLSL_OP2_LSHIFT: assert(type_is_integer(dst_type)); assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); break;
case HLSL_OP2_MAX: switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); break;
case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); } break;
@@ -4653,19 +5047,19 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); break;
case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); } break;
@@ -4673,11 +5067,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); + write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); } break;
@@ -4685,18 +5079,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (dst_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); break;
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: /* Using IMUL instead of UMUL because we're taking the low * bits, and the native compiler generates IMUL. */ - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); } break;
@@ -4709,18 +5103,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, switch (src_type->base_type) { case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); break;
case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between "%s" operands.", - debug_hlsl_type(ctx, src_type)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between "%s" operands.", + debug_hlsl_type(tpf->ctx, src_type)); break; } break; @@ -4729,18 +5123,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, case HLSL_OP2_RSHIFT: assert(type_is_integer(dst_type)); assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, &expr->node, arg1, arg2); break;
default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); }
- hlsl_release_string_buffer(ctx, dst_type_string); + hlsl_release_string_buffer(tpf->ctx, dst_type_string); }
-static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) +static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) { struct sm4_instruction instr = { @@ -4751,26 +5145,25 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf assert(iff->condition.node->data_type->dimx == 1);
sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr);
- write_sm4_block(ctx, buffer, &iff->then_block); + write_sm4_block(tpf, &iff->then_block);
if (!list_empty(&iff->else_block.instrs)) { instr.opcode = VKD3D_SM4_OP_ELSE; instr.src_count = 0; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr);
- write_sm4_block(ctx, buffer, &iff->else_block); + write_sm4_block(tpf, &iff->else_block); }
instr.opcode = VKD3D_SM4_OP_ENDIF; instr.src_count = 0; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_jump(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) +static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) { struct sm4_instruction instr = {0};
@@ -4780,19 +5173,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, instr.opcode = VKD3D_SM4_OP_BREAK; break;
- case HLSL_IR_JUMP_DISCARD: + case HLSL_IR_JUMP_DISCARD_NZ: { - struct sm4_register *reg = &instr.srcs[0].reg; - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ;
memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; instr.src_count = 1; - reg->type = VKD3D_SM4_RT_IMMCONST; - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - reg->immconst_uint[0] = ~0u; - + sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); break; }
@@ -4800,11 +5187,11 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, vkd3d_unreachable();
default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); return; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
/* Does this variable's data come directly from the API user, rather than being @@ -4818,8 +5205,7 @@ static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *va return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; }
-static void write_sm4_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) +static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) { const struct hlsl_type *type = load->node.data_type; struct sm4_instruction instr; @@ -4830,7 +5216,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, instr.dst_count = 1;
assert(type->class <= HLSL_CLASS_LAST_NUMERIC); - if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) + if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) { struct hlsl_constant_value value;
@@ -4839,7 +5225,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx,
instr.opcode = VKD3D_SM4_OP_MOVC;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask);
memset(&value, 0xff, sizeof(value)); sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); @@ -4851,33 +5237,31 @@ static void write_sm4_load(struct hlsl_ctx *ctx, { instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); instr.src_count = 1; }
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_loop(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) +static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) { struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_LOOP, };
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr);
- write_sm4_block(ctx, buffer, &loop->body); + write_sm4_block(tpf, &loop->body);
instr.opcode = VKD3D_SM4_OP_ENDLOOP; - write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, + unsigned int swizzle, const struct hlsl_ir_node *texel_offset) { struct sm4_src_register *src; struct sm4_instruction instr; @@ -4895,9 +5279,9 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - if (ctx->profile->major_version < 5) + if (tpf->ctx->profile->major_version < 5) { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); return; } @@ -4906,58 +5290,39 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer } }
- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask);
src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; src->swizzle = swizzle;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_resource_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) +static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *sample_index = load->sample_index.node; const struct hlsl_ir_node *coords = load->coords.node;
- if (!hlsl_type_is_resource(resource_type)) + if (load->sampler.var && !load->sampler.var->is_uniform) { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); return; }
- if (load->sampler.var) - { - const struct hlsl_type *sampler_type = load->sampler.var->data_type; - - if (!hlsl_type_is_resource(sampler_type)) - { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); - return; - } - - if (!load->sampler.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - } - if (!load->resource.var->is_uniform) { - hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); return; }
switch (load->load_type) { case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, + write_sm4_ld(tpf, &load->node, &load->resource, coords, sample_index, texel_offset, load->sampling_dim); break;
@@ -4967,64 +5332,61 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, case HLSL_RESOURCE_SAMPLE_LOD: case HLSL_RESOURCE_SAMPLE_LOD_BIAS: case HLSL_RESOURCE_SAMPLE_GRAD: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } - write_sm4_sample(ctx, buffer, load); + /* Combined sample expressions were lowered. */ + assert(load->sampler.var); + write_sm4_sample(tpf, load); break;
case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(X, X, X, X), texel_offset); break;
case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); break;
case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); break;
case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(W, W, W, W), texel_offset); + break; + + case HLSL_RESOURCE_SAMPLE_INFO: + write_sm4_sampleinfo(tpf, load); + break; + + case HLSL_RESOURCE_RESINFO: + write_sm4_resinfo(tpf, load); break; } }
-static void write_sm4_resource_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) +static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) { - const struct hlsl_type *resource_type = store->resource.var->data_type; - - if (!hlsl_type_is_resource(resource_type)) - { - hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); - return; - } + struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource);
if (!store->resource.var->is_uniform) { - hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); return; }
if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) { - hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); + hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); return; }
- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); + write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); }
-static void write_sm4_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) +static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) { const struct hlsl_ir_node *rhs = store->rhs.node; struct sm4_instruction instr; @@ -5033,18 +5395,17 @@ static void write_sm4_store(struct hlsl_ctx *ctx, memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_swizzle(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) { struct sm4_instruction instr; unsigned int writemask; @@ -5060,11 +5421,10 @@ static void write_sm4_swizzle(struct hlsl_ctx *ctx, swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); instr.src_count = 1;
- write_sm4_instruction(buffer, &instr); + write_sm4_instruction(tpf, &instr); }
-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block) +static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) { const struct hlsl_ir_node *instr;
@@ -5074,12 +5434,12 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * { if (instr->data_type->class == HLSL_CLASS_MATRIX) { - hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); + hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); break; } else if (instr->data_type->class == HLSL_CLASS_OBJECT) { - hlsl_fixme(ctx, &instr->loc, "Object copy."); + hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); break; }
@@ -5099,43 +5459,43 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * vkd3d_unreachable();
case HLSL_IR_EXPR: - write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); + write_sm4_expr(tpf, hlsl_ir_expr(instr)); break;
case HLSL_IR_IF: - write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); + write_sm4_if(tpf, hlsl_ir_if(instr)); break;
case HLSL_IR_JUMP: - write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); + write_sm4_jump(tpf, hlsl_ir_jump(instr)); break;
case HLSL_IR_LOAD: - write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); + write_sm4_load(tpf, hlsl_ir_load(instr)); break;
case HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); + write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); break;
case HLSL_IR_RESOURCE_STORE: - write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); + write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); break;
case HLSL_IR_LOOP: - write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); + write_sm4_loop(tpf, hlsl_ir_loop(instr)); break;
case HLSL_IR_STORE: - write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); + write_sm4_store(tpf, hlsl_ir_store(instr)); break;
case HLSL_IR_SWIZZLE: - write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); + write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); break;
default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); } } } @@ -5144,12 +5504,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) { const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var; size_t token_count_position; + struct tpf_writer tpf;
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -5164,6 +5525,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, VKD3D_SM4_LIB, };
+ tpf_writer_init(&tpf, ctx, &buffer); + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); @@ -5172,45 +5535,42 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(&buffer, cbuffer); + write_sm4_dcl_constant_buffer(&tpf, cbuffer); }
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; - - var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); + const struct extern_resource *resource = &extern_resources[i];
- if (regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&buffer, var); - else if (regset == HLSL_REGSET_TEXTURES) - write_sm4_dcl_textures(ctx, &buffer, var, false); - else if (regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(ctx, &buffer, var, true); + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(&tpf, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(&tpf, resource, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) - write_sm4_dcl_semantic(ctx, &buffer, var); + write_sm4_dcl_semantic(&tpf, var); }
if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&buffer, ctx->thread_count); + write_sm4_dcl_thread_group(&tpf, ctx->thread_count);
if (ctx->temp_count) - write_sm4_dcl_temps(&buffer, ctx->temp_count); + write_sm4_dcl_temps(&tpf, ctx->temp_count);
- write_sm4_block(ctx, &buffer, &entry_func->body); + write_sm4_block(&tpf, &entry_func->body);
- write_sm4_ret(&buffer); + write_sm4_ret(&tpf);
set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));
- add_section(dxbc, TAG_SHDR, &buffer); + add_section(ctx, dxbc, TAG_SHDR, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index b8f36df07f1..3ad8ba82020 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -413,6 +413,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t return "hlsl"; case VKD3D_SHADER_SOURCE_D3D_BYTECODE: return "d3dbc"; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + return "dxil"; default: FIXME("Unhandled source type %#x.\n", type); return "bin"; @@ -438,6 +440,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, shader_get_source_type_suffix(source_type), shader->code, shader->size); }
+static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) +{ + struct vkd3d_shader_scan_signature_info *signature_info; + + if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) + { + memset(&signature_info->input, 0, sizeof(signature_info->input)); + memset(&signature_info->output, 0, sizeof(signature_info->output)); + memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); + } +} + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vkd3d_shader_message_context *message_context, const char *source_name, const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, @@ -524,9 +538,46 @@ void vkd3d_shader_free_messages(char *messages) vkd3d_free(messages); }
+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, + const struct shader_signature *src) +{ + unsigned int i; + + signature->element_count = src->element_count; + if (!src->elements) + { + assert(!signature->element_count); + signature->elements = NULL; + return true; + } + + if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) + return false; + + for (i = 0; i < signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *d = &signature->elements[i]; + struct signature_element *e = &src->elements[i]; + + d->semantic_name = e->semantic_name; + d->semantic_index = e->semantic_index; + d->stream_index = e->stream_index; + d->sysval_semantic = e->sysval_semantic; + d->component_type = e->component_type; + d->register_index = e->register_index; + if (e->register_count > 1) + FIXME("Arrayed elements are not supported yet.\n"); + d->mask = e->mask; + d->used_mask = e->used_mask; + d->min_precision = e->min_precision; + } + + return true; +} + struct vkd3d_shader_scan_context { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; size_t descriptors_size;
struct vkd3d_shader_message_context *message_context; @@ -546,20 +597,12 @@ struct vkd3d_shader_scan_context size_t cf_info_size; size_t cf_info_count;
- struct - { - unsigned int id; - unsigned int descriptor_idx; - } *uav_ranges; - size_t uav_ranges_size; - size_t uav_range_count; - enum vkd3d_shader_api_version api_version; };
static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, struct vkd3d_shader_message_context *message_context) { unsigned int i; @@ -582,7 +625,6 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con
static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) { - vkd3d_free(context->uav_ranges); vkd3d_free(context->cf_info); }
@@ -650,18 +692,23 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf return NULL; }
-static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( - const struct vkd3d_shader_scan_context *context, unsigned int range_id) +static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg, uint32_t flag) { + unsigned int range_id = reg->idx[0].offset; unsigned int i;
- for (i = 0; i < context->uav_range_count; ++i) + if (!context->scan_descriptor_info) + return; + + for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) { - if (context->uav_ranges[i].id == range_id) - return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; + if (context->scan_descriptor_info->descriptors[i].register_id == range_id) + { + context->scan_descriptor_info->descriptors[i].flags |= flag; + break; + } } - - return NULL; }
static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) @@ -677,13 +724,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_register *reg) { - struct vkd3d_shader_descriptor_info *d; - - if (!context->scan_descriptor_info) - return; - - d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); - d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; + vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); }
static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) @@ -696,13 +737,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_register *reg) { - struct vkd3d_shader_descriptor_info *d; - - if (!context->scan_descriptor_info) - return; - - d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); - d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; + vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); }
static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) @@ -715,93 +750,76 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_register *reg) { - struct vkd3d_shader_descriptor_info *d; - - if (!context->scan_descriptor_info) - return; - - d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); - d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; + vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); }
-static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, - enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, - enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, - unsigned int flags) +static struct vkd3d_shader_descriptor_info1 *vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, + enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, + const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, + enum vkd3d_shader_resource_data_type resource_data_type) { - struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; - struct vkd3d_shader_descriptor_info *d; + struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; + struct vkd3d_shader_descriptor_info1 *d;
if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, info->descriptor_count + 1, sizeof(*info->descriptors))) { ERR("Failed to allocate descriptor info.\n"); - return false; + return NULL; }
d = &info->descriptors[info->descriptor_count]; + memset(d, 0, sizeof(*d)); d->type = type; + d->register_id = reg->idx[0].offset; d->register_space = range->space; d->register_index = range->first; d->resource_type = resource_type; d->resource_data_type = resource_data_type; - d->flags = flags; d->count = (range->last == ~0u) ? ~0u : range->last - range->first + 1; ++info->descriptor_count;
- return true; -} - -static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, - unsigned int id, unsigned int descriptor_idx) -{ - if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, - context->uav_range_count + 1, sizeof(*context->uav_ranges))) - { - ERR("Failed to allocate UAV range.\n"); - return false; - } - - context->uav_ranges[context->uav_range_count].id = id; - context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; - ++context->uav_range_count; - - return true; + return d; }
static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; + struct vkd3d_shader_descriptor_info1 *d;
if (!context->scan_descriptor_info) return;
- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); + if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, + &cb->src.reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) + return; + d->buffer_size = cb->size * 16; }
static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; - unsigned int flags; + struct vkd3d_shader_descriptor_info1 *d;
if (!context->scan_descriptor_info) return;
+ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, + &sampler->src.reg, &sampler->range, VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT))) + return; + if (instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE) - flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; - else - flags = 0; - vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, - VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); + d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; }
static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, - enum vkd3d_shader_resource_data_type resource_data_type) + enum vkd3d_shader_resource_data_type resource_data_type, + unsigned int sample_count, unsigned int structure_stride, bool raw) { + struct vkd3d_shader_descriptor_info1 *d; enum vkd3d_shader_descriptor_type type;
if (!context->scan_descriptor_info) @@ -811,10 +829,13 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; else type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; - vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); - if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, - context->scan_descriptor_info->descriptor_count - 1); + if (!(d = vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, + &resource->range, resource_type, resource_data_type))) + return; + d->sample_count = sample_count; + d->structure_stride = structure_stride; + if (raw) + d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; }
static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, @@ -873,7 +894,7 @@ static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_sca }
vkd3d_shader_scan_resource_declaration(context, &semantic->resource, - semantic->resource_type, resource_data_type); + semantic->resource_type, resource_data_type, semantic->sample_count, 0, false); }
static void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, @@ -907,12 +928,13 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte case VKD3DSIH_DCL_RESOURCE_RAW: case VKD3DSIH_DCL_UAV_RAW: vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.raw_resource.resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, 0, true); break; case VKD3DSIH_DCL_RESOURCE_STRUCTURED: case VKD3DSIH_DCL_UAV_STRUCTURED: vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.structured_resource.resource, - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, + instruction->declaration.structured_resource.byte_stride, false); break; case VKD3DSIH_IF: cf_info = vkd3d_shader_scan_push_cf_info(context); @@ -1064,22 +1086,64 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte return VKD3D_OK; }
+static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, + const struct vkd3d_shader_scan_descriptor_info1 *info1) +{ + unsigned int i; + + if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < info1->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; + struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; + + dst->type = src->type; + dst->register_space = src->register_space; + dst->register_index = src->register_index; + dst->resource_type = src->resource_type; + dst->resource_data_type = src->resource_data_type; + dst->flags = src->flags; + dst->count = src->count; + } + info->descriptor_count = info1->descriptor_count; + + return VKD3D_OK; +} + +static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) +{ + TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); + + vkd3d_free(scan_descriptor_info->descriptors); +} + static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) + struct vkd3d_shader_message_context *message_context, + struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; + struct vkd3d_shader_scan_descriptor_info *descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; struct vkd3d_shader_instruction *instruction; struct vkd3d_shader_scan_context context; int ret = VKD3D_OK; unsigned int i;
- if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) + descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); + if (descriptor_info1) + { + descriptor_info1->descriptors = NULL; + descriptor_info1->descriptor_count = 0; + } + else if (descriptor_info) { - scan_descriptor_info->descriptors = NULL; - scan_descriptor_info->descriptor_count = 0; + descriptor_info1 = &local_descriptor_info1; } + signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO);
- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); + vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, message_context);
if (TRACE_ON()) { @@ -1090,13 +1154,52 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info { instruction = &parser->instructions.elements[i]; if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) - { - if (scan_descriptor_info) - vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); break; + } + + for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) + { + unsigned int size = parser->shader_desc.flat_constant_count[i].external; + struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; + struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; + struct vkd3d_shader_descriptor_info1 *d; + + if (parser->shader_desc.flat_constant_count[i].external) + { + if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &reg, + &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) + d->buffer_size = size * 16; } }
+ if (!ret && signature_info) + { + if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, + &parser->shader_desc.output_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, + &parser->shader_desc.patch_constant_signature)) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + } + } + + if (!ret && descriptor_info) + ret = convert_descriptor_info(descriptor_info, descriptor_info1); + + if (ret < 0) + { + if (descriptor_info) + vkd3d_shader_free_scan_descriptor_info(descriptor_info); + if (descriptor_info1) + vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); + if (signature_info) + vkd3d_shader_free_scan_signature_info(signature_info); + } + else + { + vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); + } vkd3d_shader_scan_context_cleanup(&context); return ret; } @@ -1113,7 +1216,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, return ret; }
- ret = scan_with_parser(compile_info, message_context, parser); + ret = scan_with_parser(compile_info, message_context, NULL, parser); vkd3d_shader_parser_destroy(parser);
return ret; @@ -1131,7 +1234,25 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, return ret; }
- ret = scan_with_parser(compile_info, message_context, parser); + ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +} + +static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = scan_with_parser(compile_info, message_context, NULL, parser); vkd3d_shader_parser_destroy(parser);
return ret; @@ -1150,6 +1271,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) return ret;
+ init_scan_signature_info(compile_info); + vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
switch (compile_info->source_type) @@ -1167,6 +1290,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char ret = scan_d3dbc(compile_info, &message_context); break;
+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = scan_dxil(compile_info, &message_context); + break; + default: ERR("Unsupported source type %#x.\n", compile_info->source_type); ret = VKD3D_ERROR_INVALID_ARGUMENT; @@ -1184,7 +1311,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { - struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; + struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; struct vkd3d_glsl_generator *glsl_generator; struct vkd3d_shader_compile_info scan_info; int ret; @@ -1192,11 +1319,8 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source);
scan_info = *compile_info; - scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; - scan_descriptor_info.next = scan_info.next; - scan_info.next = &scan_descriptor_info;
- if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) + if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) return ret;
switch (compile_info->target_type) @@ -1210,7 +1334,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, message_context, &parser->location))) { ERR("Failed to create GLSL generator.\n"); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); return VKD3D_ERROR; }
@@ -1228,7 +1352,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, assert(0); }
- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); return ret; }
@@ -1289,6 +1413,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ return VKD3D_ERROR; }
+static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); + + vkd3d_shader_parser_destroy(parser); + return ret; +} + int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { @@ -1303,6 +1445,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) return ret;
+ init_scan_signature_info(compile_info); + vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
switch (compile_info->source_type) @@ -1319,6 +1463,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, ret = compile_d3d_bytecode(compile_info, out, &message_context); break;
+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = compile_dxbc_dxil(compile_info, out, &message_context); + break; + default: vkd3d_unreachable(); } @@ -1337,6 +1485,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ vkd3d_free(scan_descriptor_info->descriptors); }
+void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) +{ + TRACE("info %p.\n", info); + + vkd3d_shader_free_shader_signature(&info->input); + vkd3d_shader_free_shader_signature(&info->output); + vkd3d_shader_free_shader_signature(&info->patch_constant); +} + void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) { TRACE("shader_code %p.\n", shader_code); @@ -1399,43 +1556,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu desc->version = 0; }
-static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, - const struct shader_signature *src) -{ - unsigned int i; - - signature->element_count = src->element_count; - if (!src->elements) - { - assert(!signature->element_count); - signature->elements = NULL; - return true; - } - - if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) - return false; - - for (i = 0; i < signature->element_count; ++i) - { - struct vkd3d_shader_signature_element *d = &signature->elements[i]; - struct signature_element *e = &src->elements[i]; - - d->semantic_name = e->semantic_name; - d->semantic_index = e->semantic_index; - d->stream_index = e->stream_index; - d->sysval_semantic = e->sysval_semantic; - d->component_type = e->component_type; - d->register_index = e->register_index; - if (e->register_count > 1) - FIXME("Arrayed elements are not supported yet.\n"); - d->mask = e->mask; - d->used_mask = e->used_mask; - d->min_precision = e->min_precision; - } - - return true; -} - void shader_signature_cleanup(struct shader_signature *signature) { vkd3d_free(signature->elements); @@ -1524,6 +1644,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns VKD3D_SHADER_SOURCE_DXBC_TPF, VKD3D_SHADER_SOURCE_HLSL, VKD3D_SHADER_SOURCE_D3D_BYTECODE, + VKD3D_SHADER_SOURCE_DXBC_DXIL, };
TRACE("count %p.\n", count); @@ -1562,6 +1683,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
switch (source_type) { + case VKD3D_SHADER_SOURCE_DXBC_DXIL: case VKD3D_SHADER_SOURCE_DXBC_TPF: *count = ARRAY_SIZE(dxbc_tpf_types); return dxbc_tpf_types; @@ -1790,3 +1912,41 @@ void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *ins vkd3d_free(instructions->icbs[i]); vkd3d_free(instructions->icbs); } + +void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, + const struct vkd3d_shader_signature *input_signature, + unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) +{ + unsigned int count = 0; + unsigned int i; + + TRACE("output_signature %p, input_signature %p, ret_count %p, varyings %p.\n", + output_signature, input_signature, ret_count, varyings); + + for (i = 0; i < input_signature->element_count; ++i) + { + const struct vkd3d_shader_signature_element *input_element, *output_element; + + input_element = &input_signature->elements[i]; + + if (input_element->sysval_semantic != VKD3D_SHADER_SV_NONE) + continue; + + varyings[count].input_register_index = input_element->register_index; + varyings[count].input_mask = input_element->mask; + + if ((output_element = vkd3d_shader_find_signature_element(output_signature, + input_element->semantic_name, input_element->semantic_index, 0))) + { + varyings[count].output_signature_index = output_element - output_signature->elements; + } + else + { + varyings[count].output_signature_index = output_signature->element_count; + } + + ++count; + } + + *ret_count = count; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index d77c5393940..4a2f6b96b82 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -78,9 +78,14 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, + 
VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009,
VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK = 1302, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE = 1303,
VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, @@ -88,6 +93,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004,
+ VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, + VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE = 3002, @@ -133,10 +140,14 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, + VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, + VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, + VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, + VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305,
VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000,
@@ -145,8 +156,31 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, + VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, + VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006,
VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, + + VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, + VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, + VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, + VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, + VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, + VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, + VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, + VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, + VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, + VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, + VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, + VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, + + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, + VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, + VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, + VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, + + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, };
enum vkd3d_shader_opcode @@ -486,6 +520,9 @@ enum vkd3d_shader_register_type VKD3DSPR_DEPTHOUTLE, VKD3DSPR_RASTERIZER, VKD3DSPR_OUTSTENCILREF, + VKD3DSPR_UNDEF, + + VKD3DSPR_COUNT,
VKD3DSPR_INVALID = ~0u, }; @@ -516,6 +553,7 @@ enum vkd3d_data_type VKD3D_DATA_DOUBLE, VKD3D_DATA_CONTINUED, VKD3D_DATA_UNUSED, + VKD3D_DATA_UINT8, };
enum vkd3d_immconst_type @@ -784,6 +822,8 @@ enum vkd3d_shader_input_sysval_semantic VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, };
+#define SIGNATURE_TARGET_LOCATION_UNUSED (~0u) + struct signature_element { unsigned int sort_index; @@ -792,16 +832,21 @@ struct signature_element unsigned int stream_index; enum vkd3d_shader_sysval_semantic sysval_semantic; enum vkd3d_shader_component_type component_type; + /* Register index in the source shader. */ unsigned int register_index; unsigned int register_count; unsigned int mask; unsigned int used_mask; enum vkd3d_shader_minimum_precision min_precision; + /* Register index / location in the target shader. + * If SIGNATURE_TARGET_LOCATION_UNUSED, this element should not be written. */ + unsigned int target_location; };
struct shader_signature { struct signature_element *elements; + size_t elements_capacity; unsigned int element_count; };
@@ -811,9 +856,17 @@ struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; + bool is_dxil; struct shader_signature input_signature; struct shader_signature output_signature; struct shader_signature patch_constant_signature; + + uint32_t temp_count; + + struct + { + uint32_t used, external; + } flat_constant_count[3]; };
struct vkd3d_shader_register_semantic @@ -945,6 +998,8 @@ struct vkd3d_shader_instruction } declaration; };
+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) { return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; @@ -965,6 +1020,11 @@ static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_sh return reg->type == VKD3DSPR_PATCHCONST; }
+static inline bool register_is_constant(const struct vkd3d_shader_register *reg) +{ + return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); +} + struct vkd3d_shader_location { const char *source_name; @@ -1066,6 +1126,27 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse parser->ops->parser_destroy(parser); }
+struct vkd3d_shader_descriptor_info1 +{ + enum vkd3d_shader_descriptor_type type; + unsigned int register_space; + unsigned int register_index; + unsigned int register_id; + enum vkd3d_shader_resource_type resource_type; + enum vkd3d_shader_resource_data_type resource_data_type; + unsigned int flags; + unsigned int sample_count; + unsigned int buffer_size; + unsigned int structure_stride; + unsigned int count; +}; + +struct vkd3d_shader_scan_descriptor_info1 +{ + struct vkd3d_shader_descriptor_info1 *descriptors; + unsigned int descriptor_count; +}; + void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_version *shader_version);
@@ -1167,6 +1248,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser);
void free_shader_desc(struct vkd3d_shader_desc *desc);
@@ -1186,7 +1269,7 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); #define SPIRV_MAX_SRC_COUNT 6
int spirv_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
@@ -1240,6 +1323,30 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( } }
+static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resource_data_type( + enum vkd3d_shader_resource_data_type data_type) /* Maps a resource data type to a shader component type; every input value yields a result. */ +{ + switch (data_type) + { + case VKD3D_SHADER_RESOURCE_DATA_FLOAT: + case VKD3D_SHADER_RESOURCE_DATA_UNORM: + case VKD3D_SHADER_RESOURCE_DATA_SNORM: + return VKD3D_SHADER_COMPONENT_FLOAT; /* FLOAT, UNORM and SNORM all map to the float component type. */ + case VKD3D_SHADER_RESOURCE_DATA_UINT: + return VKD3D_SHADER_COMPONENT_UINT; + case VKD3D_SHADER_RESOURCE_DATA_INT: + return VKD3D_SHADER_COMPONENT_INT; + case VKD3D_SHADER_RESOURCE_DATA_DOUBLE: + case VKD3D_SHADER_RESOURCE_DATA_CONTINUED: + return VKD3D_SHADER_COMPONENT_DOUBLE; /* DOUBLE and CONTINUED both map to the double component type. */ + default: + FIXME("Unhandled data type %#x.\n", data_type); + /* fall-through */ + case VKD3D_SHADER_RESOURCE_DATA_MIXED: + return VKD3D_SHADER_COMPONENT_UINT; /* MIXED, and any unhandled value after the FIXME above, map to uint. */ + } +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index);
@@ -1339,6 +1446,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, }
#define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) +#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t)
#define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') @@ -1369,11 +1477,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code);
-enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); -enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( - struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); -enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, - enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, - struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); +enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info);
#endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 32439eec7eb..42a98763438 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value); +static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any);
@@ -453,9 +454,9 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( }
/* ID3D12Fence */ -static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) +static struct d3d12_fence *impl_from_ID3D12Fence1(ID3D12Fence1 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); + return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence1_iface); }
static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) @@ -899,18 +900,19 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin vkd3d_mutex_unlock(&fence->mutex); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence1 *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);
- if (IsEqualGUID(riid, &IID_ID3D12Fence) + if (IsEqualGUID(riid, &IID_ID3D12Fence1) + || IsEqualGUID(riid, &IID_ID3D12Fence) || IsEqualGUID(riid, &IID_ID3D12Pageable) || IsEqualGUID(riid, &IID_ID3D12DeviceChild) || IsEqualGUID(riid, &IID_ID3D12Object) || IsEqualGUID(riid, &IID_IUnknown)) { - ID3D12Fence_AddRef(iface); + ID3D12Fence1_AddRef(iface); *object = iface; return S_OK; } @@ -921,9 +923,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, return E_NOINTERFACE; }
-static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) +static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence1 *iface) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); ULONG refcount = InterlockedIncrement(&fence->refcount);
TRACE("%p increasing refcount to %u.\n", fence, refcount); @@ -936,9 +938,9 @@ static void d3d12_fence_incref(struct d3d12_fence *fence) InterlockedIncrement(&fence->internal_refcount); }
-static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) +static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence1 *iface) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); ULONG refcount = InterlockedDecrement(&fence->refcount);
TRACE("%p decreasing refcount to %u.\n", fence, refcount); @@ -971,10 +973,10 @@ static void d3d12_fence_decref(struct d3d12_fence *fence) } }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence1 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -982,10 +984,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence1 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -993,37 +995,37 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, return vkd3d_set_private_data(&fence->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence1 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
return vkd3d_set_private_data_interface(&fence->private_store, guid, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence1 *iface, const WCHAR *name) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size));
return name ? S_OK : E_INVALIDARG; }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) +static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence1 *iface, REFIID iid, void **device) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);
return d3d12_device_query_interface(fence->device, iid, device); }
-static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) +static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence1 *iface) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); uint64_t completed_value;
TRACE("iface %p.\n", iface); @@ -1034,10 +1036,10 @@ static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface return completed_value; }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, +static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence1 *iface, UINT64 value, HANDLE event) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); unsigned int i; bool latch = false;
@@ -1105,9 +1107,9 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen return d3d12_device_flush_blocked_queues(fence->device); }
-static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) +static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence1 *iface, UINT64 value) { - struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface);
TRACE("iface %p, value %#"PRIx64".\n", iface, value);
@@ -1116,7 +1118,16 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); }
-static const struct ID3D12FenceVtbl d3d12_fence_vtbl = +static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(ID3D12Fence1 *iface) /* ID3D12Fence1 method: reports the flags value stored in the fence object. */ +{ + struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p.\n", iface); + + return fence->flags; /* fence->flags is assigned from the flags argument in d3d12_fence_init(). */ +} + +static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl = { /* IUnknown methods */ d3d12_fence_QueryInterface, @@ -1133,14 +1144,18 @@ static const struct ID3D12FenceVtbl d3d12_fence_vtbl = d3d12_fence_GetCompletedValue, d3d12_fence_SetEventOnCompletion, d3d12_fence_Signal, + /* ID3D12Fence1 methods */ + d3d12_fence_GetCreationFlags, };
static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) { - if (!iface) + ID3D12Fence1 *iface1; + + if (!(iface1 = (ID3D12Fence1 *)iface)) return NULL; - assert(iface->lpVtbl == &d3d12_fence_vtbl); - return impl_from_ID3D12Fence(iface); + assert(iface1->lpVtbl == &d3d12_fence_vtbl); + return impl_from_ID3D12Fence1(iface1); }
static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, @@ -1150,7 +1165,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * VkResult vr; HRESULT hr;
- fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; + fence->ID3D12Fence1_iface.lpVtbl = &d3d12_fence_vtbl; fence->internal_refcount = 1; fence->refcount = 1;
@@ -1161,7 +1176,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
vkd3d_cond_init(&fence->null_event_cond);
- if (flags) + if ((fence->flags = flags)) FIXME("Ignoring flags %#x.\n", flags);
fence->events = NULL; @@ -1315,32 +1330,26 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm return hr; }
- allocator->current_command_list = list; - - return S_OK; -} - -static void d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, - struct d3d12_command_list *list) -{ - struct d3d12_device *device = allocator->device; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("allocator %p, list %p.\n", allocator, list); - - if (allocator->current_command_list == list) - allocator->current_command_list = NULL; - if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) { WARN("Failed to add command buffer.\n"); VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, 1, &list->vk_command_buffer)); - return; + return E_OUTOFMEMORY; } - allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; + + allocator->current_command_list = list; + + return S_OK; +} + +static void d3d12_command_allocator_remove_command_list(struct d3d12_command_allocator *allocator, + const struct d3d12_command_list *list) /* Detaches list from allocator: clears allocator->current_command_list if it refers to list, otherwise does nothing. No Vulkan objects are touched here. */ +{ + if (allocator->current_command_list == list) + allocator->current_command_list = NULL; +}
static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) @@ -1910,10 +1919,32 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, return S_OK; }
+static void d3d12_command_signature_incref(struct d3d12_command_signature *signature) /* Takes one internal reference on the command signature. */ +{ + vkd3d_atomic_increment(&signature->internal_refcount); +} + +static void d3d12_command_signature_decref(struct d3d12_command_signature *signature) /* Drops one internal reference; tears the signature down when vkd3d_atomic_decrement() returns zero. */ +{ + unsigned int refcount = vkd3d_atomic_decrement(&signature->internal_refcount); + + if (!refcount) + { + struct d3d12_device *device = signature->device; /* Saved first: signature is freed below, before the device is released. */ + + vkd3d_private_store_destroy(&signature->private_store); + + vkd3d_free((void *)signature->desc.pArgumentDescs); /* Presumably the signature owns this array; verify against the creation path. */ + vkd3d_free(signature); + + d3d12_device_release(device); + } +} + /* ID3D12CommandList */ -static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) +static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList3(ID3D12GraphicsCommandList3 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); }
static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) @@ -2259,12 +2290,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l } }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **object) { TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object);
- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) + if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) || IsEqualGUID(iid, &IID_ID3D12CommandList) @@ -2272,7 +2304,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic || IsEqualGUID(iid, &IID_ID3D12Object) || IsEqualGUID(iid, &IID_IUnknown)) { - ID3D12GraphicsCommandList2_AddRef(iface); + ID3D12GraphicsCommandList3_AddRef(iface); *object = iface; return S_OK; } @@ -2283,9 +2315,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic return E_NOINTERFACE; }
-static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) +static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); ULONG refcount = InterlockedIncrement(&list->refcount);
TRACE("%p increasing refcount to %u.\n", list, refcount); @@ -2298,9 +2330,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind vkd3d_free(bindings->vk_uav_counter_views); }
-static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) +static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); ULONG refcount = InterlockedDecrement(&list->refcount);
TRACE("%p decreasing refcount to %u.\n", list, refcount); @@ -2313,7 +2345,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL
/* When command pool is destroyed, all command buffers are implicitly freed. */ if (list->allocator) - d3d12_command_allocator_free_command_buffer(list->allocator, list); + d3d12_command_allocator_remove_command_list(list->allocator, list);
vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); @@ -2326,66 +2358,66 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL return refcount; }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList3 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);
return vkd3d_get_private_data(&list->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList3 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);
return vkd3d_set_private_data(&list->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList3 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
return vkd3d_set_private_data_interface(&list->private_store, guid, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList3 *iface, const WCHAR *name) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size));
return name ? S_OK : E_INVALIDARG; }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **device) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);
return d3d12_device_query_interface(list->device, iid, device); }
-static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) +static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p.\n", iface);
return list->type; }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList3 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs; VkResult vr;
@@ -2411,7 +2443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL
if (list->allocator) { - d3d12_command_allocator_free_command_buffer(list->allocator, list); + d3d12_command_allocator_remove_command_list(list->allocator, list); list->allocator = NULL; }
@@ -2429,7 +2461,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL static void d3d12_command_list_reset_state(struct d3d12_command_list *list, ID3D12PipelineState *initial_pipeline_state) { - ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; + ID3D12GraphicsCommandList3 *iface = &list->ID3D12GraphicsCommandList3_iface;
memset(list->strides, 0, sizeof(list->strides)); list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; @@ -2465,14 +2497,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
list->descriptor_heap_count = 0;
- ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); + ID3D12GraphicsCommandList3_SetPipelineState(iface, initial_pipeline_state); }
-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList3 *iface, ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) { struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); HRESULT hr;
TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", @@ -2499,7 +2531,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL return hr; }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList3 *iface, ID3D12PipelineState *pipeline_state) { FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); @@ -3185,6 +3217,23 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) } }
+static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) /* Records heap in list->descriptor_heaps unless contains_heap() reports it is already there. */ +{ + if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) + { + if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) /* Tracking array is full: flush this heap right away, under its vk_sets_mutex, instead of recording it. */ + { + /* Descriptors can be written after binding. */ + FIXME("Flushing descriptor updates while list %p is not closed.\n", list); + vkd3d_mutex_lock(&heap->vk_sets_mutex); + d3d12_desc_flush_vk_heap_updates_locked(heap, list->device); + vkd3d_mutex_unlock(&heap->vk_sets_mutex); + return; + } + list->descriptor_heaps[list->descriptor_heap_count++] = heap; + } +} + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) { @@ -3209,18 +3258,6 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l bindings->sampler_heap_id = heap->serial_id; }
- if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) - { - if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) - { - /* Descriptors can be written after binding. */ - FIXME("Flushing descriptor updates while list %p is not closed.\n", list); - command_list_flush_vk_heap_updates(list); - list->descriptor_heap_count = 0; - } - list->descriptor_heaps[list->descriptor_heap_count++] = heap; - } - vkd3d_mutex_lock(&heap->vk_sets_mutex);
for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) @@ -3353,11 +3390,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c } }
-static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList3 *iface, UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, UINT start_instance_location) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " @@ -3377,11 +3414,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom instance_count, start_vertex_location, start_instance_location)); }
-static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList3 *iface, UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, INT base_vertex_location, UINT start_instance_location) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " @@ -3403,10 +3440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap instance_count, start_vertex_location, base_vertex_location, start_instance_location)); }
-static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList3 *iface, UINT x, UINT y, UINT z) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); @@ -3422,10 +3459,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; VkBufferCopy buffer_copy; @@ -3624,7 +3661,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, - unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) + unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; @@ -3651,6 +3688,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com buffer_image_copy.bufferImageHeight = 0; vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, src_format, src_sub_resource_idx, src_desc->MipLevels); + buffer_image_copy.imageSubresource.layerCount = layer_count; src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; buffer_image_copy.imageOffset.x = 0; buffer_image_copy.imageOffset.y = 0; @@ -3658,7 +3696,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx);
buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * - buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; + buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) { ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); @@ -3684,6 +3722,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com
vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, dst_format, dst_sub_resource_idx, dst_desc->MipLevels); + buffer_image_copy.imageSubresource.layerCount = layer_count; dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel;
assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == @@ -3705,11 +3744,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) && box->back > box->front; }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList3 *iface, const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_format *src_format, *dst_format; const struct vkd3d_vk_device_procs *vk_procs; @@ -3813,7 +3852,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic { d3d12_command_list_copy_incompatible_texture_region(list, dst_resource, dst->u.SubresourceIndex, dst_format, - src_resource, src->u.SubresourceIndex, src_format); + src_resource, src->u.SubresourceIndex, src_format, 1); return; }
@@ -3830,11 +3869,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic } }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst, ID3D12Resource *src) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *dst_format, *src_format; const struct vkd3d_vk_device_procs *vk_procs; VkBufferCopy vk_buffer_copy; VkImageCopy vk_image_copy; @@ -3867,16 +3907,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm else { layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); + dst_format = dst_resource->format; + src_format = src_resource->format;
assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc));
+ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) + { + for (i = 0; i < dst_resource->desc.MipLevels; ++i) + { + d3d12_command_list_copy_incompatible_texture_region(list, + dst_resource, i, dst_format, + src_resource, i, src_format, layer_count); + } + return; + } + for (i = 0; i < dst_resource->desc.MipLevels; ++i) { vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, - src_resource->format, dst_resource->format, NULL, 0, 0, 0); + src_format, dst_format, NULL, 0, 0, 0); vk_image_copy.dstSubresource.layerCount = layer_count; vk_image_copy.srcSubresource.layerCount = layer_count; VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, @@ -3886,7 +3939,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm } }
-static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, D3D12_TILE_COPY_FLAGS flags) @@ -3897,11 +3950,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand buffer, buffer_offset, flags); }
-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst, UINT dst_sub_resource_idx, ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_format *src_format, *dst_format, *vk_format; struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; @@ -3964,10 +4017,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); }
-static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList3 *iface, D3D12_PRIMITIVE_TOPOLOGY topology) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, topology %#x.\n", iface, topology);
@@ -3978,11 +4031,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr d3d12_command_list_invalidate_current_pipeline(list); }
-static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList3 *iface, UINT viewport_count, const D3D12_VIEWPORT *viewports) { VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs; unsigned int i;
@@ -4016,10 +4069,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); }
-static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList3 *iface, UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; const struct vkd3d_vk_device_procs *vk_procs; unsigned int i; @@ -4044,10 +4097,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList3 *iface, const FLOAT blend_factor[4]) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); @@ -4056,10 +4109,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList3 *iface, UINT stencil_ref) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs;
TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); @@ -4068,11 +4121,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList3 *iface, ID3D12PipelineState *pipeline_state) { struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state);
@@ -4123,10 +4176,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA return 0; }
-static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList3 *iface, UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); bool have_aliasing_barriers = false, have_split_barriers = false; const struct vkd3d_vk_device_procs *vk_procs; const struct vkd3d_vulkan_info *vk_info; @@ -4349,13 +4402,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); }
-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList3 *iface, ID3D12GraphicsCommandList *command_list) { FIXME("iface %p, command_list %p stub!\n", iface, command_list); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList3 *iface, UINT heap_count, ID3D12DescriptorHeap *const *heaps) { TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); @@ -4381,10 +4434,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis d3d12_command_list_invalidate_root_parameters(list, bind_point); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList3 *iface, ID3D12RootSignature *root_signature) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_signature %p.\n", iface, root_signature);
@@ -4392,10 +4445,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G unsafe_impl_from_ID3D12RootSignature(root_signature)); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList3 *iface, ID3D12RootSignature *root_signature) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_signature %p.\n", iface, root_signature);
@@ -4408,6 +4461,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; const struct d3d12_root_signature *root_signature = bindings->root_signature; + struct d3d12_descriptor_heap *descriptor_heap; struct d3d12_desc *desc;
assert(root_signature_get_descriptor_table(root_signature, index)); @@ -4418,15 +4472,25 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l if (bindings->descriptor_tables[index] == desc) return;
+ descriptor_heap = d3d12_desc_get_descriptor_heap(desc); + if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) + { + /* GetGPUDescriptorHandleForHeapStart() returns a null handle in this case, + * but a CPU handle could be passed. */ + WARN("Descriptor heap %p is not shader visible.\n", descriptor_heap); + return; + } + command_list_add_descriptor_heap(list, descriptor_heap); + bindings->descriptor_tables[index] = desc; bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; bindings->descriptor_table_active_mask |= (uint64_t)1 << index; }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", iface, root_parameter_index, base_descriptor.ptr); @@ -4435,10 +4499,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I root_parameter_index, base_descriptor); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", iface, root_parameter_index, base_descriptor.ptr); @@ -4460,10 +4524,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); @@ -4472,10 +4536,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 root_parameter_index, dst_offset, 1, &data); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); @@ -4484,10 +4548,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID root_parameter_index, dst_offset, 1, &data); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); @@ -4496,10 +4560,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID root_parameter_index, dst_offset, constant_count, data); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); @@ -4561,9 +4625,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, }
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4572,9 +4636,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie }
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4633,9 +4697,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li }
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4645,9 +4709,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie }
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4657,9 +4721,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi }
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4669,9 +4733,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi }
static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( - ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface);
TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4680,10 +4744,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV root_parameter_index, address); }
-static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList3 *iface, const D3D12_INDEX_BUFFER_VIEW *view) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_resource *resource; enum VkIndexType index_type; @@ -4723,10 +4787,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics view->BufferLocation - resource->gpu_address, index_type)); }
-static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList3 *iface, UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct vkd3d_null_resources *null_resources; struct vkd3d_gpu_va_allocator *gpu_va_allocator; VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; @@ -4781,10 +4845,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi d3d12_command_list_invalidate_current_pipeline(list); }
-static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList3 *iface, UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; @@ -4846,11 +4910,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList3 *iface, UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct d3d12_rtv_desc *rtv_desc; const struct d3d12_dsv_desc *dsv_desc; VkFormat prev_dsv_format; @@ -5051,12 +5115,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, } }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList3 *iface, D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, UINT rect_count, const D3D12_RECT *rects) { const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); struct VkAttachmentDescription attachment_desc; struct VkAttachmentReference ds_reference; @@ -5100,10 +5164,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra &clear_value, rect_count, rects); }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList3 *iface, D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); struct VkAttachmentDescription attachment_desc; struct VkAttachmentReference color_reference; @@ -5348,11 +5412,11 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 } }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList3 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; struct vkd3d_texture_view_desc view_desc; @@ -5414,11 +5478,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID vkd3d_view_decref(uint_view, device); }
-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList3 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const float values[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *resource_impl; VkClearColorValue colour; struct vkd3d_view *view; @@ -5434,16 +5498,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); }
-static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) { FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); }
-static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList3 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs; VkQueryControlFlags flags = 0; @@ -5470,10 +5534,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); }
-static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList3 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs;
@@ -5515,12 +5579,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) return sizeof(uint64_t); }
-static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList3 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) { const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); const struct vkd3d_vk_device_procs *vk_procs; unsigned int i, first, count; @@ -5596,10 +5660,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics } }
-static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; const struct vkd3d_vk_device_procs *vk_procs; @@ -5668,19 +5732,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo } }
-static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList3 *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); }
-static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList3 *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); }
-static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) +static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList3 *iface) { FIXME("iface %p stub!\n", iface); } @@ -5689,14 +5753,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS));
-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList3 *iface, ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) { struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; const struct vkd3d_vk_device_procs *vk_procs; unsigned int i; @@ -5714,6 +5778,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC return; }
+ d3d12_command_signature_incref(sig_impl); + signature_desc = &sig_impl->desc; for (i = 0; i < signature_desc->NumArgumentDescs; ++i) { @@ -5776,6 +5842,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC if (!d3d12_command_list_update_compute_state(list)) { WARN("Failed to update compute state, ignoring dispatch.\n"); + d3d12_command_signature_decref(sig_impl); return; }
@@ -5788,9 +5855,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC break; } } + + d3d12_command_signature_decref(sig_impl); }
-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, @@ -5803,7 +5872,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); }
-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, @@ -5816,20 +5885,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); }
-static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList3 *iface, FLOAT min, FLOAT max) { FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList3 *iface, UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) { FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", iface, sample_count, pixel_count, sample_positions); }
-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList3 *iface, ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, ID3D12Resource *src_resource, UINT src_sub_resource_idx, D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) @@ -5841,16 +5910,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 src_resource, src_sub_resource_idx, src_rect, format, mode); }
-static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) +static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList3 *iface, UINT mask) { FIXME("iface %p, mask %#x stub!\n", iface, mask); }
-static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList3 *iface, UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); struct d3d12_resource *resource; unsigned int i;
@@ -5863,7 +5932,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap } }
-static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = +static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList3 *iface, + ID3D12ProtectedResourceSession *protected_session) +{ + FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); +} + +static const struct ID3D12GraphicsCommandList3Vtbl d3d12_command_list_vtbl = { /* IUnknown methods */ d3d12_command_list_QueryInterface, @@ -5939,6 +6014,8 @@ static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = d3d12_command_list_SetViewInstanceMask, /* ID3D12GraphicsCommandList2 methods */ d3d12_command_list_WriteBufferImmediate, + /* ID3D12GraphicsCommandList3 methods */ + d3d12_command_list_SetProtectedResourceSession, };
static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) @@ -5946,7 +6023,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma if (!iface) return NULL; assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); - return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); }
static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, @@ -5955,7 +6032,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d { HRESULT hr;
- list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; + list->ID3D12GraphicsCommandList3_iface.lpVtbl = &d3d12_command_list_vtbl; list->refcount = 1;
list->type = type; @@ -6063,8 +6140,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; }
+static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) +{ + switch (op->opcode) + { + case VKD3D_CS_OP_WAIT: + d3d12_fence_decref(op->u.wait.fence); + break; + + case VKD3D_CS_OP_SIGNAL: + d3d12_fence_decref(op->u.signal.fence); + break; + + case VKD3D_CS_OP_EXECUTE: + vkd3d_free(op->u.execute.buffers); + break; + + case VKD3D_CS_OP_UPDATE_MAPPINGS: + case VKD3D_CS_OP_COPY_MAPPINGS: + break; + } +} + static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) { + unsigned int i; + + for (i = 0; i < array->count; ++i) + d3d12_command_queue_destroy_op(&array->ops[i]); + vkd3d_free(array->ops); }
@@ -6162,17 +6266,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc return &array->ops[array->count++]; }
+static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) +{ + void *buffer; + + *dst = NULL; + if (src) + { + if (!(buffer = vkd3d_calloc(count, elem_size))) + return false; + memcpy(buffer, src, count * elem_size); + *dst = buffer; + } + return true; +} + +static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) +{ + vkd3d_free(update_mappings->region_start_coordinates); + vkd3d_free(update_mappings->region_sizes); + vkd3d_free(update_mappings->range_flags); + vkd3d_free(update_mappings->heap_range_offsets); + vkd3d_free(update_mappings->range_tile_counts); +} + static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *resource, UINT region_count, const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) + const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) { - FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " + struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); + struct vkd3d_cs_update_mappings update_mappings = {0}; + struct vkd3d_cs_op_data *op; + + TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " - "range_tile_counts %p, flags %#x stub!\n", + "range_tile_counts %p, flags %#x.\n", iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, range_flags, heap_range_offsets, range_tile_counts, flags); + + if (!region_count || !range_count) 
+ return; + + if (!command_queue->supports_sparse_binding) + { + FIXME("Command queue %p does not support sparse binding.\n", command_queue); + return; + } + + if (!resource_impl->tiles.subresource_count) + { + WARN("Resource %p is not a tiled resource.\n", resource_impl); + return; + } + + if (region_count > 1 && !region_start_coordinates) + { + WARN("Region start coordinates must not be NULL when region count is > 1.\n"); + return; + } + + if (range_count > 1 && !range_tile_counts) + { + WARN("Range tile counts must not be NULL when range count is > 1.\n"); + return; + } + + update_mappings.resource = resource_impl; + update_mappings.heap = heap_impl; + if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, + region_start_coordinates, sizeof(*region_start_coordinates), region_count)) + { + ERR("Failed to allocate region start coordinates.\n"); + return; + } + if (!clone_array_parameter((void **)&update_mappings.region_sizes, + region_sizes, sizeof(*region_sizes), region_count)) + { + ERR("Failed to allocate region sizes.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.range_flags, + range_flags, sizeof(*range_flags), range_count)) + { + ERR("Failed to allocate range flags.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, + heap_range_offsets, sizeof(*heap_range_offsets), range_count)) + { + ERR("Failed to allocate heap range offsets.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, + range_tile_counts, sizeof(*range_tile_counts), range_count)) + { + ERR("Failed to allocate range tile counts.\n"); + goto free_clones; + } + update_mappings.region_count = region_count; + update_mappings.range_count = range_count; + update_mappings.flags = flags; + + vkd3d_mutex_lock(&command_queue->op_mutex); + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to 
add op.\n"); + goto unlock_mutex; + } + + op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; + op->u.update_mappings = update_mappings; + + d3d12_command_queue_submit_locked(command_queue); + + vkd3d_mutex_unlock(&command_queue->op_mutex); + return; + +unlock_mutex: + vkd3d_mutex_unlock(&command_queue->op_mutex); +free_clones: + update_mappings_cleanup(&update_mappings); }
static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, @@ -6183,10 +6401,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command const D3D12_TILE_REGION_SIZE *region_size, D3D12_TILE_MAPPING_FLAGS flags) { - FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " - "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", + struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); + struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct vkd3d_cs_op_data *op; + + TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " + "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", iface, dst_resource, dst_region_start_coordinate, src_resource, src_region_start_coordinate, region_size, flags); + + vkd3d_mutex_lock(&command_queue->op_mutex); + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to add op.\n"); + goto unlock_mutex; /* op_mutex is held here; never return without releasing it. */ + } + op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; + op->u.copy_mappings.dst_resource = dst_resource_impl; + op->u.copy_mappings.src_resource = src_resource_impl; + op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; + op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; + op->u.copy_mappings.region_size = *region_size; + op->u.copy_mappings.flags = flags; + + d3d12_command_queue_submit_locked(command_queue); +unlock_mutex: + vkd3d_mutex_unlock(&command_queue->op_mutex); }
static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, @@ -6214,8 +6456,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu ERR("Failed to submit queue(s), vr %d.\n", vr);
vkd3d_queue_release(vkd3d_queue); - - vkd3d_free(buffers); }
static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) @@ -6273,7 +6513,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { ERR("Failed to add op.\n"); - return; + goto done; } op->opcode = VKD3D_CS_OP_EXECUTE; op->u.execute.buffers = buffers; @@ -6281,6 +6521,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm
d3d12_command_queue_submit_locked(command_queue);
+done: vkd3d_mutex_unlock(&command_queue->op_mutex); return; } @@ -6348,6 +6589,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { + ERR("Failed to add op.\n"); hr = E_OUTOFMEMORY; goto done; } @@ -6686,6 +6928,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if
if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { + ERR("Failed to add op.\n"); hr = E_OUTOFMEMORY; goto done; } @@ -6922,22 +7165,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * return d3d12_command_queue_fixup_after_flush_locked(queue); } d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); - d3d12_fence_decref(fence); break;
case VKD3D_CS_OP_SIGNAL: d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); - d3d12_fence_decref(op->u.signal.fence); break;
case VKD3D_CS_OP_EXECUTE: d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); break;
+ case VKD3D_CS_OP_UPDATE_MAPPINGS: + FIXME("Tiled resource binding is not supported yet.\n"); + update_mappings_cleanup(&op->u.update_mappings); + break; + + case VKD3D_CS_OP_COPY_MAPPINGS: + FIXME("Tiled resource mapping copying is not supported yet.\n"); + break; + default: vkd3d_unreachable(); }
+ d3d12_command_queue_destroy_op(op); + *flushed_any |= true; }
@@ -7000,6 +7252,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) goto fail_destroy_op_mutex;
+ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); + d3d12_device_add_ref(queue->device = device);
return S_OK; @@ -7105,16 +7359,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSign TRACE("%p decreasing refcount to %u.\n", signature, refcount);
if (!refcount) - { - struct d3d12_device *device = signature->device; - - vkd3d_private_store_destroy(&signature->private_store); - - vkd3d_free((void *)signature->desc.pArgumentDescs); - vkd3d_free(signature); - - d3d12_device_release(device); - } + d3d12_command_signature_decref(signature);
return refcount; } @@ -7221,6 +7466,7 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_
object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; object->refcount = 1; + object->internal_refcount = 1;
object->desc = *desc; if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 4263dcf4184..c33061073a3 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->device_limits = physical_device_info->properties2.properties.limits; vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; + vulkan_info->sparse_binding = features->sparseBinding; + vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; @@ -2433,34 +2435,39 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device)
static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) { - cache->head = NULL; + memset(cache, 0, sizeof(*cache)); cache->size = size; }
static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) { union d3d12_desc_object u; + unsigned int i; void *next;
- for (u.object = cache->head; u.object; u.object = next) + for (i = 0; i < ARRAY_SIZE(cache->heads); ++i) { - next = u.header->next; - vkd3d_free(u.object); + for (u.object = cache->heads[i].head; u.object; u.object = next) + { + next = u.header->next; + vkd3d_free(u.object); + } } }
/* ID3D12Device */ -static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) +static inline struct d3d12_device *impl_from_ID3D12Device1(ID3D12Device1 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device_iface); + return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device1_iface); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);
- if (IsEqualGUID(riid, &IID_ID3D12Device) + if (IsEqualGUID(riid, &IID_ID3D12Device1) + || IsEqualGUID(riid, &IID_ID3D12Device) || IsEqualGUID(riid, &IID_ID3D12Object) || IsEqualGUID(riid, &IID_IUnknown)) { @@ -2475,9 +2482,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface return E_NOINTERFACE; }
-static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); ULONG refcount = InterlockedIncrement(&device->refcount);
TRACE("%p increasing refcount to %u.\n", device, refcount); @@ -2485,9 +2492,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) return refcount; }
-static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); ULONG refcount = InterlockedDecrement(&device->refcount);
TRACE("%p decreasing refcount to %u.\n", device, refcount); @@ -2521,10 +2528,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) return refcount; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2532,10 +2539,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface return vkd3d_get_private_data(&device->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2543,19 +2550,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface return vkd3d_set_private_data(&device->private_store, guid, data_size, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device1 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
return vkd3d_set_private_data_interface(&device->private_store, guid, data); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, const WCHAR *name) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size));
@@ -2563,17 +2570,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); }
-static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device *iface) +static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device1 *iface) { TRACE("iface %p.\n", iface);
return 1; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 *iface, const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_queue *object; HRESULT hr;
@@ -2587,10 +2594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *i riid, command_queue); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device1 *iface, D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_allocator *object; HRESULT hr;
@@ -2604,10 +2611,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic riid, command_allocator); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device1 *iface, const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_pipeline_state *object; HRESULT hr;
@@ -2621,10 +2628,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 &IID_ID3D12PipelineState, riid, pipeline_state); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device1 *iface, const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_pipeline_state *object; HRESULT hr;
@@ -2638,11 +2645,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D &IID_ID3D12PipelineState, riid, pipeline_state); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device1 *iface, UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_list *object; HRESULT hr;
@@ -2655,8 +2662,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *if initial_pipeline_state, &object))) return hr;
- return return_interface(&object->ID3D12GraphicsCommandList2_iface, - &IID_ID3D12GraphicsCommandList2, riid, command_list); + return return_interface(&object->ID3D12GraphicsCommandList3_iface, + &IID_ID3D12GraphicsCommandList3, riid, command_list); }
/* Direct3D feature levels restrict which formats can be optionally supported. */ @@ -2765,10 +2772,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) return true; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 *iface, D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", iface, feature, feature_data, feature_data_size); @@ -3267,10 +3274,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device * } }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 *iface, const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_descriptor_heap *object; HRESULT hr;
@@ -3284,7 +3291,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device &IID_ID3D12DescriptorHeap, riid, descriptor_heap); }
-static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device *iface, +static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device1 *iface, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); @@ -3307,11 +3314,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D } }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 *iface, UINT node_mask, const void *bytecode, SIZE_T bytecode_length, REFIID riid, void **root_signature) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_root_signature *object; HRESULT hr;
@@ -3327,10 +3334,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * &IID_ID3D12RootSignature, riid, root_signature); }
-static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device1 *iface, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); @@ -3339,11 +3346,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device1 *iface, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", @@ -3353,11 +3360,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device1 *iface, ID3D12Resource *resource, ID3D12Resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", @@ -3368,7 +3375,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 *iface, ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3376,10 +3383,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device * iface, resource, desc, descriptor.ptr);
d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); }
-static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device1 *iface, ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3387,13 +3394,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * iface, resource, desc, descriptor.ptr);
d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); }
-static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_desc tmp = {0};
TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); @@ -3402,16 +3409,17 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, const UINT *src_descriptor_range_sizes, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; unsigned int dst_range_size, src_range_size; + struct d3d12_descriptor_heap *dst_heap; const struct d3d12_desc *src; struct d3d12_desc *dst;
@@ -3441,13 +3449,14 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1;
dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); + dst_heap = d3d12_desc_get_descriptor_heap(dst); src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]);
for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) { if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) continue; - d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); + d3d12_desc_copy(&dst[dst_idx], &src[src_idx], dst_heap, device); }
if (dst_idx >= dst_range_size) @@ -3463,7 +3472,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, } }
-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 *iface, UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) @@ -3478,10 +3487,10 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i }
static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( - ID3D12Device *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + ID3D12Device1 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, const D3D12_RESOURCE_DESC *resource_descs) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); const D3D12_RESOURCE_DESC *desc; uint64_t requested_alignment;
@@ -3554,10 +3563,10 @@ invalid: return info; }
-static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device *iface, +static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device1 *iface, D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); bool coherent;
TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", @@ -3597,12 +3606,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope return heap_properties; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device1 *iface, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_resource *object; HRESULT hr;
@@ -3621,10 +3630,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_heap *object; HRESULT hr;
@@ -3640,12 +3649,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 *iface, ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_heap *heap_object; struct d3d12_resource *object; HRESULT hr; @@ -3664,11 +3673,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device1 *iface, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_resource *object; HRESULT hr;
@@ -3682,11 +3691,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 *iface, ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, const WCHAR *name, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); @@ -3694,7 +3703,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *i return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *iface, HANDLE handle, REFIID riid, void **object) { FIXME("iface %p, handle %p, riid %s, object %p stub!\n", @@ -3703,10 +3712,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *ifa return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device1 *iface, const WCHAR *name, DWORD access, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
FIXME("iface %p, name %s, access %#x, handle %p stub!\n", iface, debugstr_w(name, device->wchar_size), access, handle); @@ -3714,7 +3723,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, UINT object_count, ID3D12Pageable * const *objects) { FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", @@ -3723,7 +3732,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, return S_OK; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, UINT object_count, ID3D12Pageable * const *objects) { FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", @@ -3732,10 +3741,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, return S_OK; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_fence *object; HRESULT hr;
@@ -3745,24 +3754,24 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) return hr;
- return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); + return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) +static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device1 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p.\n", iface);
return device->removed_reason; }
-static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 *iface, const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; unsigned int width, height, depth, plane_count, sub_resources_per_plane; @@ -3842,10 +3851,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i *total_bytes = total; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *iface, const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_query_heap *object; HRESULT hr;
@@ -3858,18 +3867,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *ifac return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); }
-static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device *iface, BOOL enable) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device1 *iface, BOOL enable) { FIXME("iface %p, enable %#x stub!\n", iface, enable);
return E_NOTIMPL; }
-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device1 *iface, const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, REFIID iid, void **command_signature) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); struct d3d12_command_signature *object; HRESULT hr;
@@ -3883,23 +3892,29 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic &IID_ID3D12CommandSignature, iid, command_signature); }
-static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface, +static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *iface, ID3D12Resource *resource, UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, D3D12_SUBRESOURCE_TILING *sub_resource_tilings) { - FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); + struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " "standard_title_shape %p, sub_resource_tiling_count %p, " - "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", + "first_sub_resource_tiling %u, sub_resource_tilings %p.\n", iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); + + d3d12_resource_get_tiling(device, resource_impl, total_tile_count, packed_mip_info, standard_tile_shape, + sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); }
-static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) +static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface, LUID *luid) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_device *device = impl_from_ID3D12Device1(iface);
TRACE("iface %p, luid %p.\n", iface, luid);
@@ -3908,7 +3923,33 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, return luid; }
-static const struct ID3D12DeviceVtbl d3d12_device_vtbl = +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device1 *iface, + const void *blob, SIZE_T blob_size, REFIID iid, void **lib) +{ + FIXME("iface %p, blob %p, blob_size %lu, iid %s, lib %p stub!\n", iface, blob, blob_size, debugstr_guid(iid), lib); + + return DXGI_ERROR_UNSUPPORTED; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device1 *iface, + ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, + D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) +{ + FIXME("iface %p, fences %p, values %p, fence_count %u, flags %#x, event %p stub!\n", + iface, fences, values, fence_count, flags, event); + + return E_NOTIMPL; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 *iface, + UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) +{ + FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); + + return S_OK; +} + +static const struct ID3D12Device1Vtbl d3d12_device_vtbl = { /* IUnknown methods */ d3d12_device_QueryInterface, @@ -3957,14 +3998,18 @@ static const struct ID3D12DeviceVtbl d3d12_device_vtbl = d3d12_device_CreateCommandSignature, d3d12_device_GetResourceTiling, d3d12_device_GetAdapterLuid, + /* ID3D12Device1 methods */ + d3d12_device_CreatePipelineLibrary, + d3d12_device_SetEventOnMultipleFenceCompletion, + d3d12_device_SetResidencyPriority, };
-struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) +struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface) { if (!iface) return NULL; assert(iface->lpVtbl == &d3d12_device_vtbl); - return impl_from_ID3D12Device(iface); + return impl_from_ID3D12Device1(iface); }
static HRESULT d3d12_device_init(struct d3d12_device *device, @@ -3973,7 +4018,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr;
- device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; + device->ID3D12Device1_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1;
vkd3d_instance_incref(device->vkd3d_instance = instance); @@ -4170,28 +4215,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha
IUnknown *vkd3d_get_device_parent(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->parent; }
VkDevice vkd3d_get_vk_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->vk_device; }
VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->vk_physical_device; }
struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device);
return d3d12_device->vkd3d_instance; } diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index ea7b6859cc1..f3842958d96 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, VkImageFormatListCreateInfoKHR format_list; const struct vkd3d_format *format; VkImageCreateInfo image_info; + uint32_t count; VkResult vr;
if (resource) @@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) resource->flags |= VKD3D_RESOURCE_LINEAR_TILING;
+ if (sparse_resource) + { + count = 0; + VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, + image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); + + if (!count) + { + FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", + image_info.format, image_info.imageType, image_info.samples, image_info.usage); + return E_INVALIDARG; + } + } + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) WARN("Failed to create Vulkan image, vr %d.\n", vr);
@@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, D3D12_RESOURCE_DESC validated_desc; VkMemoryRequirements requirements; VkImage vk_image; + bool tiled; HRESULT hr;
assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); @@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, desc = &validated_desc; }
+ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + /* XXX: We have to create an image to get its memory requirements. */ - if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) + if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) { VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); @@ -953,6 +971,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, return hr; }
+static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) +{ + vkd3d_free(resource->tiles.subresources); +} + static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -968,6 +991,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 else VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
+ d3d12_resource_tile_info_cleanup(resource); + if (resource->heap) d3d12_heap_resource_destroyed(resource->heap); } @@ -1039,12 +1064,196 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, box->back = d3d12_resource_desc_get_depth(&resource->desc, level); }
-/* ID3D12Resource */ -static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) +static void compute_image_subresource_size_in_tiles(const VkExtent3D *tile_extent, + const struct D3D12_RESOURCE_DESC *desc, unsigned int miplevel_idx, + struct vkd3d_tiled_region_extent *size) { - return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); + unsigned int width, height, depth; + + width = d3d12_resource_desc_get_width(desc, miplevel_idx); + height = d3d12_resource_desc_get_height(desc, miplevel_idx); + depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); + size->width = (width + tile_extent->width - 1) / tile_extent->width; + size->height = (height + tile_extent->height - 1) / tile_extent->height; + size->depth = (depth + tile_extent->depth - 1) / tile_extent->depth; +} + +void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, + UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *subresource_tiling_count, UINT first_subresource_tiling, + D3D12_SUBRESOURCE_TILING *subresource_tilings) +{ + unsigned int i, subresource, subresource_count, miplevel_idx, count; + const struct vkd3d_subresource_tile_info *tile_info; + const VkExtent3D *tile_extent; + + tile_extent = &resource->tiles.tile_extent; + + if (packed_mip_info) + { + packed_mip_info->NumStandardMips = resource->tiles.standard_mip_count; + packed_mip_info->NumPackedMips = resource->desc.MipLevels - packed_mip_info->NumStandardMips; + packed_mip_info->NumTilesForPackedMips = !!resource->tiles.packed_mip_tile_count; /* non-zero dummy value */ + packed_mip_info->StartTileIndexInOverallResource = packed_mip_info->NumPackedMips + ? resource->tiles.subresources[resource->tiles.standard_mip_count].offset : 0; + } + + if (standard_tile_shape) + { + /* D3D12 docs say tile shape is cleared to zero if there is no standard mip, but drivers don't to do this. 
*/ + standard_tile_shape->WidthInTexels = tile_extent->width; + standard_tile_shape->HeightInTexels = tile_extent->height; + standard_tile_shape->DepthInTexels = tile_extent->depth; + } + + if (total_tile_count) + *total_tile_count = resource->tiles.total_count; + + if (!subresource_tiling_count) + return; + + subresource_count = resource->tiles.subresource_count; + + count = subresource_count - min(first_subresource_tiling, subresource_count); + count = min(count, *subresource_tiling_count); + + for (i = 0; i < count; ++i) + { + subresource = i + first_subresource_tiling; + miplevel_idx = subresource % resource->desc.MipLevels; + if (miplevel_idx >= resource->tiles.standard_mip_count) + { + memset(&subresource_tilings[i], 0, sizeof(subresource_tilings[i])); + subresource_tilings[i].StartTileIndexInOverallResource = D3D12_PACKED_TILE; + continue; + } + + tile_info = &resource->tiles.subresources[subresource]; + subresource_tilings[i].StartTileIndexInOverallResource = tile_info->offset; + subresource_tilings[i].WidthInTiles = tile_info->extent.width; + subresource_tilings[i].HeightInTiles = tile_info->extent.height; + subresource_tilings[i].DepthInTiles = tile_info->extent.depth; + } + *subresource_tiling_count = i; }
+static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) +{ + unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSparseImageMemoryRequirements *sparse_requirements_array; + VkSparseImageMemoryRequirements sparse_requirements = {0}; + struct vkd3d_subresource_tile_info *tile_info; + VkMemoryRequirements requirements; + const VkExtent3D *tile_extent; + uint32_t requirement_count; + + subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); + + if (!(resource->tiles.subresources = vkd3d_calloc(subresource_count, sizeof(*resource->tiles.subresources)))) + { + ERR("Failed to allocate subresource info array.\n"); + return false; + } + + if (d3d12_resource_is_buffer(resource)) + { + assert(subresource_count == 1); + + VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); + if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) + FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); + + tile_info = &resource->tiles.subresources[0]; + tile_info->offset = 0; + tile_info->extent.width = align(resource->desc.Width, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) + / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + tile_info->extent.height = 1; + tile_info->extent.depth = 1; + tile_info->count = tile_info->extent.width; + + resource->tiles.tile_extent.width = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + resource->tiles.tile_extent.height = 1; + resource->tiles.tile_extent.depth = 1; + resource->tiles.total_count = tile_info->extent.width; + resource->tiles.subresource_count = 1; + resource->tiles.standard_mip_count = 1; + resource->tiles.packed_mip_tile_count = 0; + } + else + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, resource->u.vk_image, &requirements)); + if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) + 
FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); + + requirement_count = 0; + VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, &requirement_count, NULL)); + if (!(sparse_requirements_array = vkd3d_calloc(requirement_count, sizeof(*sparse_requirements_array)))) + { + ERR("Failed to allocate sparse requirements array.\n"); + return false; + } + VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, + &requirement_count, sparse_requirements_array)); + + for (i = 0; i < requirement_count; ++i) + { + if (sparse_requirements_array[i].formatProperties.aspectMask & resource->format->vk_aspect_mask) + { + if (sparse_requirements.formatProperties.aspectMask) + { + WARN("Ignoring properties for aspect mask %#x.\n", + sparse_requirements_array[i].formatProperties.aspectMask); + } + else + { + sparse_requirements = sparse_requirements_array[i]; + } + } + } + vkd3d_free(sparse_requirements_array); + if (!sparse_requirements.formatProperties.aspectMask) + { + WARN("Failed to get sparse requirements.\n"); + return false; + } + + resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; + resource->tiles.subresource_count = subresource_count; + resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize + ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; + resource->tiles.packed_mip_tile_count = (resource->tiles.standard_mip_count < resource->desc.MipLevels) + ? 
sparse_requirements.imageMipTailSize / requirements.alignment : 0; + + for (i = 0, start_idx = 0; i < subresource_count; ++i) + { + miplevel_idx = i % resource->desc.MipLevels; + + tile_extent = &sparse_requirements.formatProperties.imageGranularity; + tile_info = &resource->tiles.subresources[i]; + compute_image_subresource_size_in_tiles(tile_extent, &resource->desc, miplevel_idx, &tile_info->extent); + tile_info->offset = start_idx; + tile_info->count = 0; + + if (miplevel_idx < resource->tiles.standard_mip_count) + { + tile_count = tile_info->extent.width * tile_info->extent.height * tile_info->extent.depth; + start_idx += tile_count; + tile_info->count = tile_count; + } + else if (miplevel_idx == resource->tiles.standard_mip_count) + { + tile_info->count = 1; /* Non-zero dummy value */ + start_idx += 1; + } + } + resource->tiles.total_count = start_idx; + } + + return true; +} + +/* ID3D12Resource */ static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, REFIID riid, void **object) { @@ -1661,6 +1870,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d return E_INVALIDARG; }
+ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) + { + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) + { + WARN("The device does not support tiled 3D images.\n"); + return E_INVALIDARG; + } + if (format->plane_count > 1) + { + WARN("Invalid format %#x. D3D12 does not support multiplanar formats for tiled resources.\n", + format->dxgi_format); + return E_INVALIDARG; + } + } + if (!d3d12_resource_validate_texture_format(desc, format) || !d3d12_resource_validate_texture_alignment(desc, format)) return E_INVALIDARG; @@ -1722,6 +1946,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
resource->desc = *desc;
+ if (!heap_properties && !device->vk_info.sparse_binding) + { + WARN("The device does not support tiled images.\n"); + return E_INVALIDARG; + } + if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) return E_INVALIDARG;
@@ -1787,6 +2017,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 resource->heap = NULL; resource->heap_offset = 0;
+ memset(&resource->tiles, 0, sizeof(resource->tiles)); + if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) { d3d12_resource_destroy(resource, device); @@ -1972,6 +2204,12 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, desc, initial_state, optimized_clear_value, &object))) return hr;
+ if (!d3d12_resource_init_tiles(object, device)) + { + d3d12_resource_Release(&object->ID3D12Resource_iface); + return E_OUTOFMEMORY; + } + TRACE("Created reserved resource %p.\n", object);
*resource = object; @@ -1982,7 +2220,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, HRESULT vkd3d_create_image_resource(ID3D12Device *device, const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) { - struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device(device); + struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device1((ID3D12Device1 *)device); struct d3d12_resource *object; HRESULT hr;
@@ -2044,38 +2282,67 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); }
-/* Objects are cached so that vkd3d_view_incref() can safely check the refcount - * of an object freed by another thread. */ +#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1) + +/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an + * object freed by another thread. This could be implemented as a single atomic linked + * list, but it requires handling the ABA problem, which brings issues with cross-platform + * support, compiler support, and non-universal x86-64 support for 128-bit CAS. */ static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) { union d3d12_desc_object u; - void *next; + unsigned int i;
- do + STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK)); + + i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; + for (;;) { - u.object = cache->head; - if (!u.object) + if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) + { + if ((u.object = cache->heads[i].head)) + { + vkd3d_atomic_decrement(&cache->free_count); + cache->heads[i].head = u.header->next; + vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + return u.object; + } + vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + } + /* Keeping a free count avoids uncertainty over when this loop should terminate, + * which could result in excess allocations gradually increasing without limit. */ + if (cache->free_count < ARRAY_SIZE(cache->heads)) return vkd3d_malloc(cache->size); - next = u.header->next; - } - while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next));
- return u.object; + i = (i + 1) & HEAD_INDEX_MASK; + } }
static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) { union d3d12_desc_object u = {object}; + unsigned int i; void *head;
- do + /* Using the same index as above may result in a somewhat uneven distribution, + * but the main objective is to avoid costly spinlock contention. */ + i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; + for (;;) { - head = cache->head; - u.header->next = head; + if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) + break; + i = (i + 1) & HEAD_INDEX_MASK; } - while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); + + head = cache->heads[i].head; + u.header->next = head; + cache->heads[i].head = u.object; + vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); + vkd3d_atomic_increment(&cache->free_count); }
+#undef HEAD_INDEX_MASK + static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) { struct vkd3d_cbuffer_desc *desc; @@ -2368,13 +2635,11 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr descriptor_writes_free_object_refs(&writes, device); }
-static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) +static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_descriptor_heap *descriptor_heap) { - struct d3d12_descriptor_heap *descriptor_heap; unsigned int i, head;
i = dst->index; - descriptor_heap = d3d12_desc_get_descriptor_heap(dst); head = descriptor_heap->dirty_list_head;
/* Only one thread can swap the value away from zero. */ @@ -2388,14 +2653,20 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) } }
-void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +static inline void descriptor_heap_write_atomic(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_desc *dst, + const struct d3d12_desc *src, struct d3d12_device *device) { void *object = src->s.u.object;
d3d12_desc_replace(dst, object, device); - if (device->use_vk_heaps && object && !dst->next) - d3d12_desc_mark_as_modified(dst); + if (descriptor_heap->use_vk_heaps && object && !dst->next) + d3d12_desc_mark_as_modified(dst, descriptor_heap); +} + +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + descriptor_heap_write_atomic(d3d12_desc_get_descriptor_heap(dst), dst, src, device); }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) @@ -2403,7 +2674,9 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic d3d12_desc_replace(descriptor, NULL, device); }
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, +/* This is a major performance bottleneck for some games, so do not load the device + * pointer from dst_heap. In some cases device will not be used. */ +void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, struct d3d12_device *device) { struct d3d12_desc tmp; @@ -2411,7 +2684,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, assert(dst != src);
tmp.s.u.object = d3d12_desc_get_object_ref(src, device); - d3d12_desc_write_atomic(dst, &tmp, device); + descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); }
static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, @@ -3810,7 +4083,15 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get
TRACE("iface %p, descriptor %p.\n", iface, descriptor);
- descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; + if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) + { + descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; + } + else + { + WARN("Heap %p is not shader-visible.\n", iface); + descriptor->ptr = 0; + }
return descriptor; } @@ -3913,7 +4194,7 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets));
- if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + if (!descriptor_heap->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) return S_OK;
@@ -3944,6 +4225,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) return hr;
+ descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex);
diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 5e46b467252..7ae46c862cc 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -20,6 +20,7 @@
#include "vkd3d_private.h" #include "vkd3d_shaders.h" +#include "vkd3d_shader_utils.h"
/* ID3D12RootSignature */ static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) @@ -374,8 +375,8 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig
if (unbounded && range->OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) { - WARN("An unbounded range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " - "another unbounded range.\n"); + WARN("A range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " + "an unbounded range.\n"); return E_INVALIDARG; }
@@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, compile_info.next = shader_interface; compile_info.source.code = code->pShaderBytecode; compile_info.source.size = code->BytecodeLength; - compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; compile_info.options = options; compile_info.option_count = ARRAY_SIZE(options); compile_info.log_level = VKD3D_SHADER_LOG_NONE; compile_info.source_name = NULL;
- if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) + if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 + || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) { WARN("Failed to compile shader, vkd3d result %d.\n", ret); return hresult_from_vkd3d_result(ret); @@ -2008,6 +2009,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER struct vkd3d_shader_scan_descriptor_info *descriptor_info) { struct vkd3d_shader_compile_info compile_info; + enum vkd3d_result ret;
const struct vkd3d_shader_compile_option options[] = { @@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER compile_info.next = descriptor_info; compile_info.source.code = code->pShaderBytecode; compile_info.source.size = code->BytecodeLength; - compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; compile_info.options = options; compile_info.option_count = ARRAY_SIZE(options); compile_info.log_level = VKD3D_SHADER_LOG_NONE; compile_info.source_name = NULL;
+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) + return ret; + return vkd3d_shader_scan(&compile_info, NULL); }
diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index 88301fbb313..159560afd8e 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info,
if (!device) { - ID3D12Device_Release(&object->ID3D12Device_iface); + ID3D12Device_Release(&object->ID3D12Device1_iface); return S_FALSE; }
- return return_interface(&object->ID3D12Device_iface, &IID_ID3D12Device, iid, device); + return return_interface(&object->ID3D12Device1_iface, &IID_ID3D12Device, iid, device); }
/* ID3D12RootSignatureDeserializer */ diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index e8d6371709c..363a7132c3a 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -147,9 +147,12 @@ struct vkd3d_vulkan_info unsigned int max_vertex_attrib_divisor;
VkPhysicalDeviceLimits device_limits; - VkPhysicalDeviceSparseProperties sparse_properties; struct vkd3d_device_descriptor_limits descriptor_limits;
+ VkPhysicalDeviceSparseProperties sparse_properties; + bool sparse_binding; + bool sparse_residency_3d; + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
unsigned int shader_extension_count; @@ -250,6 +253,11 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) { }
+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) +{ + return InterlockedIncrement((LONG volatile *)x); +} + static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) { return InterlockedDecrement((LONG volatile *)x); @@ -384,6 +392,15 @@ static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) } # else # error "vkd3d_atomic_decrement() not implemented for this platform" +# endif /* HAVE_SYNC_SUB_AND_FETCH */ + +# if HAVE_SYNC_ADD_AND_FETCH +static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) +{ + return __sync_add_and_fetch(x, 1); +} +# else +# error "vkd3d_atomic_increment() not implemented for this platform" # endif /* HAVE_SYNC_ADD_AND_FETCH */
# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP @@ -599,10 +616,12 @@ struct vkd3d_signaled_semaphore /* ID3D12Fence */ struct d3d12_fence { - ID3D12Fence ID3D12Fence_iface; + ID3D12Fence1 ID3D12Fence1_iface; LONG internal_refcount; LONG refcount;
+ D3D12_FENCE_FLAGS flags; + uint64_t value; uint64_t max_pending_value; struct vkd3d_mutex mutex; @@ -670,6 +689,30 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010
+struct vkd3d_tiled_region_extent +{ + unsigned int width; + unsigned int height; + unsigned int depth; +}; + +struct vkd3d_subresource_tile_info +{ + unsigned int offset; + unsigned int count; + struct vkd3d_tiled_region_extent extent; +}; + +struct d3d12_resource_tile_info +{ + VkExtent3D tile_extent; + unsigned int total_count; + unsigned int standard_mip_count; + unsigned int packed_mip_tile_count; + unsigned int subresource_count; + struct vkd3d_subresource_tile_info *subresources; +}; + /* ID3D12Resource */ struct d3d12_resource { @@ -698,9 +741,16 @@ struct d3d12_resource
struct d3d12_device *device;
+ struct d3d12_resource_tile_info tiles; + struct vkd3d_private_store private_store; };
+static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); +} + static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) { return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; @@ -713,6 +763,10 @@ static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resour
bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); +void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, + UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings);
HRESULT d3d12_committed_resource_create(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, @@ -853,8 +907,9 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * { do { - view = src->s.u.object; - } while (view && !vkd3d_view_incref(view)); + if (!(view = src->s.u.object)) + return NULL; + } while (!vkd3d_view_incref(view));
/* Check if the object is still in src to handle the case where it was * already freed and reused elsewhere when the refcount was incremented. */ @@ -880,7 +935,10 @@ static inline void d3d12_desc_copy_raw(struct d3d12_desc *dst, const struct d3d1 dst->s = src->s; }
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +struct d3d12_descriptor_heap; + +void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, + struct d3d12_device *device); void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); void d3d12_desc_create_srv(struct d3d12_desc *descriptor, @@ -983,6 +1041,7 @@ struct d3d12_descriptor_heap D3D12_DESCRIPTOR_HEAP_DESC desc;
struct d3d12_device *device; + bool use_vk_heaps;
struct vkd3d_private_store private_store;
@@ -1367,7 +1426,7 @@ enum vkd3d_pipeline_bind_point /* ID3D12CommandList */ struct d3d12_command_list { - ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; + ID3D12GraphicsCommandList3 ID3D12GraphicsCommandList3_iface; LONG refcount;
D3D12_COMMAND_LIST_TYPE type; @@ -1454,6 +1513,8 @@ enum vkd3d_cs_op VKD3D_CS_OP_WAIT, VKD3D_CS_OP_SIGNAL, VKD3D_CS_OP_EXECUTE, + VKD3D_CS_OP_UPDATE_MAPPINGS, + VKD3D_CS_OP_COPY_MAPPINGS, };
struct vkd3d_cs_wait @@ -1474,6 +1535,30 @@ struct vkd3d_cs_execute unsigned int buffer_count; };
+struct vkd3d_cs_update_mappings +{ + struct d3d12_resource *resource; + struct d3d12_heap *heap; + D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; + D3D12_TILE_REGION_SIZE *region_sizes; + D3D12_TILE_RANGE_FLAGS *range_flags; + UINT *heap_range_offsets; + UINT *range_tile_counts; + UINT region_count; + UINT range_count; + D3D12_TILE_MAPPING_FLAGS flags; +}; + +struct vkd3d_cs_copy_mappings +{ + struct d3d12_resource *dst_resource; + struct d3d12_resource *src_resource; + D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; + D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; + D3D12_TILE_REGION_SIZE region_size; + D3D12_TILE_MAPPING_FLAGS flags; +}; + struct vkd3d_cs_op_data { enum vkd3d_cs_op opcode; @@ -1482,6 +1567,8 @@ struct vkd3d_cs_op_data struct vkd3d_cs_wait wait; struct vkd3d_cs_signal signal; struct vkd3d_cs_execute execute; + struct vkd3d_cs_update_mappings update_mappings; + struct vkd3d_cs_copy_mappings copy_mappings; } u; };
@@ -1519,6 +1606,8 @@ struct d3d12_command_queue * set, aux_op_queue.count must be zero. */ struct d3d12_command_queue_op_array aux_op_queue;
+ bool supports_sparse_binding; + struct vkd3d_private_store private_store; };
@@ -1530,6 +1619,7 @@ struct d3d12_command_signature { ID3D12CommandSignature ID3D12CommandSignature_iface; LONG refcount; + unsigned int internal_refcount;
D3D12_COMMAND_SIGNATURE_DESC desc;
@@ -1600,9 +1690,17 @@ struct vkd3d_uav_clear_state HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
+struct desc_object_cache_head +{ + void *head; + unsigned int spinlock; +}; + struct vkd3d_desc_object_cache { - void * volatile head; + struct desc_object_cache_head heads[16]; + unsigned int next_index; + unsigned int free_count; size_t size; };
@@ -1611,7 +1709,7 @@ struct vkd3d_desc_object_cache /* ID3D12Device */ struct d3d12_device { - ID3D12Device ID3D12Device_iface; + ID3D12Device1 ID3D12Device1_iface; LONG refcount;
VkDevice vk_device; @@ -1677,27 +1775,27 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); -struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface); +struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface);
static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) { - return ID3D12Device_QueryInterface(&device->ID3D12Device_iface, iid, object); + return ID3D12Device1_QueryInterface(&device->ID3D12Device1_iface, iid, object); }
static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) { - return ID3D12Device_AddRef(&device->ID3D12Device_iface); + return ID3D12Device1_AddRef(&device->ID3D12Device1_iface); }
static inline ULONG d3d12_device_release(struct d3d12_device *device) { - return ID3D12Device_Release(&device->ID3D12Device_iface); + return ID3D12Device1_Release(&device->ID3D12Device1_iface); }
static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) { - return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); + return ID3D12Device1_GetDescriptorHandleIncrementSize(&device->ID3D12Device1_iface, descriptor_type); }
/* utils */