From: Alexandre Julliard <julliard@winehq.org>
--- dlls/d3dcompiler_43/tests/hlsl_d3d9.c | 16 +- dlls/d3dx9_36/tests/shader.c | 9 +- libs/vkd3d/COPYING | 2 +- libs/vkd3d/include/vkd3d.h | 30 +- libs/vkd3d/include/vkd3d_shader.h | 230 +- libs/vkd3d/libs/vkd3d-common/debug.c | 13 + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 339 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1118 ++-- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 35 + libs/vkd3d/libs/vkd3d-shader/dxil.c | 88 +- libs/vkd3d/libs/vkd3d-shader/fx.c | 177 +- libs/vkd3d/libs/vkd3d-shader/glsl.c | 9 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 532 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 224 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 5 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1239 +++-- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 4679 ++++++++++++++--- .../libs/vkd3d-shader/hlsl_constant_ops.c | 384 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 2096 ++++++-- libs/vkd3d/libs/vkd3d-shader/msl.c | 464 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 779 ++- libs/vkd3d/libs/vkd3d-shader/tpf.c | 2895 ++-------- .../libs/vkd3d-shader/vkd3d_shader_main.c | 32 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 89 +- libs/vkd3d/libs/vkd3d/command.c | 330 +- libs/vkd3d/libs/vkd3d/device.c | 93 +- libs/vkd3d/libs/vkd3d/resource.c | 9 +- libs/vkd3d/libs/vkd3d/state.c | 250 +- libs/vkd3d/libs/vkd3d/utils.c | 2 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 70 +- 30 files changed, 10542 insertions(+), 5696 deletions(-)
diff --git a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c index 95ba6d286d8..3f930e53758 100644 --- a/dlls/d3dcompiler_43/tests/hlsl_d3d9.c +++ b/dlls/d3dcompiler_43/tests/hlsl_d3d9.c @@ -1164,18 +1164,14 @@ static void test_samplers(void) { hr = IDirect3DDevice9_Clear(test_context.device, 0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(255, 0, 0), 1.0f, 0); ok(hr == D3D_OK, "Test %u: Got unexpected hr %#lx.\n", i, hr); - todo_wine_if (i > 2) ps_code = compile_shader(tests[i], "ps_2_0", 0); - if (ps_code) - { - draw_quad(test_context.device, ps_code); + draw_quad(test_context.device, ps_code);
- v = get_color_vec4(test_context.device, 0, 0); - ok(compare_vec4(&v, 1.0f, 0.0f, 1.0f, 0.0f, 0), - "Test %u: Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", i, v.x, v.y, v.z, v.w); + v = get_color_vec4(test_context.device, 0, 0); + ok(compare_vec4(&v, 1.0f, 0.0f, 1.0f, 0.0f, 0), + "Test %u: Got unexpected value {%.8e, %.8e, %.8e, %.8e}.\n", i, v.x, v.y, v.z, v.w);
- ID3D10Blob_Release(ps_code); - } + ID3D10Blob_Release(ps_code); }
IDirect3DTexture9_Release(texture); @@ -1835,7 +1831,7 @@ static void test_hlsl_double(void) #if D3D_COMPILER_VERSION >= 46 todo_wine ok(hr == E_FAIL, "Unexpected hr %#lx.\n", hr); #else - todo_wine ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); + ok(hr == S_OK, "Unexpected hr %#lx.\n", hr); #endif if (FAILED(hr)) { diff --git a/dlls/d3dx9_36/tests/shader.c b/dlls/d3dx9_36/tests/shader.c index 4ce668b92c3..4ff54925adf 100644 --- a/dlls/d3dx9_36/tests/shader.c +++ b/dlls/d3dx9_36/tests/shader.c @@ -6869,14 +6869,7 @@ static void test_hlsl_double(void) ok(hr == D3D_OK, "Unexpected hr %#lx.\n", hr);
hr = D3DXCompileShader(ps_hlsl, sizeof(ps_hlsl), NULL, NULL, "main", "ps_2_0", 0, &ps_bytecode, &errors, NULL); - todo_wine ok(hr == D3D_OK, "Unexpected hr %#lx.\n", hr); - if (FAILED(hr)) - { - if (errors) - trace("%s", (char *)ID3DXBuffer_GetBufferPointer(errors)); - release_test_context(&context); - return; - } + ok(hr == D3D_OK, "Unexpected hr %#lx.\n", hr);
hr = IDirect3DDevice9_CreateVertexShader(context.device, ID3DXBuffer_GetBufferPointer(vs_bytecode), &vs); ok(hr == D3D_OK, "Unexpected hr %#lx.\n", hr); diff --git a/libs/vkd3d/COPYING b/libs/vkd3d/COPYING index f5b3d6f239f..383003017aa 100644 --- a/libs/vkd3d/COPYING +++ b/libs/vkd3d/COPYING @@ -1,4 +1,4 @@ -Copyright 2016-2024 the Vkd3d project authors (see the file AUTHORS for a +Copyright 2016-2025 the Vkd3d project authors (see the file AUTHORS for a complete list)
Vkd3d is free software; you can redistribute it and/or modify it under diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index b18fd14f4c3..279c6b38be8 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -99,6 +99,7 @@ enum vkd3d_api_version VKD3D_API_VERSION_1_12, VKD3D_API_VERSION_1_13, VKD3D_API_VERSION_1_14, + VKD3D_API_VERSION_1_15,
     VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION),
 };
@@ -411,9 +412,13 @@ VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue);
  * the Vulkan driver as being submitted before other work submitted
  * through the Direct3D 12 API. If this is not desired, it is
  * recommended to synchronize work submission using an ID3D12Fence
- * object, by submitting to the queue a signal operation after all the
- * Direct3D 12 work is submitted and waiting for it before calling
- * vkd3d_acquire_vk_queue().
+ * object:
+ * 1. submit work through the Direct3D 12 API;
+ * 2. call vkd3d_queue_signal_on_cpu();
+ * 3. wait for the fence to be signalled;
+ * 4. call vkd3d_acquire_vk_queue(); it is guaranteed that all work submitted
+ *    at point 1 has already been submitted to Vulkan (though not necessarily
+ *    executed).
  *
  * \since 1.0
  */
@@ -466,6 +471,21 @@ VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void
  */
 VKD3D_API void vkd3d_set_log_callback(PFN_vkd3d_log callback);
+/** + * Signal a fence on the CPU once all the currently outstanding queue work is + * submitted to Vulkan. + * + * The fence will be signalled on the CPU (as if ID3D12Fence_Signal() was + * called) once all the work submitted through the Direct3D 12 API before + * vkd3d_queue_signal_on_cpu() is called has left the internal queue and has + * been submitted to the underlying Vulkan queue. Read the documentation for + * vkd3d_acquire_vk_queue() for more details. + * + * \since 1.15 + */ +VKD3D_API HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *queue, + ID3D12Fence *fence, uint64_t value); + #endif /* VKD3D_NO_PROTOTYPES */
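
[As an illustration of the new entry point (not part of the patch): a minimal sketch of the four-step sequence described above. The fence and value are assumed to have been created by the application; error handling is elided, and the fence wait is a busy-wait only to keep the sketch short — a real application would wait on an event.]

    #include <vulkan/vulkan.h>
    #include <vkd3d.h>

    /* Acquire the Vulkan queue only after all previously submitted
     * Direct3D 12 work (step 1) has reached Vulkan. */
    static void submit_native_vulkan_work(ID3D12CommandQueue *queue,
            ID3D12Fence *fence, uint64_t value)
    {
        VkQueue vk_queue;

        /* Step 2: ask vkd3d to signal the fence on the CPU once all prior
         * Direct3D 12 work has been submitted to the Vulkan queue. */
        vkd3d_queue_signal_on_cpu(queue, fence, value);

        /* Step 3: wait for the fence to be signalled. */
        while (ID3D12Fence_GetCompletedValue(fence) < value)
            ;

        /* Step 4: all work from step 1 is now guaranteed to have been
         * submitted, though not necessarily executed. */
        vk_queue = vkd3d_acquire_vk_queue(queue);

        /* ... submit native Vulkan work to vk_queue here ... */

        vkd3d_release_vk_queue(queue);
    }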
/* @@ -512,6 +532,10 @@ typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const /** Type of vkd3d_set_log_callback(). \since 1.4 */ typedef void (*PFN_vkd3d_set_log_callback)(PFN_vkd3d_log callback);
+/** Type of vkd3d_queue_signal_on_cpu(). \since 1.15 */ +typedef HRESULT (*PFN_vkd3d_queue_signal_on_cpu)(ID3D12CommandQueue *queue, + ID3D12Fence *fence, uint64_t value); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index f95caa2f825..058166aa2f9 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -57,6 +57,7 @@ enum vkd3d_shader_api_version VKD3D_SHADER_API_VERSION_1_12, VKD3D_SHADER_API_VERSION_1_13, VKD3D_SHADER_API_VERSION_1_14, + VKD3D_SHADER_API_VERSION_1_15,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), }; @@ -112,6 +113,11 @@ enum vkd3d_shader_structure_type * \since 1.13 */ VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, + /** + * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure. + * \since 1.15 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -471,6 +477,113 @@ enum vkd3d_shader_binding_flag VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), };
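
[For context (not part of the patch): the new structure type is consumed by chaining the vkd3d_shader_scan_hull_shader_tessellation_info structure, added further down in this header, through vkd3d_shader_compile_info when scanning. A minimal sketch, assuming "code"/"size" hold a DXBC/TPF hull shader blob and eliding error handling:]

    #include <vkd3d_shader.h>

    static int scan_hs_tessellation(const void *code, size_t size,
            struct vkd3d_shader_scan_hull_shader_tessellation_info *tess_info)
    {
        struct vkd3d_shader_compile_info info = {0};

        tess_info->type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO;
        tess_info->next = NULL;

        info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
        info.next = tess_info;
        info.source.code = code;
        info.source.size = size;
        info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF;
        info.target_type = VKD3D_SHADER_TARGET_NONE;
        info.log_level = VKD3D_SHADER_LOG_INFO;

        /* On success, tess_info->output_primitive and tess_info->partitioning
         * describe the hull shader's tessellator state. */
        return vkd3d_shader_scan(&info, NULL);
    }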
+/**
+ * The factor used to interpolate the fragment output colour with fog.
+ *
+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for specification of the
+ * interpolation factor as defined here.
+ *
+ * The following variables may be used to determine the interpolation factor:
+ *
+ * c = The fog coordinate value output from the vertex shader. This is an
+ *     inter-stage varying with the semantic name "FOG" and semantic index 0.
+ *     It may be modified by VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE.
+ * E = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_END.
+ * k = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE.
+ *
+ * \since 1.15
+ */
+enum vkd3d_shader_fog_fragment_mode
+{
+    /**
+     * No fog interpolation is applied; the output colour is passed through
+     * unmodified. Equivalently, the fog interpolation factor is 1.
+     */
+    VKD3D_SHADER_FOG_FRAGMENT_NONE = 0x0,
+    /**
+     * The fog interpolation factor is 2^-(k * c).
+     *
+     * In order to implement traditional exponential fog, as present in
+     * Direct3D and OpenGL, i.e.
+     *
+     *     e^-(density * c)
+     *
+     * set
+     *
+     *     k = density * log₂(e)
+     */
+    VKD3D_SHADER_FOG_FRAGMENT_EXP = 0x1,
+    /**
+     * The fog interpolation factor is 2^-((k * c)²).
+     *
+     * In order to implement traditional square-exponential fog, as present
+     * in Direct3D and OpenGL, i.e.
+     *
+     *     e^-((density * c)²)
+     *
+     * set
+     *
+     *     k = density * √log₂(e)
+     */
+    VKD3D_SHADER_FOG_FRAGMENT_EXP2 = 0x2,
+    /**
+     * The fog interpolation factor is (E - c) * k.
+     *
+     * In order to implement traditional linear fog, as present in Direct3D
+     * and OpenGL, i.e.
+     *
+     *     (end - c) / (end - start)
+     *
+     * set
+     *
+     *     E = end
+     *     k = 1 / (end - start)
+     */
+    VKD3D_SHADER_FOG_FRAGMENT_LINEAR = 0x3,
+
+    VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_FOG_FRAGMENT_MODE),
+};
+
+/**
+ * The source of the fog varying output by a pre-rasterization shader.
+ * The fog varying is defined as the output varying with the semantic name
+ * "FOG" and semantic index 0.
+ *
+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE for further documentation of
+ * this parameter.
+ *
+ * \since 1.15
+ */
+enum vkd3d_shader_fog_source
+{
+    /**
+     * The source shader is not modified. That is, the fog varying in the
+     * target shader is the original fog varying, if present.
+     */
+    VKD3D_SHADER_FOG_SOURCE_FOG = 0x0,
+    /**
+     * If the source shader has a fog varying, it is not modified.
+     * Otherwise, if the source shader outputs a varying with semantic name
+     * "COLOR" and semantic index 1 whose mask includes a W component, said
+     * W component is output as the fog varying.
+     * Otherwise, no fog varying is output.
+     */
+    VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W = 0x1,
+    /**
+     * The fog source is the Z component of the position output by the
+     * vertex shader.
+     */
+    VKD3D_SHADER_FOG_SOURCE_Z = 0x2,
+    /**
+     * The fog source is the W component of the position output by the
+     * vertex shader.
+     */
+    VKD3D_SHADER_FOG_SOURCE_W = 0x3,
+
+    VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_FOG_SOURCE),
+};
+
 /**
  * The manner in which a parameter value is provided to the shader, used in
  * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1.
@@ -734,6 +847,97 @@ enum vkd3d_shader_parameter_name
      * \since 1.14
      */
     VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE,
+    /**
+     * Fog mode used in fragment shaders.
+     *
+     * The value specified by this parameter must be a member of
+     * enum vkd3d_shader_fog_fragment_mode.
+     *
+     * If not VKD3D_SHADER_FOG_FRAGMENT_NONE, the pixel shader colour output
+     * at location 0 is linearly interpolated with the fog colour defined by
+     * VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR. The interpolation factor is
+     * defined according to the enumerant selected by this parameter.
+     * The interpolated value is then output instead of the original value
+     * at location 0.
+     *
+     * An interpolation factor of 0 selects the fog colour; a factor of 1
+     * selects the original colour output. The interpolation factor is
+     * clamped to the [0, 1] range before interpolating.
+     *
+     * The default value is VKD3D_SHADER_FOG_FRAGMENT_NONE.
+     *
+     * The data type for this parameter must be
+     * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32.
+     *
+     * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in
+     * this version of vkd3d-shader.
+     *
+     * \since 1.15
+     */
+    VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE,
+    /**
+     * Fog colour.
+     * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation
+     * of fog.
+     *
+     * The data type for this parameter must be
+     * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4.
+     *
+     * The default value is transparent black, i.e. the vector {0, 0, 0, 0}.
+     *
+     * \since 1.15
+     */
+    VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR,
+    /**
+     * End coordinate for linear fog.
+     * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation
+     * of fog.
+     *
+     * The data type for this parameter must be
+     * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32.
+     *
+     * The default value is 1.0.
+     *
+     * \since 1.15
+     */
+    VKD3D_SHADER_PARAMETER_NAME_FOG_END,
+    /**
+     * Scale value for fog.
+     * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation
+     * of fog.
+     *
+     * The data type for this parameter must be
+     * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32.
+     *
+     * The default value is 1.0.
+     *
+     * \since 1.15
+     */
+    VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE,
+    /**
+     * Fog source. The value specified by this parameter must be a member of
+     * enum vkd3d_shader_fog_source.
+     *
+     * This parameter replaces or supplements the fog varying output by a
+     * pre-rasterization shader. The fog varying is defined as the output
+     * varying with the semantic name "FOG" and semantic index 0.
+     *
+     * Together with the other fog parameters, this parameter can be used to
+     * implement fixed-function fog, as present in Direct3D versions up to 9,
+     * if the target environment does not support fog as part of its own
+     * fixed-function API (as is the case for Vulkan and core OpenGL).
+     *
+     * The default value is VKD3D_SHADER_FOG_SOURCE_FOG.
+     *
+     * The data type for this parameter must be
+     * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32.
+     *
+     * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in
+     * this version of vkd3d-shader.
+     *
+     * \since 1.15
+     */
+    VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), }; @@ -917,6 +1121,8 @@ enum vkd3d_shader_d3dbc_constant_register VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, /** The boolean constant register set, b# in Direct3D assembly. */ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_D3DBC_CONSTANT_REGISTER), };
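
[A practical note on the fog formulas above (an illustration, not part of the patch): deriving the FOG_END and FOG_SCALE parameter values from traditional Direct3D fog state is a one-liner per mode. A hypothetical helper, with names invented here; M_LOG2E is POSIX's log₂(e):]

    #include <math.h>

    #include <vkd3d_shader.h>

    /* Derive FOG_END/FOG_SCALE from classic fog state, following the
     * equivalences documented in enum vkd3d_shader_fog_fragment_mode. */
    static void compute_fog_parameters(enum vkd3d_shader_fog_fragment_mode mode,
            float start, float end, float density, float *fog_end, float *fog_scale)
    {
        *fog_end = 1.0f;
        *fog_scale = 1.0f;

        switch (mode)
        {
            case VKD3D_SHADER_FOG_FRAGMENT_LINEAR:
                /* (E - c) * k == (end - c) / (end - start). */
                *fog_end = end;
                *fog_scale = 1.0f / (end - start);
                break;

            case VKD3D_SHADER_FOG_FRAGMENT_EXP:
                /* 2^-(k * c) == e^-(density * c). */
                *fog_scale = density * (float)M_LOG2E;
                break;

            case VKD3D_SHADER_FOG_FRAGMENT_EXP2:
                /* 2^-((k * c)²) == e^-((density * c)²). */
                *fog_scale = density * sqrtf((float)M_LOG2E);
                break;

            default:
                break;
        }
    }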
/** @@ -2040,6 +2246,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info unsigned int combined_sampler_count; };
+/** + * A chained structure describing the tessellation information in a hull shader. + * + * This structure extends vkd3d_shader_compile_info. + * + * \since 1.15 + */ +struct vkd3d_shader_scan_hull_shader_tessellation_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The tessellation output primitive. */ + enum vkd3d_shader_tessellator_output_primitive output_primitive; + /** The tessellation partitioning mode. */ + enum vkd3d_shader_tessellator_partitioning partitioning; +}; + /** * Data type of a shader varying, returned as part of struct * vkd3d_shader_signature_element. @@ -2527,6 +2753,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * - vkd3d_shader_preprocess_info * - vkd3d_shader_scan_combined_resource_sampler_info * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_hull_shader_tessellation_info * - vkd3d_shader_scan_signature_info * - vkd3d_shader_spirv_domain_shader_target_info * - vkd3d_shader_spirv_target_info @@ -2714,9 +2941,10 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * \param compile_info A chained structure containing scan parameters. * \n * The scanner supports the following chained structures: + * - vkd3d_shader_scan_combined_resource_sampler_info * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_hull_shader_tessellation_info * - vkd3d_shader_scan_signature_info - * - vkd3d_shader_scan_combined_resource_sampler_info * \n * Although the \a compile_info parameter is read-only, chained structures * passed to this function need not be, and may serve as output parameters, diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index 9a92f0ead02..32862024b90 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -97,6 +97,17 @@ static void vkd3d_dbg_output(const char *fmt, ...) va_end(args); }
+#if HAVE_PTHREAD_THREADID_NP +static uint64_t get_pthread_threadid(void) +{ + uint64_t thread_id; + + pthread_threadid_np(NULL, &thread_id); + + return thread_id; +} +#endif + void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) { va_list args; @@ -108,6 +119,8 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); #elif HAVE_GETTID vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); +#elif HAVE_PTHREAD_THREADID_NP + vkd3d_dbg_output("vkd3d:%"PRIu64":%s:%s ", get_pthread_threadid(), debug_level_names[level], function); #else vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); #endif diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 7c5444f63a3..0639da83aa6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -49,7 +49,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_BFREV ] = "bfrev", [VKD3DSIH_BRANCH ] = "branch", [VKD3DSIH_BREAK ] = "break", - [VKD3DSIH_BREAKC ] = "breakc", + [VKD3DSIH_BREAKC ] = "break", [VKD3DSIH_BREAKP ] = "breakp", [VKD3DSIH_BUFINFO ] = "bufinfo", [VKD3DSIH_CALL ] = "call", @@ -183,7 +183,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_IDIV ] = "idiv", [VKD3DSIH_IEQ ] = "ieq", [VKD3DSIH_IF ] = "if", - [VKD3DSIH_IFC ] = "ifc", + [VKD3DSIH_IFC ] = "if", [VKD3DSIH_IGE ] = "ige", [VKD3DSIH_ILT ] = "ilt", [VKD3DSIH_IMAD ] = "imad", @@ -354,6 +354,64 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_XOR ] = "xor", };
+static const char * const shader_register_names[] = +{ + [VKD3DSPR_ADDR ] = "a", + [VKD3DSPR_ATTROUT ] = "oD", + [VKD3DSPR_COLOROUT ] = "oC", + [VKD3DSPR_COMBINED_SAMPLER ] = "s", + [VKD3DSPR_CONST ] = "c", + [VKD3DSPR_CONSTBOOL ] = "b", + [VKD3DSPR_CONSTBUFFER ] = "cb", + [VKD3DSPR_CONSTINT ] = "i", + [VKD3DSPR_COVERAGE ] = "vCoverage", + [VKD3DSPR_DEPTHOUT ] = "oDepth", + [VKD3DSPR_DEPTHOUTGE ] = "oDepthGE", + [VKD3DSPR_DEPTHOUTLE ] = "oDepthLE", + [VKD3DSPR_FORKINSTID ] = "vForkInstanceId", + [VKD3DSPR_FUNCTIONBODY ] = "fb", + [VKD3DSPR_FUNCTIONPOINTER ] = "fp", + [VKD3DSPR_GROUPSHAREDMEM ] = "g", + [VKD3DSPR_GSINSTID ] = "vGSInstanceID", + [VKD3DSPR_IDXTEMP ] = "x", + [VKD3DSPR_IMMCONST ] = "l", + [VKD3DSPR_IMMCONST64 ] = "d", + [VKD3DSPR_IMMCONSTBUFFER ] = "icb", + [VKD3DSPR_INCONTROLPOINT ] = "vicp", + [VKD3DSPR_INPUT ] = "v", + [VKD3DSPR_JOININSTID ] = "vJoinInstanceId", + [VKD3DSPR_LABEL ] = "l", + [VKD3DSPR_LOCALTHREADID ] = "vThreadIDInGroup", + [VKD3DSPR_LOCALTHREADINDEX ] = "vThreadIDInGroupFlattened", + [VKD3DSPR_LOOP ] = "aL", + [VKD3DSPR_NULL ] = "null", + [VKD3DSPR_OUTCONTROLPOINT ] = "vocp", + [VKD3DSPR_OUTPOINTID ] = "vOutputControlPointID", + [VKD3DSPR_OUTPUT ] = "o", + [VKD3DSPR_OUTSTENCILREF ] = "oStencilRef", + [VKD3DSPR_PARAMETER ] = "parameter", + [VKD3DSPR_PATCHCONST ] = "vpc", + [VKD3DSPR_POINT_COORD ] = "vPointCoord", + [VKD3DSPR_PREDICATE ] = "p", + [VKD3DSPR_PRIMID ] = "primID", + [VKD3DSPR_RASTERIZER ] = "rasterizer", + [VKD3DSPR_RESOURCE ] = "t", + [VKD3DSPR_SAMPLEMASK ] = "oMask", + [VKD3DSPR_SAMPLER ] = "s", + [VKD3DSPR_SSA ] = "sr", + [VKD3DSPR_STREAM ] = "m", + [VKD3DSPR_TEMP ] = "r", + [VKD3DSPR_TESSCOORD ] = "vDomainLocation", + [VKD3DSPR_TEXCRDOUT ] = "oT", + [VKD3DSPR_TEXTURE ] = "t", + [VKD3DSPR_THREADGROUPID ] = "vThreadGroupID", + [VKD3DSPR_THREADID ] = "vThreadID", + [VKD3DSPR_UAV ] = "u", + [VKD3DSPR_UNDEF ] = "undef", + [VKD3DSPR_WAVELANECOUNT ] = "vWaveLaneCount", + [VKD3DSPR_WAVELANEINDEX ] = "vWaveLaneIndex", +}; + struct vkd3d_d3d_asm_colours { const char *reset; @@ -377,22 +435,6 @@ struct vkd3d_d3d_asm_compiler const struct vkd3d_shader_instruction *current; };
-/* Convert floating point offset relative to a register file to an absolute - * offset for float constants. */ -static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) -{ - switch (register_type) - { - case VKD3DSPR_CONST: return register_idx; - case VKD3DSPR_CONST2: return 2048 + register_idx; - case VKD3DSPR_CONST3: return 4096 + register_idx; - case VKD3DSPR_CONST4: return 6144 + register_idx; - default: - FIXME("Unsupported register type: %u.\n", register_type); - return register_idx; - } -} - static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags) { unsigned int i; @@ -815,7 +857,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, usage = "tessfactor"; break; case VKD3D_DECL_USAGE_POSITIONT: - usage = "positionT"; + usage = "positiont"; indexed = true; break; case VKD3D_DECL_USAGE_FOG: @@ -966,82 +1008,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); switch (reg->type) { - case VKD3DSPR_TEMP: - vkd3d_string_buffer_printf(buffer, "r"); - break; - - case VKD3DSPR_INPUT: - vkd3d_string_buffer_printf(buffer, "v"); - break; - - case VKD3DSPR_CONST: - case VKD3DSPR_CONST2: - case VKD3DSPR_CONST3: - case VKD3DSPR_CONST4: - vkd3d_string_buffer_printf(buffer, "c"); - offset = shader_get_float_offset(reg->type, offset); - break; - - case VKD3DSPR_TEXTURE: /* vs: case VKD3DSPR_ADDR */ - vkd3d_string_buffer_printf(buffer, "%c", - compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? 't' : 'a'); - break; - case VKD3DSPR_RASTOUT: vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); break;
- case VKD3DSPR_COLOROUT: - vkd3d_string_buffer_printf(buffer, "oC"); - break; - - case VKD3DSPR_DEPTHOUT: - vkd3d_string_buffer_printf(buffer, "oDepth"); - break; - - case VKD3DSPR_DEPTHOUTGE: - vkd3d_string_buffer_printf(buffer, "oDepthGE"); - break; - - case VKD3DSPR_DEPTHOUTLE: - vkd3d_string_buffer_printf(buffer, "oDepthLE"); - break; - - case VKD3DSPR_ATTROUT: - vkd3d_string_buffer_printf(buffer, "oD"); - break; - - case VKD3DSPR_TEXCRDOUT: - /* Vertex shaders >= 3.0 use general purpose output registers - * (VKD3DSPR_OUTPUT), which can include an address token. */ - if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) - vkd3d_string_buffer_printf(buffer, "o"); - else - vkd3d_string_buffer_printf(buffer, "oT"); - break; - - case VKD3DSPR_CONSTINT: - vkd3d_string_buffer_printf(buffer, "i"); - break; - - case VKD3DSPR_CONSTBOOL: - vkd3d_string_buffer_printf(buffer, "b"); - break; - - case VKD3DSPR_LABEL: - vkd3d_string_buffer_printf(buffer, "l"); - break; - - case VKD3DSPR_LOOP: - vkd3d_string_buffer_printf(buffer, "aL"); - break; - - case VKD3DSPR_COMBINED_SAMPLER: - case VKD3DSPR_SAMPLER: - vkd3d_string_buffer_printf(buffer, "s"); - is_descriptor = true; - break; - case VKD3DSPR_MISCTYPE: if (offset > 1) vkd3d_string_buffer_printf(buffer, "%s<unhandled misctype %#x>%s", @@ -1050,156 +1020,20 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); break;
- case VKD3DSPR_PREDICATE: - vkd3d_string_buffer_printf(buffer, "p"); - break; - - case VKD3DSPR_IMMCONST: - vkd3d_string_buffer_printf(buffer, "l"); - break; - - case VKD3DSPR_IMMCONST64: - vkd3d_string_buffer_printf(buffer, "d"); - break; - + case VKD3DSPR_COMBINED_SAMPLER: + case VKD3DSPR_SAMPLER: case VKD3DSPR_CONSTBUFFER: - vkd3d_string_buffer_printf(buffer, "cb"); - is_descriptor = true; - break; - - case VKD3DSPR_IMMCONSTBUFFER: - vkd3d_string_buffer_printf(buffer, "icb"); - break; - - case VKD3DSPR_PRIMID: - vkd3d_string_buffer_printf(buffer, "primID"); - break; - - case VKD3DSPR_NULL: - vkd3d_string_buffer_printf(buffer, "null"); - break; - - case VKD3DSPR_RASTERIZER: - vkd3d_string_buffer_printf(buffer, "rasterizer"); - break; - case VKD3DSPR_RESOURCE: - vkd3d_string_buffer_printf(buffer, "t"); - is_descriptor = true; - break; - case VKD3DSPR_UAV: - vkd3d_string_buffer_printf(buffer, "u"); is_descriptor = true; - break; - - case VKD3DSPR_OUTPOINTID: - vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); - break; - - case VKD3DSPR_FORKINSTID: - vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); - break; - - case VKD3DSPR_JOININSTID: - vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); - break; - - case VKD3DSPR_INCONTROLPOINT: - vkd3d_string_buffer_printf(buffer, "vicp"); - break; - - case VKD3DSPR_OUTCONTROLPOINT: - vkd3d_string_buffer_printf(buffer, "vocp"); - break; - - case VKD3DSPR_PATCHCONST: - vkd3d_string_buffer_printf(buffer, "vpc"); - break; - - case VKD3DSPR_TESSCOORD: - vkd3d_string_buffer_printf(buffer, "vDomainLocation"); - break; - - case VKD3DSPR_GROUPSHAREDMEM: - vkd3d_string_buffer_printf(buffer, "g"); - break; - - case VKD3DSPR_THREADID: - vkd3d_string_buffer_printf(buffer, "vThreadID"); - break; - - case VKD3DSPR_THREADGROUPID: - vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); - break; - - case VKD3DSPR_LOCALTHREADID: - vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); - break; - - case VKD3DSPR_LOCALTHREADINDEX: - vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); - break; - - case VKD3DSPR_IDXTEMP: - vkd3d_string_buffer_printf(buffer, "x"); - break; - - case VKD3DSPR_STREAM: - vkd3d_string_buffer_printf(buffer, "m"); - break; - - case VKD3DSPR_FUNCTIONBODY: - vkd3d_string_buffer_printf(buffer, "fb"); - break; - - case VKD3DSPR_FUNCTIONPOINTER: - vkd3d_string_buffer_printf(buffer, "fp"); - break; - - case VKD3DSPR_COVERAGE: - vkd3d_string_buffer_printf(buffer, "vCoverage"); - break; - - case VKD3DSPR_SAMPLEMASK: - vkd3d_string_buffer_printf(buffer, "oMask"); - break; - - case VKD3DSPR_GSINSTID: - vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); - break; - - case VKD3DSPR_OUTSTENCILREF: - vkd3d_string_buffer_printf(buffer, "oStencilRef"); - break; - - case VKD3DSPR_UNDEF: - vkd3d_string_buffer_printf(buffer, "undef"); - break; - - case VKD3DSPR_SSA: - vkd3d_string_buffer_printf(buffer, "sr"); - break; - - case VKD3DSPR_WAVELANECOUNT: - vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); - break; - - case VKD3DSPR_WAVELANEINDEX: - vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); - break; - - case VKD3DSPR_PARAMETER: - vkd3d_string_buffer_printf(buffer, "parameter"); - break; - - case VKD3DSPR_POINT_COORD: - vkd3d_string_buffer_printf(buffer, "vPointCoord"); - break; + /* fall through */
default: - vkd3d_string_buffer_printf(buffer, "%s<unhandled register type %#x>%s", - compiler->colours.error, reg->type, compiler->colours.reset); + if (reg->type < ARRAY_SIZE(shader_register_names) && shader_register_names[reg->type]) + vkd3d_string_buffer_printf(buffer, "%s", shader_register_names[reg->type]); + else + vkd3d_string_buffer_printf(buffer, "%s<unhandled register type %#x>%s", + compiler->colours.error, reg->type, compiler->colours.reset); break; }
@@ -1346,8 +1180,8 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const bool is_sm_5_1 = vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1);
if (reg->idx[0].rel_addr || reg->type == VKD3DSPR_IMMCONSTBUFFER - || reg->type == VKD3DSPR_INCONTROLPOINT || (reg->type == VKD3DSPR_INPUT - && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY + || reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT + || (reg->type == VKD3DSPR_INPUT && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY || compiler->shader_version.type == VKD3D_SHADER_TYPE_HULL))) { vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); @@ -2132,8 +1966,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
case VKD3DSIH_DEF: vkd3d_string_buffer_printf(buffer, " %sc%u%s", compiler->colours.reg, - shader_get_float_offset(ins->dst[0].reg.type, ins->dst[0].reg.idx[0].offset), - compiler->colours.reset); + ins->dst[0].reg.idx[0].offset, compiler->colours.reset); shader_print_float_literal(compiler, " = ", ins->src[0].reg.u.immconst_f32[0], ""); shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[1], ""); shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[2], ""); @@ -2547,6 +2380,33 @@ static void trace_signature(const struct shader_signature *signature, const char vkd3d_string_buffer_cleanup(&buffer); }
+static void trace_io_declarations(const struct vsir_program *program) +{ + struct vkd3d_string_buffer buffer; + bool empty = true; + unsigned int i; + + vkd3d_string_buffer_init(&buffer); + + vkd3d_string_buffer_printf(&buffer, "Input/output declarations:"); + + for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) + { + if (bitmap_is_set(program->io_dcls, i)) + { + empty = false; + vkd3d_string_buffer_printf(&buffer, " %u", i); + } + } + + if (empty) + vkd3d_string_buffer_printf(&buffer, " empty"); + + TRACE("%s\n", buffer.buffer); + + vkd3d_string_buffer_cleanup(&buffer); +} + void vsir_program_trace(const struct vsir_program *program) { const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; @@ -2556,6 +2416,7 @@ void vsir_program_trace(const struct vsir_program *program) trace_signature(&program->input_signature, "Input"); trace_signature(&program->output_signature, "Output"); trace_signature(&program->patch_constant_signature, "Patch-constant"); + trace_io_declarations(program);
if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) return; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 9e2eacbcfa6..58e35cf22e8 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -8,7 +8,7 @@ * Copyright 2006 Ivan Gyurdiev * Copyright 2007-2008 Stefan Dösinger for CodeWeavers * Copyright 2009, 2021 Henri Verbeet for CodeWeavers - * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * Copyright 2019-2020, 2023-2024 Elizabeth Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -25,7 +25,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
-#include "hlsl.h" +#include "vkd3d_shader_private.h"
#define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu @@ -89,6 +89,32 @@ #define VKD3D_SM1_VERSION_MAJOR(version) (((version) >> 8u) & 0xffu) #define VKD3D_SM1_VERSION_MINOR(version) (((version) >> 0u) & 0xffu)
+enum vkd3d_sm1_register_type +{ + VKD3D_SM1_REG_TEMP = 0x00, + VKD3D_SM1_REG_INPUT = 0x01, + VKD3D_SM1_REG_CONST = 0x02, + VKD3D_SM1_REG_ADDR = 0x03, + VKD3D_SM1_REG_TEXTURE = 0x03, + VKD3D_SM1_REG_RASTOUT = 0x04, + VKD3D_SM1_REG_ATTROUT = 0x05, + VKD3D_SM1_REG_TEXCRDOUT = 0x06, + VKD3D_SM1_REG_OUTPUT = 0x06, + VKD3D_SM1_REG_CONSTINT = 0x07, + VKD3D_SM1_REG_COLOROUT = 0x08, + VKD3D_SM1_REG_DEPTHOUT = 0x09, + VKD3D_SM1_REG_SAMPLER = 0x0a, + VKD3D_SM1_REG_CONST2 = 0x0b, + VKD3D_SM1_REG_CONST3 = 0x0c, + VKD3D_SM1_REG_CONST4 = 0x0d, + VKD3D_SM1_REG_CONSTBOOL = 0x0e, + VKD3D_SM1_REG_LOOP = 0x0f, + VKD3D_SM1_REG_TEMPFLOAT16 = 0x10, + VKD3D_SM1_REG_MISCTYPE = 0x11, + VKD3D_SM1_REG_LABEL = 0x12, + VKD3D_SM1_REG_PREDICATE = 0x13, +}; + enum vkd3d_sm1_address_mode_type { VKD3D_SM1_ADDRESS_MODE_ABSOLUTE = 0x0, @@ -235,7 +261,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = /* Arithmetic */ {VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP}, {VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV}, - {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}}, {VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD}, {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, @@ -248,22 +274,22 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, - {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, + {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, {VKD3D_SM1_OP_LIT, 1, 1, VKD3DSIH_LIT}, {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, - {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, + {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP, {2, 0}}, {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, - {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, - {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, - {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, { 2, 1}}, - {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM,}, - {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, - {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, + {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, + {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, {2, 1}}, + {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}}, + {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, + {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, + {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}}, /* Matrix */ {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, @@ -274,27 +300,27 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, /* Constant definitions */ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 0}}, /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}, {~0u, ~0u}}, - 
{VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, - {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}, {~0u, ~0u}}, - - {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, - {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}}, + {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}}, + {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, + {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}}, + {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}}, + {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, + {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, + {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, + {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}}, + {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}}, + {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}}, + {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}}, + {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}}, + {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}}, + + {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, + {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, {0, 0, 0, VKD3DSIH_INVALID}, };
@@ -307,92 +333,115 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, {VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL}, - {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP}, - {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ}, + {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP, {2, 0}}, + {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ, {2, 0}}, {VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3}, - {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4}, - {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN}, - {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, - {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, - {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, - {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, - {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, - {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, - {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, - {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, - {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, + {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4, {1, 2}}, + {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN, {2, 0}}, + {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX, {2, 0}}, + {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, + {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP, {2, 0}}, + {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG, {2, 0}}, {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, - {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, - {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, { 1, 4}}, - {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}, { 3, 0}}, - {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, - {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, - {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM}, - {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, - {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC, {2, 0}}, + {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, {1, 4}}, + {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}}, + {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, + {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, + {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, + {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, + {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}}, + {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}}, /* Matrix */ - {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, - {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, - {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4}, - {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, - {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, + {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4, {2, 0}}, + {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3, {2, 0}}, + {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4, {2, 0}}, + {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3, {2, 0}}, + {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2, {2, 0}}, /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, + {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL, {2, 0}}, /* Constant definitions */ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 1}}, /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, - 
{VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, - {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}}, + {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}}, + {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, + {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}}, + {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}}, + {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, + {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, + {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, + {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}}, + {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}}, + {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}}, + {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}}, + {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}}, + {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}}, /* Texture */ - {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1 ,4}, { 1, 4}}, - {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1 ,0}, { 3, 0}}, - {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, { 1, 4}}, - {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, { 0, 0}}, - {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, { 1, 3}}, - {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, { 1, 4}}, - {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, { 1, 4}}, - {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, - {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE}, + {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1, 0}}, + {VKD3D_SM1_OP_TEX, 1, 0, 
VKD3DSIH_TEX, {0, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}}, + {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, {0, 0}}, + {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, {1, 3}}, + {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}}, + {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}}, + {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}}, + {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, + {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, + {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE, {1, 4}, {1, 4}}, {0, 0, 0, VKD3DSIH_INVALID}, };
+static const struct +{ + enum vkd3d_sm1_register_type d3dbc_type; + enum vkd3d_shader_register_type vsir_type; +} +register_types[] = +{ + {VKD3D_SM1_REG_TEMP, VKD3DSPR_TEMP}, + {VKD3D_SM1_REG_INPUT, VKD3DSPR_INPUT}, + {VKD3D_SM1_REG_CONST, VKD3DSPR_CONST}, + {VKD3D_SM1_REG_ADDR, VKD3DSPR_ADDR}, + {VKD3D_SM1_REG_TEXTURE, VKD3DSPR_TEXTURE}, + {VKD3D_SM1_REG_RASTOUT, VKD3DSPR_RASTOUT}, + {VKD3D_SM1_REG_ATTROUT, VKD3DSPR_ATTROUT}, + {VKD3D_SM1_REG_OUTPUT, VKD3DSPR_OUTPUT}, + {VKD3D_SM1_REG_TEXCRDOUT, VKD3DSPR_TEXCRDOUT}, + {VKD3D_SM1_REG_CONSTINT, VKD3DSPR_CONSTINT}, + {VKD3D_SM1_REG_COLOROUT, VKD3DSPR_COLOROUT}, + {VKD3D_SM1_REG_DEPTHOUT, VKD3DSPR_DEPTHOUT}, + {VKD3D_SM1_REG_SAMPLER, VKD3DSPR_COMBINED_SAMPLER}, + {VKD3D_SM1_REG_CONSTBOOL, VKD3DSPR_CONSTBOOL}, + {VKD3D_SM1_REG_LOOP, VKD3DSPR_LOOP}, + {VKD3D_SM1_REG_TEMPFLOAT16, VKD3DSPR_TEMPFLOAT16}, + {VKD3D_SM1_REG_MISCTYPE, VKD3DSPR_MISCTYPE}, + {VKD3D_SM1_REG_LABEL, VKD3DSPR_LABEL}, + {VKD3D_SM1_REG_PREDICATE, VKD3DSPR_PREDICATE}, +}; + static const enum vkd3d_shader_resource_type resource_type_table[] = { /* VKD3D_SM1_RESOURCE_UNKNOWN */ VKD3D_SHADER_RESOURCE_NONE, @@ -458,6 +507,7 @@ static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_ switch (reg_type) { case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_ADDR: return 0;
default: @@ -465,52 +515,82 @@ static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_ } }
-static void shader_sm1_parse_src_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, - struct vkd3d_shader_src_param *src) +static enum vkd3d_shader_register_type parse_register_type( + struct vkd3d_shader_sm1_parser *sm1, uint32_t param, unsigned int *index_offset) { - enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) + enum vkd3d_sm1_register_type d3dbc_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); - unsigned int idx_count = idx_count_from_reg_type(reg_type);
- vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); - src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - src->reg.non_uniform = false; - if (idx_count == 1) + *index_offset = 0; + + if (d3dbc_type == VKD3D_SM1_REG_CONST2) { - src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; - src->reg.idx[0].rel_addr = rel_addr; + *index_offset = 2048; + return VKD3DSPR_CONST; } - if (src->reg.type == VKD3DSPR_SAMPLER) - src->reg.dimension = VSIR_DIMENSION_NONE; - else if (src->reg.type == VKD3DSPR_DEPTHOUT) - src->reg.dimension = VSIR_DIMENSION_SCALAR; - else - src->reg.dimension = VSIR_DIMENSION_VEC4; - src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); - src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; + + if (d3dbc_type == VKD3D_SM1_REG_CONST3) + { + *index_offset = 4096; + return VKD3DSPR_CONST; + } + + if (d3dbc_type == VKD3D_SM1_REG_CONST4) + { + *index_offset = 6144; + return VKD3DSPR_CONST; + } + + if (d3dbc_type == VKD3D_SM1_REG_ADDR) + return sm1->p.program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? VKD3DSPR_TEXTURE : VKD3DSPR_ADDR; + if (d3dbc_type == VKD3D_SM1_REG_TEXCRDOUT) + return vkd3d_shader_ver_ge(&sm1->p.program->shader_version, 3, 0) ? VKD3DSPR_OUTPUT : VKD3DSPR_TEXCRDOUT; + + for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i) + { + if (register_types[i].d3dbc_type == d3dbc_type) + return register_types[i].vsir_type; + } + + return VKD3DSPR_INVALID; }
-static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, - struct vkd3d_shader_dst_param *dst) +static void d3dbc_parse_register(struct vkd3d_shader_sm1_parser *d3dbc, + struct vkd3d_shader_register *reg, uint32_t param, struct vkd3d_shader_src_param *rel_addr) { - enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) - | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); - unsigned int idx_count = idx_count_from_reg_type(reg_type); + enum vkd3d_shader_register_type reg_type; + unsigned int index_offset, idx_count;
- vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); - dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - dst->reg.non_uniform = false; + reg_type = parse_register_type(d3dbc, param, &index_offset); + idx_count = idx_count_from_reg_type(reg_type); + vsir_register_init(reg, reg_type, VKD3D_DATA_FLOAT, idx_count); + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; if (idx_count == 1) { - dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; - dst->reg.idx[0].rel_addr = rel_addr; + reg->idx[0].offset = index_offset + (param & VKD3D_SM1_REGISTER_NUMBER_MASK); + reg->idx[0].rel_addr = rel_addr; } - if (dst->reg.type == VKD3DSPR_SAMPLER) - dst->reg.dimension = VSIR_DIMENSION_NONE; - else if (dst->reg.type == VKD3DSPR_DEPTHOUT) - dst->reg.dimension = VSIR_DIMENSION_SCALAR; + if (reg->type == VKD3DSPR_SAMPLER) + reg->dimension = VSIR_DIMENSION_NONE; + else if (reg->type == VKD3DSPR_DEPTHOUT) + reg->dimension = VSIR_DIMENSION_SCALAR; else - dst->reg.dimension = VSIR_DIMENSION_VEC4; + reg->dimension = VSIR_DIMENSION_VEC4; +} + +static void shader_sm1_parse_src_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param, + struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_src_param *src) +{ + d3dbc_parse_register(sm1, &src->reg, param, rel_addr); + src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); + src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; +} + +static void shader_sm1_parse_dst_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param, + struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_dst_param *dst) +{ + d3dbc_parse_register(sm1, &dst->reg, param, rel_addr); dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT;
@@ -638,7 +718,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, return; }
+    /* Normally VSIR mandates that the register mask is a subset of the usage
+     * mask, and the usage mask is a subset of the signature mask. This
+     * doesn't always happen with SM1-3 registers, because of the limited
+     * flexibility with expressing swizzles.
+     *
+     * For example, it's easy to find shaders like this:
+     *   ps_3_0
+     *   [...]
+     *   dcl_texcoord0 v0
+     *   [...]
+     *   texld r2.xyzw, v0.xyzw, s1.xyzw
+     *   [...]
+     *
+     * The dcl_texcoord0 instruction secretly has a .xy mask, which is used
+     * to compute the signature mask, but the texld instruction apparently
+     * uses all the components. Of course the last two components are
+     * ignored, but formally they seem to be used. So we end up with a
+     * signature element with mask .xy and usage mask .xyzw.
+     *
+     * In order to avoid this problem, when generating VSIR code with SM4
+     * normalisation level we remove the unused components in the write mask.
+     * We don't do that when targeting the SM1 normalisation level (i.e.,
+     * when disassembling) so as to generate the same disassembly code as
+     * native. */
     element->used_mask |= mask;
+    if (program->normalisation_level >= VSIR_NORMALISED_SM4)
+        element->used_mask &= element->mask;
 }
static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, @@ -666,26 +771,18 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask);
case VKD3DSPR_TEXTURE: - /* For vertex shaders, this is ADDR. */ - if (version->type == VKD3D_SHADER_TYPE_VERTEX) - return true; return add_signature_element(sm1, false, "TEXCOORD", register_index, VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask);
+ case VKD3DSPR_TEXCRDOUT: + return add_signature_element(sm1, true, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + case VKD3DSPR_OUTPUT: if (version->type == VKD3D_SHADER_TYPE_VERTEX) { - /* For sm < 2 vertex shaders, this is TEXCRDOUT. - * - * For sm3 vertex shaders, this is OUTPUT, but we already - * should have had a DCL instruction. */ - if (version->major == 3) - { - add_signature_mask(sm1, true, register_index, mask); - return true; - } - return add_signature_element(sm1, true, "TEXCOORD", register_index, - VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + add_signature_mask(sm1, true, register_index, mask); + return true; } /* fall through */
@@ -822,18 +919,6 @@ static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); break;
- case VKD3DSPR_CONST2: - record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); - break; - - case VKD3DSPR_CONST3: - record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); - break; - - case VKD3DSPR_CONST4: - record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); - break; - case VKD3DSPR_CONSTINT: record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); break; @@ -941,9 +1026,9 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const sm1->abort = true; return; } - shader_sm1_parse_src_param(addr_token, NULL, src_rel_addr); + shader_sm1_parse_src_param(sm1, addr_token, NULL, src_rel_addr); } - shader_sm1_parse_src_param(token, src_rel_addr, src_param); + shader_sm1_parse_src_param(sm1, token, src_rel_addr, src_param); }
static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, @@ -962,12 +1047,14 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const sm1->abort = true; return; } - shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); + shader_sm1_parse_src_param(sm1, addr_token, NULL, dst_rel_addr); } - shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); + shader_sm1_parse_dst_param(sm1, token, dst_rel_addr, dst_param);
if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) sm1->p.program->has_point_size = true; + if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_FOG) + sm1->p.program->has_fog = true; }
static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, @@ -1005,7 +1092,7 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, semantic->resource_data_type[1] = VKD3D_DATA_FLOAT; semantic->resource_data_type[2] = VKD3D_DATA_FLOAT; semantic->resource_data_type[3] = VKD3D_DATA_FLOAT; - shader_sm1_parse_dst_param(dst_token, NULL, &semantic->resource.reg); + shader_sm1_parse_dst_param(sm1, dst_token, NULL, &semantic->resource.reg); range = &semantic->resource.range; range->space = 0; range->first = range->last = semantic->resource.reg.reg.idx[0].offset; @@ -1268,6 +1355,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + enum vsir_normalisation_level normalisation_level; const uint32_t *code = compile_info->source.code; size_t code_size = compile_info->source.size; struct vkd3d_shader_version version; @@ -1318,9 +1406,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st sm1->start = &code[1]; sm1->end = &code[token_count];
+ normalisation_level = VSIR_NORMALISED_SM1; + if (compile_info->target_type != VKD3D_SHADER_TARGET_D3D_ASM) + normalisation_level = VSIR_NORMALISED_SM4; + /* Estimate instruction count to avoid reallocation in most shaders. */ if (!vsir_program_init(program, compile_info, &version, - code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level)) return VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); @@ -1525,555 +1617,208 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS); }
-D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( + struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) { - switch (type->class) - { - case HLSL_CLASS_ARRAY: - return hlsl_sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else - return D3DXPC_MATRIX_ROWS; - case HLSL_CLASS_SCALAR: - return D3DXPC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPC_OBJECT; - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_COMPUTE_SHADER: - case HLSL_CLASS_DOMAIN_SHADER: - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: - case HLSL_CLASS_NULL: - break; - } - - vkd3d_unreachable(); -} + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + const struct vkd3d_sm1_opcode_info *info; + unsigned int i = 0;
-D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) -{ - switch (type->class) + for (;;) { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: - switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - /* Actually double behaves differently depending on DLL version: - * For <= 36, it maps to D3DXPT_FLOAT. - * For 37-40, it maps to zero (D3DXPT_VOID). - * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* - * values are mostly compatible with D3DXPT_*). - * However, the latter two cases look like bugs, and a reasonable - * application certainly wouldn't know what to do with them. - * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ - case HLSL_TYPE_DOUBLE: - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - default: - vkd3d_unreachable(); - } - - case HLSL_CLASS_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_SAMPLER; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - - case HLSL_CLASS_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_TEXTURE; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; + info = &d3dbc->opcode_table[i++]; + if (info->vkd3d_opcode == VKD3DSIH_INVALID) + return NULL;
- case HLSL_CLASS_ARRAY: - return hlsl_sm1_base_type(type->e.array.type); - - case HLSL_CLASS_STRUCT: - return D3DXPT_VOID; - - case HLSL_CLASS_STRING: - return D3DXPT_STRING; - - case HLSL_CLASS_PIXEL_SHADER: - return D3DXPT_PIXELSHADER; - - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPT_VERTEXSHADER; - - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_COMPUTE_SHADER: - case HLSL_CLASS_DOMAIN_SHADER: - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: - case HLSL_CLASS_NULL: - break; + if (vkd3d_opcode == info->vkd3d_opcode + && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) + && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) + || !info->max_version.major)) + return info; } - - vkd3d_unreachable(); }
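For reference, the lookup above accepts an opcode when the target profile is at or above the table's minimum version and either at or below its maximum or the maximum is open-ended (max_version.major == 0). A minimal standalone sketch of that predicate, assuming profile versions order lexicographically by (major, minor):

#include <stdbool.h>

/* Sketch: true when (major, minor) lies within [min, max], with a zero
 * max_major marking an open-ended upper bound, as in the loop above. */
static bool profile_in_range(unsigned int major, unsigned int minor,
        unsigned int min_major, unsigned int min_minor,
        unsigned int max_major, unsigned int max_minor)
{
    if (major < min_major || (major == min_major && minor < min_minor))
        return false;
    if (!max_major)
        return true;
    return major < max_major || (major == max_major && minor <= max_minor);
}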
-static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( + struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { - const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); - unsigned int array_size = hlsl_get_multiarray_size(type); - unsigned int field_count = 0; - size_t fields_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; + const struct vkd3d_sm1_opcode_info *info;
- if (array_type->class == HLSL_CLASS_STRUCT) + if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm1_type(buffer, field->type, ctab_start); - } - - fields_offset = bytecode_align(buffer) - ctab_start; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset - ctab_start); - put_u32(buffer, field->type->bytecode_offset - ctab_start); - } + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "Opcode %#x not supported for shader profile.", ins->opcode); + d3dbc->failed = true; + return NULL; }
- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -} - -static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -{ - struct hlsl_ir_var *var; - - list_remove(&to_sort->extern_entry); - - LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + if (ins->dst_count != info->dst_count) { - if (strcmp(to_sort->name, var->name) < 0) - { - list_add_before(&var->extern_entry, &to_sort->extern_entry); - return; - } + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid destination count %u for vsir instruction %#x (expected %u).", + ins->dst_count, ins->opcode, info->dst_count); + d3dbc->failed = true; + return NULL; } - - list_add_tail(sorted, &to_sort->extern_entry); -} - -static void sm1_sort_externs(struct hlsl_ctx *ctx) -{ - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; - - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + if (ins->src_count != info->src_count) { - if (var->is_uniform) - sm1_sort_extern(&sorted, var); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid source count %u for vsir instruction %#x (expected %u).", + ins->src_count, ins->opcode, info->src_count); + d3dbc->failed = true; + return NULL; } - list_move_tail(&ctx->extern_vars, &sorted); + + return info; }
-void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_comment(struct d3dbc_compiler *d3dbc, + uint32_t tag, const struct vkd3d_shader_code *comment) { - size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; - struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { - if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - ++uniform_count; - - if (var->is_param && var->is_uniform) - { - char *new_name; - - if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) - return; - vkd3d_free((char *)var->name); - var->name = new_name; - } - } - } - - sm1_sort_externs(ctx); + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + size_t offset, start, end;
- size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + offset = put_u32(buffer, 0);
- ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */ - creator_offset = put_u32(buffer, 0); - put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); - vars_offset = put_u32(buffer, 0); - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ + start = put_u32(buffer, tag); + bytecode_put_bytes(buffer, comment->code, comment->size); + end = bytecode_align(buffer);
- vars_start = bytecode_align(buffer); - set_u32(buffer, vars_offset, vars_start - ctab_start); + set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t))); +}
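For reference, d3dbc_write_comment() above emits a single comment token whose low word is VKD3D_SM1_OP_COMMENT and whose high word is the payload length in 32-bit units (that is what the vkd3d_make_u32() call packs), followed by the payload itself. A minimal sketch of the token layout, taking the usual D3DSIO_COMMENT value 0xfffe as an assumption:

#include <stdint.h>
#include <stddef.h>

/* Sketch: build an SM1 comment token for a payload of the given size in
 * bytes; the payload follows the token, padded up to whole DWORDs. */
static uint32_t make_comment_token(size_t payload_bytes)
{
    uint32_t dwords = (uint32_t)((payload_bytes + 3) / 4);

    return 0xfffeu | (dwords << 16);
}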
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +static enum vkd3d_sm1_register_type d3dbc_register_type_from_vsir(const struct vkd3d_shader_register *reg) +{ + if (reg->type == VKD3DSPR_CONST) { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { - if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - put_u32(buffer, 0); /* name */ - if (r == HLSL_REGSET_NUMERIC) - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); - put_u32(buffer, var->bind_count[r]); - } - else - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); - put_u32(buffer, var->bind_count[r]); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* default value */ - } + if (reg->idx[0].offset >= 6144) + return VKD3D_SM1_REG_CONST4; + if (reg->idx[0].offset >= 4096) + return VKD3D_SM1_REG_CONST3; + if (reg->idx[0].offset >= 2048) + return VKD3D_SM1_REG_CONST2; }
- uniform_count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i) { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { - size_t var_offset, name_offset; - - if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); - - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, name_offset - ctab_start); - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); - - if (var->default_values) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; - unsigned int comp_count = hlsl_type_component_count(var->data_type); - unsigned int default_value_offset; - unsigned int k; - - default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); - set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); - - for (k = 0; k < comp_count; ++k) - { - struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); - unsigned int comp_offset; - enum hlsl_regset regset; - - comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); - if (regset == HLSL_REGSET_NUMERIC) - { - union - { - uint32_t u; - float f; - } uni; - - switch (comp_type->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: - if (ctx->double_as_float_alias) - uni.u = var->default_values[k].number.u; - else - uni.u = 0; - break; - - case HLSL_TYPE_INT: - uni.f = var->default_values[k].number.i; - break; - - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - uni.f = var->default_values[k].number.u; - break; - - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - uni.u = var->default_values[k].number.u; - break; - - default: - vkd3d_unreachable(); - } - - set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); - } - } - } - - ++uniform_count; - } + if (register_types[i].vsir_type == reg->type) + return register_types[i].d3dbc_type; }
- offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(buffer, creator_offset, offset - ctab_start); - - ctab_end = bytecode_align(buffer); - set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); + vkd3d_unreachable(); }
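Since the parser now folds CONST2/CONST3/CONST4 registers into a single flat float-constant index space (the 2048/4096/6144 offsets removed above), the writer recovers the bank from the flat index here and leaves the in-bank index to the register-number mask. A sketch of that round trip, assuming 2048 registers per bank and a 0x7ff register-number mask:

/* Sketch: split a flattened float-constant index into an SM1 bank
 * (0 = CONST, 1 = CONST2, 2 = CONST3, 3 = CONST4) and an in-bank index. */
static void split_flat_const_index(unsigned int flat, unsigned int *bank, unsigned int *index)
{
    *bank = flat / 2048u;
    *index = flat & 0x7ffu; /* equivalent to flat % 2048 */
}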
-static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) +static uint32_t sm1_encode_register_type(const struct vkd3d_shader_register *reg) { - return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) - | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); + enum vkd3d_sm1_register_type sm1_type = d3dbc_register_type_from_vsir(reg); + + return ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) + | ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); }
-struct sm1_instruction +static uint32_t swizzle_from_vsir(uint32_t swizzle) { - enum vkd3d_sm1_opcode opcode; - unsigned int flags; - - struct sm1_dst_register - { - enum vkd3d_shader_register_type type; - enum vkd3d_shader_dst_modifier mod; - unsigned int writemask; - uint32_t reg; - } dst; + uint32_t x = vsir_swizzle_get_component(swizzle, 0); + uint32_t y = vsir_swizzle_get_component(swizzle, 1); + uint32_t z = vsir_swizzle_get_component(swizzle, 2); + uint32_t w = vsir_swizzle_get_component(swizzle, 3);
- struct sm1_src_register - { - enum vkd3d_shader_register_type type; - enum vkd3d_shader_src_modifier mod; - unsigned int swizzle; - uint32_t reg; - } srcs[4]; - unsigned int src_count; + if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) + ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle);
- unsigned int has_dst; -}; + return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) + | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) + | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) + | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); +}
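swizzle_from_vsir() above narrows a vsir swizzle to SM1's packed form: one 2-bit source-component selector per destination component. A sketch of the packing, assuming VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(i) expands to 2 * i; a .yzxw swizzle then packs to 0xc9:

/* Sketch: pack four 2-bit selectors (0-3 = x..w), component i landing
 * at bit position 2 * i of the swizzle field. */
static unsigned int pack_sm1_swizzle(unsigned int x, unsigned int y,
        unsigned int z, unsigned int w)
{
    return (x & 3u) | ((y & 3u) << 2) | ((z & 3u) << 4) | ((w & 3u) << 6);
}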
-static bool is_inconsequential_instr(const struct sm1_instruction *instr) +static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins) { - const struct sm1_src_register *src = &instr->srcs[0]; - const struct sm1_dst_register *dst = &instr->dst; + const struct vkd3d_shader_dst_param *dst = &ins->dst[0]; + const struct vkd3d_shader_src_param *src = &ins->src[0]; unsigned int i;
- if (instr->opcode != VKD3D_SM1_OP_MOV) + if (ins->opcode != VKD3DSIH_MOV) return false; - if (dst->mod != VKD3DSPDM_NONE) + if (dst->modifiers != VKD3DSPDM_NONE) return false; - if (src->mod != VKD3DSPSM_NONE) + if (src->modifiers != VKD3DSPSM_NONE) return false; - if (src->type != dst->type) + if (src->reg.type != dst->reg.type) return false; - if (src->reg != dst->reg) + if (src->reg.idx[0].offset != dst->reg.idx[0].offset) return false;
for (i = 0; i < 4; ++i) { - if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) + if ((dst->write_mask & (1u << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) return false; }
return true; }
-static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg) { - VKD3D_ASSERT(reg->writemask); + VKD3D_ASSERT(reg->write_mask); put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER - | sm1_encode_register_type(reg->type) - | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT) - | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg); + | sm1_encode_register_type(®->reg) + | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) + | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) + | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); }
-static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - const struct sm1_src_register *reg) +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg) { put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER - | sm1_encode_register_type(reg->type) - | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT) - | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg); + | sm1_encode_register_type(®->reg) + | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) + | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) + | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); }
-static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) +static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - uint32_t token = instr->opcode; + const struct vkd3d_sm1_opcode_info *info; unsigned int i; + uint32_t token; + + if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) + return;
- if (is_inconsequential_instr(instr)) + if (is_inconsequential_instr(ins)) return;
- token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + token = info->sm1_opcode; + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (ins->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT);
if (version->major > 1) - token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + token |= (ins->dst_count + ins->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token);
- if (instr->has_dst) - write_sm1_dst_register(buffer, &instr->dst); - - for (i = 0; i < instr->src_count; ++i) - write_sm1_src_register(buffer, &instr->srcs[i]); -}; - -static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( - struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) -{ - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - const struct vkd3d_sm1_opcode_info *info; - unsigned int i = 0; - - for (;;) + for (i = 0; i < ins->dst_count; ++i) { - info = &d3dbc->opcode_table[i++]; - if (info->vkd3d_opcode == VKD3DSIH_INVALID) - return NULL; - - if (vkd3d_opcode == info->vkd3d_opcode - && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) - && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) - || !info->max_version.major)) - return info; - } -} - -static uint32_t swizzle_from_vsir(uint32_t swizzle) -{ - uint32_t x = vsir_swizzle_get_component(swizzle, 0); - uint32_t y = vsir_swizzle_get_component(swizzle, 1); - uint32_t z = vsir_swizzle_get_component(swizzle, 2); - uint32_t w = vsir_swizzle_get_component(swizzle, 3); - - if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) - ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); - - return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) - | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) - | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) - | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); -} - -static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_src_param *param, - struct sm1_src_register *src, const struct vkd3d_shader_location *loc) -{ - src->mod = param->modifiers; - src->reg = param->reg.idx[0].offset; - src->type = param->reg.type; - src->swizzle = swizzle_from_vsir(param->swizzle); - - if (param->reg.idx[0].rel_addr) - { - vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, - "Unhandled relative addressing on source register."); - d3dbc->failed = true; + if (ins->dst[i].reg.idx[0].rel_addr) + { + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on destination register."); + d3dbc->failed = true; + } + write_sm1_dst_register(buffer, &ins->dst[i]); } -} - -static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param, - struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc) -{ - dst->mod = param->modifiers; - dst->reg = param->reg.idx[0].offset; - dst->type = param->reg.type; - dst->writemask = param->write_mask;
- if (param->reg.idx[0].rel_addr) + for (i = 0; i < ins->src_count; ++i) { - vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, - "Unhandled relative addressing on destination register."); - d3dbc->failed = true; + if (ins->src[i].reg.idx[0].rel_addr) + { + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on source register."); + d3dbc->failed = true; + } + write_sm1_src_register(buffer, &ins->src[i]); } -} +};
static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { @@ -2081,11 +1826,11 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; uint32_t token;
- const struct sm1_dst_register reg = + const struct vkd3d_shader_dst_param reg = { - .type = VKD3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = ins->dst[0].reg.idx[0].offset, + .reg.type = VKD3DSPR_CONST, + .write_mask = VKD3DSP_WRITEMASK_ALL, + .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, };
token = VKD3D_SM1_OP_DEF; @@ -2103,7 +1848,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; + struct vkd3d_shader_dst_param reg = {0}; uint32_t token;
token = VKD3D_SM1_OP_DCL; @@ -2115,9 +1860,9 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token);
- reg.type = VKD3DSPR_COMBINED_SAMPLER; - reg.writemask = VKD3DSP_WRITEMASK_ALL; - reg.reg = reg_id; + reg.reg.type = VKD3DSPR_COMBINED_SAMPLER; + reg.write_mask = VKD3DSP_WRITEMASK_ALL; + reg.reg.idx[0].offset = reg_id;
write_sm1_dst_register(buffer, ®); } @@ -2163,61 +1908,6 @@ static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3 } }
-static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( - struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) -{ - const struct vkd3d_sm1_opcode_info *info; - - if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) - { - vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, - "Opcode %#x not supported for shader profile.", ins->opcode); - d3dbc->failed = true; - return NULL; - } - - if (ins->dst_count != info->dst_count) - { - vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, - "Invalid destination count %u for vsir instruction %#x (expected %u).", - ins->dst_count, ins->opcode, info->dst_count); - d3dbc->failed = true; - return NULL; - } - if (ins->src_count != info->src_count) - { - vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, - "Invalid source count %u for vsir instruction %#x (expected %u).", - ins->src_count, ins->opcode, info->src_count); - d3dbc->failed = true; - return NULL; - } - - return info; -} - -static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc, - const struct vkd3d_shader_instruction *ins) -{ - struct sm1_instruction instr = {0}; - const struct vkd3d_sm1_opcode_info *info; - - if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) - return; - - instr.opcode = info->sm1_opcode; - instr.flags = ins->flags; - instr.has_dst = info->dst_count; - instr.src_count = info->src_count; - - if (instr.has_dst) - sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location); - for (unsigned int i = 0; i < instr.src_count; ++i) - sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location); - - d3dbc_write_instruction(d3dbc, &instr); -} - static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { uint32_t writemask; @@ -2254,7 +1944,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VKD3DSIH_TEX: case VKD3DSIH_TEXKILL: case VKD3DSIH_TEXLDD: - d3dbc_write_vsir_simple_instruction(d3dbc, ins); + d3dbc_write_instruction(d3dbc, ins); break;
case VKD3DSIH_EXP: @@ -2271,7 +1961,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str writemask, ins->opcode); d3dbc->failed = true; } - d3dbc_write_vsir_simple_instruction(d3dbc, ins); + d3dbc_write_instruction(d3dbc, ins); break;
default: @@ -2287,13 +1977,13 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; + struct vkd3d_shader_dst_param reg = {0}; enum vkd3d_decl_usage usage; uint32_t token, usage_idx; bool ret;
if (sm1_register_from_semantic_name(version, element->semantic_name, - element->semantic_index, output, ®.type, ®.reg)) + element->semantic_index, output, ®.reg.type, ®.reg.idx[0].offset)) { usage = 0; usage_idx = 0; @@ -2302,8 +1992,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, { ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); VKD3D_ASSERT(ret); - reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; - reg.reg = element->register_index; + reg.reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg.idx[0].offset = element->register_index; }
token = VKD3D_SM1_OP_DCL; @@ -2316,7 +2006,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; put_u32(buffer, token);
- reg.writemask = element->mask; + reg.write_mask = element->mask; write_sm1_dst_register(buffer, ®); }
@@ -2384,9 +2074,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, }
put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - - bytecode_put_bytes(buffer, ctab->code, ctab->size); - + d3dbc_write_comment(&d3dbc, VKD3D_MAKE_TAG('C','T','A','B'), ctab); d3dbc_write_semantic_dcls(&d3dbc); d3dbc_write_program_instructions(&d3dbc);
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index f6ac8e0829e..81af62f7810 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -115,6 +115,14 @@ static uint32_t read_u32(const char **ptr) return ret; }
+static uint64_t read_u64(const char **ptr) +{ + uint64_t ret; + memcpy(&ret, *ptr, sizeof(ret)); + *ptr += sizeof(ret); + return ret; +} + static float read_float(const char **ptr) { union @@ -502,6 +510,28 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, return ret; }
+static int shdr_parse_features(const struct vkd3d_shader_dxbc_section_desc *section, + struct vkd3d_shader_message_context *message_context, struct vsir_features *f) +{ + const char *data = section->data.code; + const char *ptr = data; + uint64_t flags; + + if (!require_space(0, 1, sizeof(uint64_t), section->data.size)) + { + WARN("Invalid data size %#zx.\n", section->data.size); + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE, + "SFI0 section size %zu is too small to contain flags.\n", section->data.size); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + flags = read_u64(&ptr); + + if (flags & DXBC_SFI0_REQUIRES_ROVS) + f->rovs = true; + + return VKD3D_OK; +} + static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *context) { @@ -558,6 +588,11 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, desc->byte_code_size = section->data.size; break;
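shdr_parse_features() above treats SFI0 as a chunk that starts with a 64-bit feature-flag field and, for now, only latches the ROV requirement. A standalone sketch of the same test; the flag's bit value is not shown in this patch, so it is left as a parameter:

#include <stdint.h>
#include <string.h>

/* Sketch: read a 64-bit flag field via memcpy (safe for unaligned chunk
 * data, like read_u64() above) and test a single feature bit. */
static int chunk_has_flag(const void *data, size_t size, uint64_t flag)
{
    uint64_t flags;

    if (size < sizeof(flags))
        return -1; /* chunk too small to contain the flags */
    memcpy(&flags, data, sizeof(flags));
    return !!(flags & flag);
}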
+ case TAG_SFI0: + if ((ret = shdr_parse_features(section, message_context, &desc->features)) < 0) + return ret; + break; + case TAG_AON9: TRACE("Skipping AON9 shader code chunk.\n"); break; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 7099bcc9ce2..a10de68008a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -430,6 +430,8 @@ enum dx_intrinsic_opcode DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, DX_DERIV_FINEY = 86, + DX_EVAL_SAMPLE_INDEX = 88, + DX_EVAL_CENTROID = 89, DX_SAMPLE_INDEX = 90, DX_COVERAGE = 91, DX_THREAD_ID = 93, @@ -3824,7 +3826,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par }
static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - enum vkd3d_shader_sysval_semantic sysval_semantic) + enum vkd3d_shader_sysval_semantic sysval_semantic, bool is_input) { switch (sysval_semantic) { @@ -3834,7 +3836,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( case VKD3D_SHADER_SV_SAMPLE_INDEX: return VKD3DSPR_NULL; case VKD3D_SHADER_SV_COVERAGE: - return VKD3DSPR_COVERAGE; + return is_input ? VKD3DSPR_COVERAGE : VKD3DSPR_SAMPLEMASK; case VKD3D_SHADER_SV_DEPTH: return VKD3DSPR_DEPTHOUT; case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: @@ -3884,7 +3886,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade param = ¶ms[i];
if (e->register_index == UINT_MAX - && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) + && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input)) != VKD3DSPR_NULL) { dst_param_io_init(param, e, io_reg_type); continue; @@ -5098,6 +5100,53 @@ static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opc instruction_dst_param_init_ssa_scalar(ins, sm6); }
+static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_params; + const struct shader_signature *signature; + unsigned int row_index, column_index; + const struct signature_element *e; + + row_index = sm6_value_get_constant_uint(operands[0]); + column_index = sm6_value_get_constant_uint(operands[2]); + + signature = &sm6->p.program->input_signature; + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid input row index %u for an attribute evaluation.", row_index); + return; + } + + e = &signature->elements[row_index]; + if (column_index >= VKD3D_VEC4_SIZE || !(e->mask & (1 << column_index))) + { + WARN("Invalid column index %u.\n", column_index); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid input column index %u for an attribute evaluation.", column_index); + return; + } + + vsir_instruction_init(ins, &sm6->p.location, (op == DX_EVAL_CENTROID) + ? VKD3DSIH_EVAL_CENTROID : VKD3DSIH_EVAL_SAMPLE_INDEX); + + if (!(src_params = instruction_src_params_alloc(ins, 1 + (op == DX_EVAL_SAMPLE_INDEX), sm6))) + return; + + src_params[0].reg = sm6->input_params[row_index].reg; + src_param_init_scalar(&src_params[0], column_index); + if (e->register_count > 1) + register_index_address_init(&src_params[0].reg.idx[0], operands[1], sm6); + + if (op == DX_EVAL_SAMPLE_INDEX) + src_param_init_from_value(&src_params[1], operands[3]); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -6288,6 +6337,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, [DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, [DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, + [DX_EVAL_CENTROID ] = {"o", "cic", sm6_parser_emit_dx_eval_attrib}, + [DX_EVAL_SAMPLE_INDEX ] = {"o", "cici", sm6_parser_emit_dx_eval_attrib}, [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, @@ -8547,6 +8598,7 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, + [SEMANTIC_KIND_RTARRAYINDEX] = VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX, [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, @@ -9348,7 +9400,7 @@ static void signature_element_read_additional_element_values(struct signature_el }
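sm6_parser_emit_dx_eval_attrib() above validates the evaluated attribute by row (input-signature element) and column (component within that element's mask) before emitting EVAL_CENTROID or EVAL_SAMPLE_INDEX. The bounds check reduces to the predicate sketched here, with a plain mask array standing in for the signature elements:

/* Sketch: a (row, column) pair names a valid attribute when the row is a
 * real input-signature element and the column is set in its write mask. */
static int attrib_operand_valid(unsigned int row, unsigned int column,
        unsigned int element_count, const unsigned int *element_masks)
{
    return row < element_count && column < 4 && (element_masks[row] & (1u << column));
}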
static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, - struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) + struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain, bool is_input) { unsigned int i, j, column_count, operand_count, index; const struct sm6_metadata_node *node, *element_node; @@ -9466,7 +9518,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
if ((is_register = e->register_index == UINT_MAX)) { - if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) + if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input) == VKD3DSPR_INVALID) { WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, @@ -9578,17 +9630,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons }
if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], - &program->input_signature, tessellator_domain)) < 0) + &program->input_signature, tessellator_domain, true)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], - &program->output_signature, tessellator_domain)) < 0) + &program->output_signature, tessellator_domain, false)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], - &program->patch_constant_signature, tessellator_domain)) < 0) + &program->patch_constant_signature, tessellator_domain, false)) < 0) { return ret; } @@ -9717,12 +9769,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6,
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); ins->declaration.tessellator_domain = tessellator_domain; + sm6->p.program->tess_domain = tessellator_domain; }
-static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, - const char *type) +static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, + unsigned int count, bool allow_zero, const char *type) { - if (!count || count > 32) + if ((!count && !allow_zero) || count > 32) { WARN("%s control point count %u invalid.\n", type, count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, @@ -9744,6 +9797,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6,
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); ins->declaration.tessellator_partitioning = tessellator_partitioning; + + sm6->p.program->tess_partitioning = tessellator_partitioning; }
static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, @@ -9760,6 +9815,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); ins->declaration.tessellator_output_primitive = primitive; + + sm6->p.program->tess_output_primitive = primitive; }
static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) @@ -9951,7 +10008,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa }
sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); - sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); + sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); sm6->p.program->input_control_point_count = operands[1];
return operands[0]; @@ -10010,9 +10067,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa } }
- sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); + sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); program->input_control_point_count = operands[1]; - sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); + sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output"); sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); program->output_control_point_count = operands[2]; sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); @@ -10351,7 +10408,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. */ count = max(token_count, 400) - 400; if (!vsir_program_init(program, compile_info, &version, - (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) + (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_NORMALISED_SM6)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; @@ -10378,6 +10435,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro *input_signature = dxbc_desc->input_signature; *output_signature = dxbc_desc->output_signature; *patch_constant_signature = dxbc_desc->patch_constant_signature; + program->features = dxbc_desc->features; memset(dxbc_desc, 0, sizeof(*dxbc_desc));
block = &sm6->root_block; diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index e22177e1e30..bd7e7b420db 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -36,6 +36,16 @@ struct fx_4_binary_type uint32_t typeinfo; };
+struct fx_5_shader +{ + uint32_t offset; + uint32_t sodecl[4]; + uint32_t sodecl_count; + uint32_t rast_stream; + uint32_t iface_bindings_count; + uint32_t iface_bindings; +}; + struct string_entry { struct rb_entry entry; @@ -550,6 +560,8 @@ enum fx_4_type_constants FX_4_ASSIGNMENT_VARIABLE = 0x2, FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, + FX_4_ASSIGNMENT_INLINE_SHADER = 0x7, + FX_5_ASSIGNMENT_INLINE_SHADER = 0x8, };
static const uint32_t fx_4_numeric_base_types[] = @@ -598,8 +610,8 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, return 0; }
- value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; - value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; + value |= (type->e.numeric.dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; + value |= (type->e.numeric.dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK;
@@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: vkd3d_unreachable();
case HLSL_CLASS_VOID: @@ -1008,8 +1021,8 @@ static uint32_t get_fx_2_type_class(const struct hlsl_type *type) return hlsl_sm1_class(type); }
-static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, - struct fx_write_context *fx) +static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, + const struct hlsl_semantic *semantic, bool is_combined_sampler, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; uint32_t semantic_offset, offset, elements_count = 0, name_offset; @@ -1025,7 +1038,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n name_offset = write_string(name, fx); semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0;
- offset = put_u32(buffer, hlsl_sm1_base_type(type)); + offset = put_u32(buffer, hlsl_sm1_base_type(type, is_combined_sampler)); put_u32(buffer, get_fx_2_type_class(type)); put_u32(buffer, name_offset); put_u32(buffer, semantic_offset); @@ -1034,13 +1047,13 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n switch (type->class) { case HLSL_CLASS_VECTOR: - put_u32(buffer, type->dimx); - put_u32(buffer, type->dimy); + put_u32(buffer, type->e.numeric.dimx); + put_u32(buffer, type->e.numeric.dimy); break; case HLSL_CLASS_SCALAR: case HLSL_CLASS_MATRIX: - put_u32(buffer, type->dimy); - put_u32(buffer, type->dimx); + put_u32(buffer, type->e.numeric.dimy); + put_u32(buffer, type->e.numeric.dimx); break; case HLSL_CLASS_STRUCT: put_u32(buffer, type->e.record.field_count); @@ -1061,7 +1074,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
/* Validated in check_invalid_object_fields(). */ VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); - write_fx_2_parameter(field->type, field->name, &field->semantic, fx); + write_fx_2_parameter(field->type, field->name, &field->semantic, false, fx); } }
@@ -1298,6 +1311,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: /* This cannot appear as an extern variable. */ break; } @@ -1321,7 +1335,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx) if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) continue;
- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); value_offset = write_fx_2_initial_value(var, fx);
flags = 0; @@ -1344,7 +1358,7 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte struct vkd3d_bytecode_buffer *buffer = &fx->structured; uint32_t desc_offset, value_offset;
- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); value_offset = write_fx_2_initial_value(var, fx);
put_u32(buffer, desc_offset); @@ -1834,6 +1848,7 @@ enum state_property_component_type FX_BLEND, FX_VERTEXSHADER, FX_PIXELSHADER, + FX_GEOMETRYSHADER, FX_COMPONENT_TYPE_COUNT, };
@@ -2065,6 +2080,7 @@ fx_4_states[] =
{ "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, + { "GeometryShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_GEOMETRYSHADER, 1, 1, 8 }, { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, @@ -2951,7 +2967,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en
static int fx_2_parse(struct fx_parser *parser) { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.");
return -1; } @@ -3120,7 +3136,7 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) else { fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Only numeric and string types are supported in annotations.\n"); + "Only numeric and string types are supported in annotations."); }
if (type.element_count) @@ -3210,63 +3226,23 @@ static void fx_parse_buffers(struct fx_parser *parser) } }
-static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) +static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) { struct vkd3d_shader_compile_info info = { 0 }; struct vkd3d_shader_code output; - uint32_t data_size, offset; const void *data = NULL; const char *p, *q, *end; - struct fx_5_shader - { - uint32_t offset; - uint32_t sodecl[4]; - uint32_t sodecl_count; - uint32_t rast_stream; - uint32_t iface_bindings_count; - uint32_t iface_bindings; - } shader5; - struct fx_4_gs_so - { - uint32_t offset; - uint32_t sodecl; - } gs_so; + uint32_t data_size; int ret;
static const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, };
- switch (object_type) - { - case FX_4_OBJECT_TYPE_PIXEL_SHADER: - case FX_4_OBJECT_TYPE_VERTEX_SHADER: - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: - offset = fx_parser_read_u32(parser); - break; - - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: - fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); - offset = gs_so.offset; - break; - - case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: - case FX_5_OBJECT_TYPE_COMPUTE_SHADER: - case FX_5_OBJECT_TYPE_HULL_SHADER: - case FX_5_OBJECT_TYPE_DOMAIN_SHADER: - fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); - offset = shader5.offset; - break; - - default: - parser->failed = true; - return; - } - - fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); + fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size)); if (data_size) - data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); + data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size);
if (!data) return; @@ -3283,7 +3259,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) { fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Failed to disassemble shader blob.\n"); + "Failed to disassemble shader blob."); return; } parse_fx_print_indent(parser); @@ -3307,26 +3283,58 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int
parse_fx_print_indent(parser); vkd3d_string_buffer_printf(&parser->buffer, "}"); - if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) + if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) { vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", - fx_4_get_string(parser, gs_so.sodecl)); + fx_4_get_string(parser, shader->sodecl[0])); } else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) { - for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) + for (unsigned int i = 0; i < ARRAY_SIZE(shader->sodecl); ++i) { - if (shader5.sodecl[i]) + if (shader->sodecl[i]) vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", - i, fx_4_get_string(parser, shader5.sodecl[i])); + i, fx_4_get_string(parser, shader->sodecl[i])); } - if (shader5.sodecl_count) - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); + if (shader->sodecl_count) + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream); }
vkd3d_shader_free_shader_code(&output); }
+static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) +{ + struct fx_5_shader shader = { 0 }; + + switch (object_type) + { + case FX_4_OBJECT_TYPE_PIXEL_SHADER: + case FX_4_OBJECT_TYPE_VERTEX_SHADER: + case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: + shader.offset = fx_parser_read_u32(parser); + break; + + case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: + shader.offset = fx_parser_read_u32(parser); + shader.sodecl[0] = fx_parser_read_u32(parser); + break; + + case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: + case FX_5_OBJECT_TYPE_COMPUTE_SHADER: + case FX_5_OBJECT_TYPE_HULL_SHADER: + case FX_5_OBJECT_TYPE_DOMAIN_SHADER: + fx_parser_read_u32s(parser, &shader, sizeof(shader)); + break; + + default: + parser->failed = true; + return; + } + + fx_4_parse_shader_blob(parser, object_type, &shader); +} + static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) { switch (type->typeinfo) @@ -3390,6 +3398,8 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 [FX_UINT8] = "byte", }; const struct rhs_named_value *named_value; + struct fx_5_shader shader = { 0 }; + unsigned int shader_type = 0; uint32_t i, j, comp_count; struct fx_4_state *state;
@@ -3400,7 +3410,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), sizeof(*fx_4_states), fx_4_state_id_compare))) { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.", entry.id); break; }
@@ -3486,9 +3496,38 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), fx_4_get_string(parser, index.index)); break; + case FX_4_ASSIGNMENT_INLINE_SHADER: + case FX_5_ASSIGNMENT_INLINE_SHADER: + { + bool shader5 = entry.type == FX_5_ASSIGNMENT_INLINE_SHADER; + + if (shader5) + fx_parser_read_unstructured(parser, &shader, entry.value, sizeof(shader)); + else + fx_parser_read_unstructured(parser, &shader, entry.value, 2 * sizeof(uint32_t)); + + if (state->type == FX_PIXELSHADER) + shader_type = FX_4_OBJECT_TYPE_PIXEL_SHADER; + else if (state->type == FX_VERTEXSHADER) + shader_type = FX_4_OBJECT_TYPE_VERTEX_SHADER; + else if (state->type == FX_GEOMETRYSHADER) + shader_type = shader5 ? FX_5_OBJECT_TYPE_GEOMETRY_SHADER : FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO; + else if (state->type == FX_HULLSHADER) + shader_type = FX_5_OBJECT_TYPE_HULL_SHADER; + else if (state->type == FX_DOMAINSHADER) + shader_type = FX_5_OBJECT_TYPE_DOMAIN_SHADER; + else if (state->type == FX_COMPUTESHADER) + shader_type = FX_5_OBJECT_TYPE_COMPUTE_SHADER; + + vkd3d_string_buffer_printf(&parser->buffer, "\n"); + parse_fx_start_indent(parser); + fx_4_parse_shader_blob(parser, shader_type, &shader); + parse_fx_end_indent(parser); + break; + } default: fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, - "Unsupported assignment type %u.\n", entry.type); + "Unsupported assignment type %u.", entry.type); } vkd3d_string_buffer_printf(&parser->buffer, ";\n"); } diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index 0df0e30f399..ab6604bd703 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -1507,13 +1507,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VKD3DSIH_DCL_INDEXABLE_TEMP: shader_glsl_dcl_indexable_temp(gen, ins); break; - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_INPUT_PS: - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_NOP: break; case VKD3DSIH_DEFAULT: @@ -2476,7 +2469,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) return ret;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
vkd3d_glsl_generator_init(&generator, program, compile_info, descriptor_info, combined_sampler_info, message_context); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 96de18dc886..41586550203 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -93,7 +93,7 @@ char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) return ret; }
-bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var) +void hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl) { struct hlsl_scope *scope = ctx->cur_scope; struct hlsl_ir_var *var; @@ -103,21 +103,16 @@ bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) { if (var->name && !strcmp(decl->name, var->name)) - return false; - } - if (local_var && scope->upper->upper == ctx->globals) - { - /* Check whether the variable redefines a function parameter. */ - LIST_FOR_EACH_ENTRY(var, &scope->upper->vars, struct hlsl_ir_var, scope_entry) { - if (var->name && !strcmp(decl->name, var->name)) - return false; + hlsl_error(ctx, &decl->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Identifier \"%s\" was already declared in this scope.", var->name); + hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", var->name); + break; } } }
list_add_tail(&scope->vars, &decl->scope_entry); - return true; }
struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) @@ -192,18 +187,20 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type)
unsigned int hlsl_type_minor_size(const struct hlsl_type *type) { + VKD3D_ASSERT(hlsl_is_numeric_type(type)); if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) - return type->dimx; + return type->e.numeric.dimx; else - return type->dimy; + return type->e.numeric.dimy; }
unsigned int hlsl_type_major_size(const struct hlsl_type *type) { + VKD3D_ASSERT(hlsl_is_numeric_type(type)); if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) - return type->dimy; + return type->e.numeric.dimy; else - return type->dimx; + return type->e.numeric.dimx; }
unsigned int hlsl_type_element_count(const struct hlsl_type *type) @@ -211,7 +208,7 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) switch (type->class) { case HLSL_CLASS_VECTOR: - return type->dimx; + return type->e.numeric.dimx; case HLSL_CLASS_MATRIX: return hlsl_type_major_size(type); case HLSL_CLASS_ARRAY: @@ -287,6 +284,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) case HLSL_CLASS_UAV: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_VOID: case HLSL_CLASS_NULL: return false; @@ -294,6 +292,12 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) return false; }
+bool hlsl_type_is_patch_array(const struct hlsl_type *type) +{ + return type->class == HLSL_CLASS_ARRAY && (type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT + || type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT); +} + /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or * resources, since for both their data types span across a single regset. */ static enum hlsl_regset type_get_regset(const struct hlsl_type *type) @@ -354,14 +358,24 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: - type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->dimx : 4; + type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->e.numeric.dimx : 4; break;
case HLSL_CLASS_MATRIX: if (hlsl_type_is_row_major(type)) - type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimy - 1) + type->dimx) : (4 * type->dimy); + { + if (is_sm4) + type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimy - 1) + type->e.numeric.dimx; + else + type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimy; + } else - type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimx - 1) + type->dimy) : (4 * type->dimx); + { + if (is_sm4) + type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimx - 1) + type->e.numeric.dimy; + else + type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimx; + } break;
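The expanded matrix case computes the same sizes as the old one-liner: on SM4 each major-axis vector starts a new register but the last one only occupies the components it uses, while SM1 rounds every matrix up to full registers. A sketch of the SM4 rule, under the convention that dimy counts rows and dimx columns:

/* Sketch: SM4 register size, in 32-bit components, of a matrix. E.g. a
 * row_major float3x4 takes 4 * (3 - 1) + 4 = 12 components, a
 * column_major one 4 * (4 - 1) + 3 = 15. */
static unsigned int sm4_matrix_reg_size(unsigned int rows, unsigned int cols, int row_major)
{
    return row_major ? 4u * (rows - 1) + cols : 4u * (cols - 1) + rows;
}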
case HLSL_CLASS_ARRAY: @@ -386,7 +400,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type { unsigned int i;
- type->dimx = 0; for (i = 0; i < type->e.record.field_count; ++i) { struct hlsl_struct_field *field = &type->e.record.fields[i]; @@ -398,8 +411,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type field->reg_offset[k] = type->reg_size[k]; type->reg_size[k] += field->type->reg_size[k]; } - - type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); } break; } @@ -434,6 +445,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; } @@ -481,8 +493,8 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e } type->class = type_class; type->e.numeric.type = base_type; - type->dimx = dimx; - type->dimy = dimy; + type->e.numeric.dimx = dimx; + type->e.numeric.dimy = dimy; hlsl_type_calculate_reg_size(ctx, type);
list_add_tail(&ctx->types, &type->entry); @@ -525,6 +537,7 @@ static bool type_is_single_component(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_STREAM_OUTPUT: break; } vkd3d_unreachable(); @@ -549,18 +562,19 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, switch (type->class) { case HLSL_CLASS_VECTOR: - VKD3D_ASSERT(index < type->dimx); + VKD3D_ASSERT(index < type->e.numeric.dimx); *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); *index_ptr = 0; return index;
case HLSL_CLASS_MATRIX: { - unsigned int y = index / type->dimx, x = index % type->dimx; + unsigned int y = index / type->e.numeric.dimx, x = index % type->e.numeric.dimx; bool row_major = hlsl_type_is_row_major(type);
- VKD3D_ASSERT(index < type->dimx * type->dimy); - *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); + VKD3D_ASSERT(index < type->e.numeric.dimx * type->e.numeric.dimy); + *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, + row_major ? type->e.numeric.dimx : type->e.numeric.dimy); *index_ptr = row_major ? x : y; return row_major ? y : x; } @@ -680,6 +694,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty case HLSL_CLASS_SCALAR: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: vkd3d_unreachable(); } type = next_type; @@ -857,9 +872,9 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co
case HLSL_CLASS_MATRIX: if (hlsl_type_is_row_major(type)) - return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); + return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx); else - return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy); + return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimy);
case HLSL_CLASS_ARRAY: return type->e.array.type; @@ -877,7 +892,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co } }
-struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size) +struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, + unsigned int array_size, enum hlsl_array_type array_type) { struct hlsl_type *type;
@@ -888,8 +904,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba type->modifiers = basic_type->modifiers; type->e.array.elements_count = array_size; type->e.array.type = basic_type; - type->dimx = basic_type->dimx; - type->dimy = basic_type->dimy; + type->e.array.array_type = array_type; type->sampler_dim = basic_type->sampler_dim; hlsl_type_calculate_reg_size(ctx, type);
@@ -898,6 +913,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba return type; }
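Two hedged usage sketches for the array and stream-output constructors (the helper names and the cp_type element type are illustrative, not part of the patch); the second uses the constructor added just below:

static struct hlsl_type *build_input_patch(struct hlsl_ctx *ctx, struct hlsl_type *cp_type)
{
    /* The array type behind an "InputPatch<CP, 3>" parameter;
     * hlsl_type_is_patch_array() returns true for it. */
    return hlsl_new_array_type(ctx, cp_type, 3, HLSL_ARRAY_PATCH_INPUT);
}

static struct hlsl_type *build_point_stream(struct hlsl_ctx *ctx)
{
    /* The type behind a "PointStream<float4>" declaration: so_type selects
     * the primitive kind, the last argument is the per-vertex data type. */
    return hlsl_new_stream_output_type(ctx, HLSL_STREAM_OUTPUT_POINT_STREAM,
            hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4));
}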
+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, + enum hlsl_so_object_type so_type, struct hlsl_type *data_type) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_STREAM_OUTPUT; + type->e.so.so_type = so_type; + type->e.so.type = data_type; + + list_add_tail(&ctx->types, &type->entry); + + return type; +} + struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count) { @@ -907,7 +938,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, return NULL; type->class = HLSL_CLASS_STRUCT; type->name = name; - type->dimy = 1; type->e.record.fields = fields; type->e.record.field_count = field_count; hlsl_type_calculate_reg_size(ctx, type); @@ -925,8 +955,6 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; type->class = HLSL_CLASS_TEXTURE; - type->dimx = 4; - type->dimy = 1; type->sampler_dim = dim; type->e.resource.format = format; type->sample_count = sample_count; @@ -943,8 +971,6 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; type->class = HLSL_CLASS_UAV; - type->dimx = format->dimx; - type->dimy = 1; type->sampler_dim = dim; type->e.resource.format = format; type->e.resource.rasteriser_ordered = rasteriser_ordered; @@ -960,7 +986,6 @@ struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *forma if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; type->class = HLSL_CLASS_CONSTANT_BUFFER; - type->dimy = 1; type->e.resource.format = format; hlsl_type_calculate_reg_size(ctx, type); list_add_tail(&ctx->types, &type->entry); @@ -1046,7 +1071,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: - return type->dimx * type->dimy; + return type->e.numeric.dimx * type->e.numeric.dimy;
case HLSL_CLASS_STRUCT: { @@ -1086,6 +1111,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_STREAM_OUTPUT: break; }
@@ -1110,9 +1136,9 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) return false; - if (t1->dimx != t2->dimx) + if (t1->e.numeric.dimx != t2->e.numeric.dimx) return false; - if (t1->dimy != t2->dimy) + if (t1->e.numeric.dimy != t2->e.numeric.dimy) return false; return true;
@@ -1149,6 +1175,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2
case HLSL_CLASS_ARRAY: return t1->e.array.elements_count == t2->e.array.elements_count + && t1->e.array.array_type == t2->e.array.array_type && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type);
case HLSL_CLASS_TECHNIQUE: @@ -1157,6 +1184,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 case HLSL_CLASS_CONSTANT_BUFFER: return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format);
+ case HLSL_CLASS_STREAM_OUTPUT: + if (t1->e.so.so_type != t2->e.so.so_type) + return false; + return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -1198,8 +1230,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, } } type->class = old->class; - type->dimx = old->dimx; - type->dimy = old->dimy; type->modifiers = old->modifiers | modifiers; if (!(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) type->modifiers |= default_majority; @@ -1212,6 +1242,8 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: + type->e.numeric.dimx = old->e.numeric.dimx; + type->e.numeric.dimy = old->e.numeric.dimy; type->e.numeric.type = old->e.numeric.type; break;
@@ -1223,6 +1255,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, return NULL; } type->e.array.elements_count = old->e.array.elements_count; + type->e.array.array_type = old->e.array.array_type; break;
case HLSL_CLASS_STRUCT: @@ -1471,7 +1504,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls hlsl_src_from_node(&store->rhs, rhs);
if (!writemask && type_is_single_reg(rhs->data_type)) - writemask = (1 << rhs->data_type->dimx) - 1; + writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; store->writemask = writemask;
return &store->node; @@ -1498,7 +1531,7 @@ bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_src_from_node(&store->rhs, rhs);
if (type_is_single_reg(rhs->data_type)) - store->writemask = (1 << rhs->data_type->dimx) - 1; + store->writemask = (1 << rhs->data_type->e.numeric.dimx) - 1;
hlsl_block_add_instr(block, &store->node);
@@ -1695,22 +1728,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * return &s->node; }
-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, - struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_vsir_instruction_ref *vsir_instr; - - if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) - return NULL; - init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); - vsir_instr->vsir_instr_idx = vsir_instr_idx; - - if (reg) - vsir_instr->node.reg = *reg; - - return &vsir_instr->node; -} - struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) { @@ -1844,22 +1861,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct return &store->node; }
-struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) { struct hlsl_ir_swizzle *swizzle; struct hlsl_type *type;
+ VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR); + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) return NULL; - VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); - if (components == 1) + if (component_count > 1) + type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); + else type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); + init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); + hlsl_src_from_node(&swizzle->val, val); + swizzle->u.vector = s; + + return &swizzle->node; +} + +struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, + unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_swizzle *swizzle; + struct hlsl_type *type; + + VKD3D_ASSERT(val->data_type->class == HLSL_CLASS_MATRIX); + + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) + return NULL; + if (component_count > 1) + type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); else - type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); + type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); hlsl_src_from_node(&swizzle->val, val); - swizzle->swizzle = s; + swizzle->u.matrix = s; + return &swizzle->node; }
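A sketch of the split constructors in use: building the vector swizzle "val.zx" (swizzle_zx() is a hypothetical helper; components past component_count are padding):

static struct hlsl_ir_node *swizzle_zx(struct hlsl_ctx *ctx,
        struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc)
{
    /* HLSL_SWIZZLE() now aliases VKD3D_SHADER_SWIZZLE(); only the first
     * two components are meaningful for component_count == 2. */
    return hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, X, X), 2, val, loc);
}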
@@ -1996,6 +2036,25 @@ struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const ch return &constant->node; }
+struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, + const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, + struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_interlocked *interlocked; + + if (!(interlocked = hlsl_alloc(ctx, sizeof(*interlocked)))) + return NULL; + + init_node(&interlocked->node, HLSL_IR_INTERLOCKED, type, loc); + interlocked->op = op; + hlsl_copy_deref(ctx, &interlocked->dst, dst); + hlsl_src_from_node(&interlocked->coords, coords); + hlsl_src_from_node(&interlocked->cmp_value, cmp_value); + hlsl_src_from_node(&interlocked->value, value); + + return &interlocked->node; +} + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) { struct hlsl_type *type = index->val.node->data_type; @@ -2031,7 +2090,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) type = type->e.resource.format; else if (type->class == HLSL_CLASS_MATRIX) - type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); + type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx); else type = hlsl_get_element_type_from_path_index(ctx, type, idx);
@@ -2054,8 +2113,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return &jump->node; }
-struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, + struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop; @@ -2066,6 +2125,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block);
+ hlsl_block_init(&loop->iter); + if (iter) + hlsl_block_add_block(&loop->iter, iter); + loop->unroll_type = unroll_type; loop->unroll_limit = unroll_limit; return &loop->node; @@ -2221,14 +2284,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_
static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { + struct hlsl_block iter, body; struct hlsl_ir_node *dst; - struct hlsl_block body; + + if (!clone_block(ctx, &iter, &src->iter, map)) + return NULL;
if (!clone_block(ctx, &body, &src->body, map)) + { + hlsl_block_cleanup(&iter); return NULL; + }
- if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + if (!(dst = hlsl_new_loop(ctx, &iter, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) { + hlsl_block_cleanup(&iter); hlsl_block_cleanup(&body); return NULL; } @@ -2310,8 +2380,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { - return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, - map_instr(map, src->val.node), &src->node.loc); + if (src->val.node->data_type->class == HLSL_CLASS_MATRIX) + return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->e.numeric.dimx, + map_instr(map, src->val.node), &src->node.loc); + else + return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->e.numeric.dimx, + map_instr(map, src->val.node), &src->node.loc); }
static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, @@ -2325,6 +2399,27 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; }
+static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_interlocked *src) +{ + struct hlsl_ir_interlocked *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_INTERLOCKED, NULL, &src->node.loc); + dst->op = src->op; + + if (!clone_deref(ctx, map, &dst->dst, &src->dst)) + { + vkd3d_free(dst); + return NULL; + } + clone_src(map, &dst->coords, &src->coords); + clone_src(map, &dst->cmp_value, &src->cmp_value); + clone_src(map, &dst->value, &src->value); + return &dst->node; +} + static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_compile *compile) { @@ -2525,6 +2620,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
+ case HLSL_IR_INTERLOCKED: + return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr)); + case HLSL_IR_COMPILE: return clone_compile(ctx, map, hlsl_ir_compile(instr));
@@ -2533,9 +2631,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_unreachable(); }
vkd3d_unreachable(); @@ -2693,10 +2788,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha return NULL; }
-struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) +static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl_type *type) { - struct vkd3d_string_buffer *string, *inner_string; - static const char *const base_types[] = { [HLSL_TYPE_FLOAT] = "float", @@ -2720,121 +2813,126 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", };
- if (!(string = hlsl_get_string_buffer(ctx))) - return NULL; - if (type->name) { - vkd3d_string_buffer_printf(string, "%s", type->name); - return string; + vkd3d_string_buffer_printf(buffer, "%s", type->name); + return; }
switch (type->class) { case HLSL_CLASS_SCALAR: VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); - return string; + vkd3d_string_buffer_printf(buffer, "%s", base_types[type->e.numeric.type]); + return;
case HLSL_CLASS_VECTOR: VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); - return string; + vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->e.numeric.dimx); + return;
case HLSL_CLASS_MATRIX: VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); - return string; + vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], + type->e.numeric.dimy, type->e.numeric.dimx); + return;
case HLSL_CLASS_ARRAY: - { - const struct hlsl_type *t; - - for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) - ; - - if ((inner_string = hlsl_type_to_string(ctx, t))) + if (hlsl_type_is_patch_array(type)) { - vkd3d_string_buffer_printf(string, "%s", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); + if (type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT) + vkd3d_string_buffer_printf(buffer, "InputPatch<"); + else + vkd3d_string_buffer_printf(buffer, "OutputPatch<"); + hlsl_dump_type(buffer, type->e.array.type); + vkd3d_string_buffer_printf(buffer, ", %u>", type->e.array.elements_count); } - - for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) + else { - if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - vkd3d_string_buffer_printf(string, "[]"); - else - vkd3d_string_buffer_printf(string, "[%u]", t->e.array.elements_count); + const struct hlsl_type *t; + + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) + ; + + hlsl_dump_type(buffer, t); + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) + { + if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) + vkd3d_string_buffer_printf(buffer, "[]"); + else + vkd3d_string_buffer_printf(buffer, "[%u]", t->e.array.elements_count); + } } - return string; - } + return;
case HLSL_CLASS_STRUCT: - vkd3d_string_buffer_printf(string, "<anonymous struct>"); - return string; + vkd3d_string_buffer_printf(buffer, "<anonymous struct>"); + return;
case HLSL_CLASS_TEXTURE: if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) { - vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); - return string; + vkd3d_string_buffer_printf(buffer, "ByteAddressBuffer"); + return; }
if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { - vkd3d_string_buffer_printf(string, "Texture"); - return string; + vkd3d_string_buffer_printf(buffer, "Texture"); + return; }
VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) { - vkd3d_string_buffer_printf(string, "Buffer"); + vkd3d_string_buffer_printf(buffer, "Buffer<"); } else { VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); - vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); - } - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); + vkd3d_string_buffer_printf(buffer, "Texture%s<", dimensions[type->sampler_dim]); } - return string; + hlsl_dump_type(buffer, type->e.resource.format); + vkd3d_string_buffer_printf(buffer, ">"); + return;
case HLSL_CLASS_UAV: if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) { - vkd3d_string_buffer_printf(string, "RWByteAddressBuffer"); - return string; + vkd3d_string_buffer_printf(buffer, "RWByteAddressBuffer"); + return; } if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - vkd3d_string_buffer_printf(string, "RWBuffer"); + vkd3d_string_buffer_printf(buffer, "RWBuffer<"); else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); + vkd3d_string_buffer_printf(buffer, "RWStructuredBuffer<"); else - vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - return string; + vkd3d_string_buffer_printf(buffer, "RWTexture%s<", dimensions[type->sampler_dim]); + hlsl_dump_type(buffer, type->e.resource.format); + vkd3d_string_buffer_printf(buffer, ">"); + return;
case HLSL_CLASS_CONSTANT_BUFFER: - vkd3d_string_buffer_printf(string, "ConstantBuffer"); - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - return string; + vkd3d_string_buffer_printf(buffer, "ConstantBuffer<"); + hlsl_dump_type(buffer, type->e.resource.format); + vkd3d_string_buffer_printf(buffer, ">"); + return;
case HLSL_CLASS_ERROR: - vkd3d_string_buffer_printf(string, "<error type>"); - return string; + vkd3d_string_buffer_printf(buffer, "<error type>"); + return; + + case HLSL_CLASS_STREAM_OUTPUT: + if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) + vkd3d_string_buffer_printf(buffer, "PointStream<"); + else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) + vkd3d_string_buffer_printf(buffer, "LineStream<"); + else + vkd3d_string_buffer_printf(buffer, "TriangleStream<"); + hlsl_dump_type(buffer, type->e.so.type); + vkd3d_string_buffer_printf(buffer, ">"); + return;
case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: @@ -2857,8 +2955,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru break; }
- vkd3d_string_buffer_printf(string, "<unexpected type>"); - return string; + vkd3d_string_buffer_printf(buffer, "<unexpected type>"); +} + +struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) +{ + struct vkd3d_string_buffer *buffer; + + if (!(buffer = hlsl_get_string_buffer(ctx))) + return NULL; + hlsl_dump_type(buffer, type); + return buffer; }
struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, @@ -2964,11 +3071,11 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_STORE ] = "HLSL_IR_STORE", [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + [HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED",
[HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", - [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", };
if (type >= ARRAY_SIZE(names)) @@ -3022,7 +3129,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer vkd3d_string_buffer_printf(buffer, "%s ", string->buffer); hlsl_release_string_buffer(ctx, string); } - vkd3d_string_buffer_printf(buffer, "%s %s", debug_hlsl_type(ctx, var->data_type), var->name); + hlsl_dump_type(buffer, var->data_type); + vkd3d_string_buffer_printf(buffer, " %s", var->name); if (var->semantic.name) vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index); } @@ -3103,42 +3211,36 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) return vkd3d_dbg_sprintf(".%s", string); }
-static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) +void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, + struct vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f) { - const struct hlsl_ir_function_decl *decl = call->decl; - struct vkd3d_string_buffer *string; size_t i;
- if (!(string = hlsl_type_to_string(ctx, decl->return_type))) - return; - - vkd3d_string_buffer_printf(buffer, "call %s %s(", string->buffer, decl->func->name); - hlsl_release_string_buffer(ctx, string); - - for (i = 0; i < decl->parameters.count; ++i) + hlsl_dump_type(buffer, f->return_type); + vkd3d_string_buffer_printf(buffer, " %s(", f->func->name); + for (i = 0; i < f->parameters.count; ++i) { - const struct hlsl_ir_var *param = decl->parameters.vars[i]; - - if (!(string = hlsl_type_to_string(ctx, param->data_type))) - return; - if (i) vkd3d_string_buffer_printf(buffer, ", "); - vkd3d_string_buffer_printf(buffer, "%s", string->buffer); - - hlsl_release_string_buffer(ctx, string); + dump_ir_var(ctx, buffer, f->parameters.vars[i]); } vkd3d_string_buffer_printf(buffer, ")"); }
+static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) +{ + vkd3d_string_buffer_printf(buffer, "call "); + hlsl_dump_ir_function_decl(ctx, buffer, call->decl); +} + static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_constant *constant) { struct hlsl_type *type = constant->node.data_type; unsigned int x;
- if (type->dimx != 1) + if (type->e.numeric.dimx != 1) vkd3d_string_buffer_printf(buffer, "{"); - for (x = 0; x < type->dimx; ++x) + for (x = 0; x < type->e.numeric.dimx; ++x) { const union hlsl_constant_value_component *value = &constant->value.u[x];
@@ -3164,12 +3266,9 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl case HLSL_TYPE_UINT: vkd3d_string_buffer_printf(buffer, "%u ", value->u); break; - - default: - vkd3d_unreachable(); } } - if (type->dimx != 1) + if (type->e.numeric.dimx != 1) vkd3d_string_buffer_printf(buffer, "}"); }
@@ -3198,16 +3297,15 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_F32TOF16] = "f32tof16", [HLSL_OP1_FLOOR] = "floor", [HLSL_OP1_FRACT] = "fract", + [HLSL_OP1_ISINF] = "isinf", [HLSL_OP1_LOG2] = "log2", [HLSL_OP1_LOGIC_NOT] = "!", [HLSL_OP1_NEG] = "-", - [HLSL_OP1_NRM] = "nrm", [HLSL_OP1_RCP] = "rcp", [HLSL_OP1_REINTERPRET] = "reinterpret", [HLSL_OP1_ROUND] = "round", [HLSL_OP1_RSQ] = "rsq", [HLSL_OP1_SAT] = "sat", - [HLSL_OP1_SIGN] = "sign", [HLSL_OP1_SIN] = "sin", [HLSL_OP1_SIN_REDUCED] = "sin_reduced", [HLSL_OP1_SQRT] = "sqrt", @@ -3217,7 +3315,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP2_BIT_AND] = "&", [HLSL_OP2_BIT_OR] = "|", [HLSL_OP2_BIT_XOR] = "^", - [HLSL_OP2_CRS] = "crs", [HLSL_OP2_DIV] = "/", [HLSL_OP2_DOT] = "dot", [HLSL_OP2_EQUAL] = "==", @@ -3398,15 +3495,17 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls unsigned int i;
dump_src(buffer, &swizzle->val); - if (swizzle->val.node->data_type->dimy > 1) + if (swizzle->val.node->data_type->e.numeric.dimy > 1) { vkd3d_string_buffer_printf(buffer, "."); - for (i = 0; i < swizzle->node.data_type->dimx; ++i) - vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf); + for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i) + vkd3d_string_buffer_printf(buffer, "_m%u%u", + swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x); } else { - vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx)); + vkd3d_string_buffer_printf(buffer, "%s", + debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->e.numeric.dimx)); } }
@@ -3418,6 +3517,35 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ vkd3d_string_buffer_printf(buffer, "]"); }
+static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_interlocked *interlocked) +{ + static const char *const op_names[] = + { + [HLSL_INTERLOCKED_ADD] = "add", + [HLSL_INTERLOCKED_AND] = "and", + [HLSL_INTERLOCKED_CMP_EXCH] = "cmp_exch", + [HLSL_INTERLOCKED_EXCH] = "exch", + [HLSL_INTERLOCKED_MAX] = "max", + [HLSL_INTERLOCKED_MIN] = "min", + [HLSL_INTERLOCKED_OR] = "or", + [HLSL_INTERLOCKED_XOR] = "xor", + }; + + VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names)); + vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]); + dump_deref(buffer, &interlocked->dst); + vkd3d_string_buffer_printf(buffer, ", coords = "); + dump_src(buffer, &interlocked->coords); + if (interlocked->cmp_value.node) + { + vkd3d_string_buffer_printf(buffer, ", cmp_value = "); + dump_src(buffer, &interlocked->cmp_value); + } + vkd3d_string_buffer_printf(buffer, ", value = "); + dump_src(buffer, &interlocked->value); + vkd3d_string_buffer_printf(buffer, ")"); +} + static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_compile *compile) { @@ -3551,6 +3679,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); break;
+ case HLSL_IR_INTERLOCKED: + dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr)); + break; + case HLSL_IR_COMPILE: dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); break; @@ -3562,11 +3694,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", - hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); - break; } }
@@ -3625,10 +3752,15 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var)
void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) { + const struct hlsl_type *old_type = old->data_type, *new_type = new->data_type; struct hlsl_src *src, *next;
- VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx); - VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy); + if (hlsl_is_numeric_type(old_type)) + { + VKD3D_ASSERT(hlsl_is_numeric_type(new_type)); + VKD3D_ASSERT(old_type->e.numeric.dimx == new_type->e.numeric.dimx); + VKD3D_ASSERT(old_type->e.numeric.dimy == new_type->e.numeric.dimy); + }
LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) { @@ -3719,6 +3851,7 @@ static void free_ir_load(struct hlsl_ir_load *load) static void free_ir_loop(struct hlsl_ir_loop *loop) { hlsl_block_cleanup(&loop->body); + hlsl_block_cleanup(&loop->iter); vkd3d_free(loop); }
@@ -3778,6 +3911,15 @@ static void free_ir_index(struct hlsl_ir_index *index) vkd3d_free(index); }
+static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked) +{ + hlsl_cleanup_deref(&interlocked->dst); + hlsl_src_remove(&interlocked->coords); + hlsl_src_remove(&interlocked->cmp_value); + hlsl_src_remove(&interlocked->value); + vkd3d_free(interlocked); +} + static void free_ir_compile(struct hlsl_ir_compile *compile) { unsigned int i; @@ -3864,6 +4006,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_switch(hlsl_ir_switch(node)); break;
+ case HLSL_IR_INTERLOCKED: + free_ir_interlocked(hlsl_ir_interlocked(node)); + break; + case HLSL_IR_COMPILE: free_ir_compile(hlsl_ir_compile(node)); break; @@ -3875,10 +4021,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); - break; } }
@@ -3977,8 +4119,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function
uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) { + unsigned int src_component = 0; uint32_t ret = 0; - unsigned int i;
/* Leave replicate swizzles alone; some instructions need them. */ if (swizzle == HLSL_SWIZZLE(X, X, X, X) @@ -3987,13 +4129,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) || swizzle == HLSL_SWIZZLE(W, W, W, W)) return swizzle;
- for (i = 0; i < 4; ++i) + for (unsigned int dst_component = 0; dst_component < 4; ++dst_component) { - if (writemask & (1 << i)) - { - ret |= (swizzle & 3) << (i * 2); - swizzle >>= 2; - } + if (writemask & (1 << dst_component)) + hlsl_swizzle_set_component(&ret, dst_component, hlsl_swizzle_get_component(swizzle, src_component++)); } return ret; } @@ -4046,7 +4185,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim for (i = 0; i < dim; ++i) { unsigned int s = hlsl_swizzle_get_component(second, i); - ret |= hlsl_swizzle_get_component(first, s) << HLSL_SWIZZLE_SHIFT(i); + hlsl_swizzle_set_component(&ret, i, hlsl_swizzle_get_component(first, s)); } return ret; } @@ -4304,7 +4443,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) }
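Worked examples for the two swizzle helpers above:

/* hlsl_map_swizzle() spreads a compact swizzle onto a writemask:
 *   swizzle = .yw, writemask = xz (0b0101)
 *   -> destination component 0 reads Y, component 2 reads W;
 *      components 1 and 3 are unused.
 *
 * hlsl_combine_swizzles() resolves a swizzle of a swizzle,
 * combined[i] = first[second[i]].  With a float3 value v:
 *   v.yzx.zx == v.xy
 * because .zx selects indices (2, 0) of (Y, Z, X), giving (X, Y). */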
ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); - ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); + ctx->builtin_types.null = hlsl_new_simple_type(ctx, "NULL", HLSL_CLASS_NULL); ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); ctx->builtin_types.error = hlsl_new_simple_type(ctx, "<error type>", HLSL_CLASS_ERROR); hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); @@ -4437,6 +4576,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil ctx->output_control_point_count = UINT_MAX; ctx->output_primitive = 0; ctx->partitioning = 0; + ctx->input_control_point_count = UINT_MAX;
return true; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 075c76cb0e2..f614e12036e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@
#include "vkd3d_shader_private.h" #include "wine/rbtree.h" -#include "d3dcommon.h" #include "d3dx9shader.h"
/* The general IR structure is inspired by Mesa GLSL hir, even though the code @@ -51,31 +50,17 @@ * DEALINGS IN THE SOFTWARE. */
-#define HLSL_SWIZZLE_X (0u) -#define HLSL_SWIZZLE_Y (1u) -#define HLSL_SWIZZLE_Z (2u) -#define HLSL_SWIZZLE_W (3u) - -#define HLSL_SWIZZLE(x, y, z, w) \ - (((HLSL_SWIZZLE_ ## x) << 0) \ - | ((HLSL_SWIZZLE_ ## y) << 2) \ - | ((HLSL_SWIZZLE_ ## z) << 4) \ - | ((HLSL_SWIZZLE_ ## w) << 6)) - -#define HLSL_SWIZZLE_MASK (0x3u) -#define HLSL_SWIZZLE_SHIFT(idx) (2u * (idx)) +#define HLSL_SWIZZLE VKD3D_SHADER_SWIZZLE
static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned int idx) { - return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK; + return vsir_swizzle_get_component(swizzle, idx); }
-static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle) +static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) { - return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0), - hlsl_swizzle_get_component(swizzle, 1), - hlsl_swizzle_get_component(swizzle, 2), - hlsl_swizzle_get_component(swizzle, 3)); + *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); + *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); }
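Since HLSL swizzles now use the vsir encoding (8 bits per component, shifted by VKD3D_SHADER_SWIZZLE_SHIFT(idx) = 8 * idx), a quick sketch of the new setter:

uint32_t s = HLSL_SWIZZLE(Z, X, X, X); /* 0x00000002 */

hlsl_swizzle_set_component(&s, 1, VKD3D_SHADER_SWIZZLE_W);
/* s == 0x00000302: component 0 is Z, component 1 is W, i.e. ".zw". */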
enum hlsl_type_class @@ -105,6 +90,7 @@ enum hlsl_type_class HLSL_CLASS_GEOMETRY_SHADER, HLSL_CLASS_CONSTANT_BUFFER, HLSL_CLASS_BLEND_STATE, + HLSL_CLASS_STREAM_OUTPUT, HLSL_CLASS_VOID, HLSL_CLASS_NULL, HLSL_CLASS_ERROR, @@ -142,6 +128,13 @@ enum hlsl_sampler_dim /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ };
+enum hlsl_so_object_type +{ + HLSL_STREAM_OUTPUT_POINT_STREAM, + HLSL_STREAM_OUTPUT_LINE_STREAM, + HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, +}; + enum hlsl_regset { HLSL_REGSET_SAMPLERS, @@ -152,6 +145,13 @@ enum hlsl_regset HLSL_REGSET_LAST = HLSL_REGSET_NUMERIC, };
+enum hlsl_array_type +{ + HLSL_ARRAY_GENERIC, + HLSL_ARRAY_PATCH_INPUT, + HLSL_ARRAY_PATCH_OUTPUT, +}; + /* An HLSL source-level data type, including anonymous structs and typedefs. */ struct hlsl_type { @@ -176,16 +176,6 @@ struct hlsl_type * Modifiers that don't fall inside this mask are to be stored in the variable in * hlsl_ir_var.modifiers, or in the struct field in hlsl_ir_field.modifiers. */ uint32_t modifiers; - /* Size of the type values on each dimension. For non-numeric types, they are set for the - * convenience of the sm1/sm4 backends. - * If type is HLSL_CLASS_SCALAR, then both dimx = 1 and dimy = 1. - * If type is HLSL_CLASS_VECTOR, then dimx is the size of the vector, and dimy = 1. - * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows. - * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements. - * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1. - */ - unsigned int dimx; - unsigned int dimy; /* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */ unsigned int sample_count;
@@ -195,6 +185,10 @@ struct hlsl_type struct { enum hlsl_base_type type; + /* For scalars, dimx == dimy == 1. + * For vectors, dimx == vector width; dimy == 1. + * For matrices, dimx == column count; dimy == row count. */ + unsigned int dimx, dimy; } numeric; /* Additional information if type is HLSL_CLASS_STRUCT. */ struct @@ -208,6 +202,7 @@ struct hlsl_type struct hlsl_type *type; /* Array length, or HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT if it is not known yet at parse time. */ unsigned int elements_count; + enum hlsl_array_type array_type; } array; /* Additional information if the class is HLSL_CLASS_TEXTURE or * HLSL_CLASS_UAV. */ @@ -220,6 +215,12 @@ struct hlsl_type } resource; /* Additional field to distinguish object types. Currently used only for technique types. */ unsigned int version; + /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */ + struct + { + struct hlsl_type *type; + enum hlsl_so_object_type so_type; + } so; } e;
/* Number of numeric register components used by one value of this type, for each regset. @@ -326,12 +327,11 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, + HLSL_IR_INTERLOCKED,
HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, - - HLSL_IR_VSIR_INSTRUCTION_REF, };
/* Common data for every type of IR instruction node. */ @@ -524,6 +524,10 @@ struct hlsl_ir_var * element of a struct, and thus needs to be aligned when packed in the signature. */ bool force_align;
+ /* Whether this is a sampler that was created from the combination of a + * sampler and a texture for SM<4 backwards compatibility. */ + bool is_combined_sampler; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -644,21 +648,30 @@ struct hlsl_ir_if struct hlsl_block else_block; };
-enum hlsl_ir_loop_unroll_type +enum hlsl_loop_unroll_type +{ + HLSL_LOOP_UNROLL, + HLSL_LOOP_FORCE_UNROLL, + HLSL_LOOP_FORCE_LOOP +}; + +enum hlsl_loop_type { - HLSL_IR_LOOP_UNROLL, - HLSL_IR_LOOP_FORCE_UNROLL, - HLSL_IR_LOOP_FORCE_LOOP + HLSL_LOOP_FOR, + HLSL_LOOP_WHILE, + HLSL_LOOP_DO_WHILE };
struct hlsl_ir_loop { struct hlsl_ir_node node; + struct hlsl_block iter; /* loop condition is stored in the body (as "if (!condition) break;") */ struct hlsl_block body; + enum hlsl_loop_type type; unsigned int next_index; /* liveness index of the end of the loop */ unsigned int unroll_limit; - enum hlsl_ir_loop_unroll_type unroll_type; + enum hlsl_loop_unroll_type unroll_type; };
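Going by the comment above ("loop condition is stored in the body"), a source-level for loop presumably decomposes onto the new iter block as follows:

/* for (i = 0; i < n; ++i) { ... }
 *
 *   i = 0;                  emitted before the hlsl_ir_loop node
 *   loop->body begins with: if (!(i < n)) break;
 *   loop->body continues:   { ... }
 *   loop->iter:             ++i
 */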
struct hlsl_ir_switch_case @@ -700,16 +713,15 @@ enum hlsl_ir_expr_op HLSL_OP1_F32TOF16, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, + HLSL_OP1_ISINF, HLSL_OP1_LOG2, HLSL_OP1_LOGIC_NOT, HLSL_OP1_NEG, - HLSL_OP1_NRM, HLSL_OP1_RCP, HLSL_OP1_REINTERPRET, HLSL_OP1_ROUND, HLSL_OP1_RSQ, HLSL_OP1_SAT, - HLSL_OP1_SIGN, HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ HLSL_OP1_SQRT, @@ -719,7 +731,6 @@ enum hlsl_ir_expr_op HLSL_OP2_BIT_AND, HLSL_OP2_BIT_OR, HLSL_OP2_BIT_XOR, - HLSL_OP2_CRS, HLSL_OP2_DIV, HLSL_OP2_DOT, HLSL_OP2_EQUAL, @@ -781,7 +792,17 @@ struct hlsl_ir_swizzle { struct hlsl_ir_node node; struct hlsl_src val; - uint32_t swizzle; + union + { + uint32_t vector; + struct hlsl_matrix_swizzle + { + struct + { + uint8_t x, y; + } components[4]; + } matrix; + } u; };
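A sketch of the matrix representation, using the convention from dump_ir_swizzle() in hlsl.c above, which prints each component as _m<y><x> (row, then column):

/* The matrix swizzle "._m10_m01" as an hlsl_matrix_swizzle: */
struct hlsl_matrix_swizzle s =
{
    .components =
    {
        {.x = 0, .y = 1}, /* _m10: row 1, column 0 */
        {.x = 1, .y = 0}, /* _m01: row 0, column 1 */
    },
};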
struct hlsl_ir_index @@ -844,6 +865,10 @@ enum hlsl_resource_load_type HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, HLSL_RESOURCE_GATHER_ALPHA, + HLSL_RESOURCE_GATHER_CMP_RED, + HLSL_RESOURCE_GATHER_CMP_GREEN, + HLSL_RESOURCE_GATHER_CMP_BLUE, + HLSL_RESOURCE_GATHER_CMP_ALPHA, HLSL_RESOURCE_SAMPLE_INFO, HLSL_RESOURCE_RESINFO, }; @@ -934,14 +959,30 @@ struct hlsl_ir_stateblock_constant char *name; };
-/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. - * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ -struct hlsl_ir_vsir_instruction_ref +enum hlsl_interlocked_op { - struct hlsl_ir_node node; + HLSL_INTERLOCKED_ADD, + HLSL_INTERLOCKED_AND, + HLSL_INTERLOCKED_CMP_EXCH, + HLSL_INTERLOCKED_EXCH, + HLSL_INTERLOCKED_MAX, + HLSL_INTERLOCKED_MIN, + HLSL_INTERLOCKED_OR, + HLSL_INTERLOCKED_XOR, +};
- /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ - unsigned int vsir_instr_idx; +/* Represents an interlocked operation. + * + * The data_type of the node indicates whether or not the original value is returned. + * If the original value is not returned, the data_type is set to NULL. + * Otherwise, the data_type is set to the type of the original value. + */ +struct hlsl_ir_interlocked +{ + struct hlsl_ir_node node; + enum hlsl_interlocked_op op; + struct hlsl_deref dst; + struct hlsl_src coords, cmp_value, value; };
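A hedged sketch of how the HLSL atomic intrinsics presumably map onto this node, following the data_type comment above and dump_ir_interlocked() in hlsl.c:

/* InterlockedAdd(dst, v);        op = HLSL_INTERLOCKED_ADD, data_type = NULL
 * InterlockedAdd(dst, v, orig);  op = HLSL_INTERLOCKED_ADD,
 *                                data_type = type of the original value
 * InterlockedCompareExchange(dst, cmp, v, orig);
 *                                op = HLSL_INTERLOCKED_CMP_EXCH,
 *                                cmp_value set to cmp
 *
 * cmp_value.node is NULL for every op other than HLSL_INTERLOCKED_CMP_EXCH. */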
struct hlsl_scope @@ -1114,11 +1155,34 @@ struct hlsl_ctx * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ uint32_t thread_count[3];
+ /* Declared information in tessellation shaders. + * + * The following fields are specific to hull shaders: output_control_point_count, + * output_control_point_type, output_primitive, partitioning, and patch_constant_func. + * + * The output_control_point_count and output_control_point_type fields correspond to the + * "outputcontrolpoints" attribute and the return type of a hull shader's control point + * function, respectively. Moreover, if an OutputPatch parameter is declared in the hull + * shader's patch constant function, its type and element count must match these fields. + * + * The input_control_point_count and input_control_point_type fields are specified by the + * InputPatch parameter in hull shaders, or by the OutputPatch parameter in domain + * shaders. + * + * For input_ and output_control_point_count, the value UINT_MAX indicates that the value is + * unknown or not set by the shader. */ enum vkd3d_tessellator_domain domain; unsigned int output_control_point_count; + struct hlsl_type *output_control_point_type; enum vkd3d_shader_tessellator_output_primitive output_primitive; enum vkd3d_shader_tessellator_partitioning partitioning; struct hlsl_ir_function_decl *patch_constant_func; + unsigned int input_control_point_count; + struct hlsl_type *input_control_point_type; + + /* Whether the current function being processed during HLSL codegen is + * the patch constant function in a hull shader. */ + bool is_patch_constant_func;
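A concrete (hypothetical) hull shader, to illustrate how the fields are filled in:

/* [domain("tri")]
 * [outputcontrolpoints(4)]
 * [patchconstantfunc("patch_const")]
 * cp_t main(InputPatch<cp_t, 3> patch, ...) { ... }
 *
 * presumably yields:
 *   ctx->output_control_point_count = 4      (outputcontrolpoints attribute)
 *   ctx->output_control_point_type  = cp_t   (return type of main)
 *   ctx->input_control_point_count  = 3      (InputPatch<cp_t, 3>)
 *   ctx->input_control_point_type   = cp_t
 * with both counts left at UINT_MAX when not declared. */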
/* In some cases we generate opcodes by parsing an HLSL function and then * invoking it. If not NULL, this field is the name of the function that we @@ -1241,6 +1305,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); }
+static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_INTERLOCKED); + return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node); +} + static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) { VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); @@ -1259,12 +1329,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); }
-static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) -{ - VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); - return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); -} - static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -1436,12 +1500,14 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); -bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); +void hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl);
void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); +void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, + struct vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f); void hlsl_dump_var_default_values(const struct hlsl_ir_var *var);
bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, @@ -1492,7 +1558,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co
const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type);
-struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); +struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, + unsigned int array_size, enum hlsl_array_type array_type); struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2); struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); @@ -1519,6 +1586,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); +struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, + enum hlsl_so_object_type so_type, struct hlsl_type *type); struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
@@ -1550,8 +1619,14 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, + const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, + struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, + struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, + unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, @@ -1588,9 +1663,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, struct list *cases, const struct vkd3d_shader_location *loc);
-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, - struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); - void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, @@ -1619,6 +1691,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); bool hlsl_type_is_resource(const struct hlsl_type *type); bool hlsl_type_is_shader(const struct hlsl_type *type); +bool hlsl_type_is_patch_array(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
@@ -1645,24 +1718,35 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context);
D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); -D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler);
-void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); -int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); +struct extern_resource +{ + /* "var" is non-NULL only if this resource is a whole variable, so it may + * be responsible for more than one component. */ + const struct hlsl_ir_var *var; + const struct hlsl_buffer *buffer;
-int tpf_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + char *name; + bool is_user_packed;
-enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, - unsigned int storage_modifiers); + /* The data type of a single component of the resource. This might be + * different from the data type of the resource itself in 4.0 profiles, + * where an array (or multi-dimensional array) is handled as a single + * resource, unlike in 5.0. */ + struct hlsl_type *component_type; + + enum hlsl_regset regset; + unsigned int id, space, index, bind_count; + + struct vkd3d_shader_location loc; +};
struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 8dace11916a..605a9abaa93 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -104,6 +104,8 @@ if {return KW_IF; } in {return KW_IN; } inline {return KW_INLINE; } inout {return KW_INOUT; } +InputPatch {return KW_INPUTPATCH; } +LineStream {return KW_LINESTREAM; } linear {return KW_LINEAR; } matrix {return KW_MATRIX; } namespace {return KW_NAMESPACE; } @@ -111,9 +113,11 @@ nointerpolation {return KW_NOINTERPOLATION; } noperspective {return KW_NOPERSPECTIVE; } NULL {return KW_NULL; } out {return KW_OUT; } +OutputPatch {return KW_OUTPUTPATCH; } packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } +PointStream {return KW_POINTSTREAM; } pixelshader {return KW_PIXELSHADER; } RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } @@ -170,6 +174,7 @@ texture3D {return KW_TEXTURE3D; } TextureCube {return KW_TEXTURECUBE; } textureCUBE {return KW_TEXTURECUBE; } TextureCubeArray {return KW_TEXTURECUBEARRAY; } +TriangleStream {return KW_TRIANGLESTREAM; } true {return KW_TRUE; } typedef {return KW_TYPEDEF; } unsigned {return KW_UNSIGNED; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 60aade732db..7afc9274c2e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -247,18 +247,19 @@ static bool type_contains_only_numerics(const struct hlsl_type *type)
static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { - if (hlsl_is_numeric_type(src) && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) + if (hlsl_is_numeric_type(src) && src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1 + && type_contains_only_numerics(dst)) return true;
if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX - && src->dimx >= dst->dimx && src->dimy >= dst->dimy) + && src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy) return true;
- if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) + if ((src->class == HLSL_CLASS_MATRIX && src->e.numeric.dimx > 1 && src->e.numeric.dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false;
- if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) + if ((dst->class == HLSL_CLASS_MATRIX && dst->e.numeric.dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false;
@@ -273,16 +274,16 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ if (hlsl_is_numeric_type(src)) { /* Scalar vars can be converted to any other numeric data type */ - if (src->dimx == 1 && src->dimy == 1) + if (src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1) return true; /* The other way around is true too */ - if (dst->dimx == 1 && dst->dimy == 1) + if (dst->e.numeric.dimx == 1 && dst->e.numeric.dimy == 1) return true;
if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) { if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) - return src->dimx >= dst->dimx && src->dimy >= dst->dimy; + return src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy;
/* Matrix-vector conversion is apparently allowed if they have * the same components count, or if the matrix is 1xN or Nx1 @@ -292,8 +293,8 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) return true;
- if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && - (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) + if ((src->class == HLSL_CLASS_VECTOR || src->e.numeric.dimx == 1 || src->e.numeric.dimy == 1) + && (dst->class == HLSL_CLASS_VECTOR || dst->e.numeric.dimx == 1 || dst->e.numeric.dimy == 1)) return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); }
@@ -301,7 +302,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ } else { - return src->dimx >= dst->dimx; + return src->e.numeric.dimx >= dst->e.numeric.dimx; } }
@@ -335,7 +336,7 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node if (type->class == HLSL_CLASS_ERROR) return;
- if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy > 1) + if (type->class > HLSL_CLASS_LAST_NUMERIC || type->e.numeric.dimx > 1 || type->e.numeric.dimy > 1) { struct vkd3d_string_buffer *string;
@@ -358,79 +359,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl if (src_type->class == HLSL_CLASS_NULL) return node;
- if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) - { - unsigned int src_comp_count = hlsl_type_component_count(src_type); - unsigned int dst_comp_count = hlsl_type_component_count(dst_type); - struct hlsl_deref var_deref; - bool broadcast, matrix_cast; - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - unsigned int dst_idx; - - broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1; - matrix_cast = !broadcast && dst_comp_count != src_comp_count - && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; - VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); - if (matrix_cast) - { - VKD3D_ASSERT(dst_type->dimx <= src_type->dimx); - VKD3D_ASSERT(dst_type->dimy <= src_type->dimy); - } - - if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) - return NULL; - hlsl_init_simple_deref_from_var(&var_deref, var); - - for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) - { - struct hlsl_ir_node *component_load; - struct hlsl_type *dst_comp_type; - struct hlsl_block store_block; - unsigned int src_idx; - - if (broadcast) - { - src_idx = 0; - } - else if (matrix_cast) - { - unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx; - - src_idx = y * src_type->dimx + x; - } - else - { - src_idx = dst_idx; - } - - dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - - if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) - return NULL; - - if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) - return NULL; - hlsl_block_add_instr(block, cast); - - if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) - return NULL; - hlsl_block_add_block(block, &store_block); - } - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; - hlsl_block_add_instr(block, &load->node); + if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) + return NULL; + hlsl_block_add_instr(block, cast);
- return &load->node; - } - else - { - if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) - return NULL; - hlsl_block_add_instr(block, cast); - return cast; - } + return cast; }
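The branch removed above lowered any cast involving a non-vector type into per-component loads, casts, and stores; the only subtle part was the source-index arithmetic. A minimal standalone sketch of that mapping, assuming a float4x4-to-float3x3 truncation (dimensions hard-coded for illustration; none of this is vkd3d code):

#include <stdio.h>

/* Broadcast reads source component 0 for every destination component; a
 * matrix-to-smaller-matrix cast maps each destination cell (x, y) to the
 * same cell in the wider source row; everything else copies flat. */
int main(void)
{
    const unsigned int src_dimx = 4, dst_dimx = 3, dst_dimy = 3;
    unsigned int dst_idx;

    for (dst_idx = 0; dst_idx < dst_dimx * dst_dimy; ++dst_idx)
    {
        unsigned int x = dst_idx % dst_dimx, y = dst_idx / dst_dimx;

        printf("dst[%u] <- src[%u]\n", dst_idx, y * src_dimx + x);
    }
    return 0;
}

After this change add_cast() emits a single cast instruction for those cases as well, presumably leaving the component-wise decomposition to a later lowering pass.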
static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -458,7 +391,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct return NULL; }
- if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation) + if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type) + && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy + && ctx->warn_implicit_truncation) hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix");
@@ -475,8 +410,12 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo for (i = 0; i < arrays->count; ++i) { if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) + { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); - dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); + dst_type = ctx->builtin_types.error; + break; + } + dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i], HLSL_ARRAY_GENERIC); }
if (instr->data_type->class == HLSL_CLASS_ERROR) @@ -551,13 +490,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co return true; }
-enum loop_type -{ - LOOP_FOR, - LOOP_WHILE, - LOOP_DO_WHILE -}; - static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) { unsigned int i, j; @@ -573,8 +505,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru } }
-static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, - struct hlsl_block *cond, struct hlsl_block *iter) +static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_loop_type type, struct hlsl_block *cond) { struct hlsl_ir_node *instr, *next;
@@ -584,8 +516,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- resolve_loop_continue(ctx, &iff->then_block, type, cond, iter); - resolve_loop_continue(ctx, &iff->else_block, type, cond, iter); + resolve_loop_continue(ctx, &iff->then_block, type, cond); + resolve_loop_continue(ctx, &iff->else_block, type, cond); } else if (instr->type == HLSL_IR_JUMP) { @@ -595,7 +527,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) continue;
- if (type == LOOP_DO_WHILE) + if (type == HLSL_LOOP_DO_WHILE) { if (!hlsl_clone_block(ctx, &cond_block, cond)) return; @@ -606,13 +538,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block } list_move_before(&instr->entry, &cond_block.instrs); } - else if (type == LOOP_FOR) - { - if (!hlsl_clone_block(ctx, &cond_block, iter)) - return; - list_move_before(&instr->entry, &cond_block.instrs); - } - jump->type = HLSL_IR_JUMP_CONTINUE; } } } @@ -674,12 +599,11 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: + case HLSL_IR_INTERLOCKED: case HLSL_IR_STATEBLOCK_CONSTANT: hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_unreachable(); } }
@@ -738,11 +662,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str return res.number.u; }
-static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type type, const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { - enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; + enum hlsl_loop_unroll_type unroll_type = HLSL_LOOP_UNROLL; unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop;
@@ -773,11 +697,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, hlsl_block_cleanup(&expr); }
- unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; + unroll_type = HLSL_LOOP_FORCE_UNROLL; } else if (!strcmp(attr->name, "loop")) { - unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + unroll_type = HLSL_LOOP_FORCE_LOOP; } else if (!strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) @@ -790,7 +714,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, } }
- resolve_loop_continue(ctx, body, type, cond, iter); + resolve_loop_continue(ctx, body, type, cond);
if (!init && !(init = make_empty_block(ctx))) goto oom; @@ -798,15 +722,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, if (!append_conditional_break(ctx, cond)) goto oom;
- if (iter) - hlsl_block_add_block(body, iter); - - if (type == LOOP_DO_WHILE) + if (type == HLSL_LOOP_DO_WHILE) list_move_tail(&body->instrs, &cond->instrs); else list_move_head(&body->instrs, &cond->instrs);
- if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) + if (!(loop = hlsl_new_loop(ctx, iter, body, unroll_type, unroll_limit, loc))) goto oom; hlsl_block_add_instr(init, loop);
@@ -860,6 +781,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod if (value->data_type->class == HLSL_CLASS_MATRIX) { /* Matrix swizzle */ + struct hlsl_matrix_swizzle s; bool m_swizzle; unsigned int inc, x, y;
@@ -888,12 +810,13 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod x = swizzle[i + 2] - '1'; }
- if (x >= value->data_type->dimx || y >= value->data_type->dimy) + if (x >= value->data_type->e.numeric.dimx || y >= value->data_type->e.numeric.dimy) return NULL; - swiz |= (y << 4 | x) << component * 8; + s.components[component].x = x; + s.components[component].y = y; component++; } - return hlsl_new_swizzle(ctx, swiz, component, value, loc); + return hlsl_new_matrix_swizzle(ctx, s, component, value, loc); }
/* Vector swizzle */ @@ -920,10 +843,9 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod break; }
- if (s >= value->data_type->dimx) + if (s >= value->data_type->e.numeric.dimx) return NULL; - swiz |= s << component * 2; - component++; + hlsl_swizzle_set_component(&swiz, component++, s); } if (valid) return hlsl_new_swizzle(ctx, swiz, component, value, loc); @@ -1035,7 +957,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str { unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim);
- if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) + if (index_type->class > HLSL_CLASS_VECTOR || index_type->e.numeric.dimx != dim_count) { struct vkd3d_string_buffer *string;
@@ -1192,9 +1114,11 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in struct fields."); + field->type = ctx->builtin_types.error; + break; }
- field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); + field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k], HLSL_ARRAY_GENERIC); } }
@@ -1282,9 +1206,15 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in typedefs."); + if (!(type = hlsl_type_clone(ctx, ctx->builtin_types.error, 0, 0))) + { + free_parse_variable_def(v); + ret = false; + } + break; }
- if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]))) { free_parse_variable_def(v); ret = false; @@ -1325,6 +1255,11 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name);
+ if ((param->modifiers & HLSL_STORAGE_OUT) && !(param->modifiers & HLSL_STORAGE_IN) + && (param->type->modifiers & HLSL_MODIFIER_CONST)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Parameter '%s' is declared as both \"out\" and \"const\".", param->name); + if (param->reg_reservation.offset_type) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "packoffset() is not allowed on function parameters."); @@ -1376,11 +1311,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters free_parse_initializer(&param->initializer); }
- if (!hlsl_add_var(ctx, var, false)) - { - hlsl_free_var(var); - return false; - } + hlsl_add_var(ctx, var);
if (!hlsl_array_reserve(ctx, (void **)&parameters->vars, &parameters->capacity, parameters->count + 1, sizeof(*parameters->vars))) @@ -1389,7 +1320,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters return true; }
-static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, +static void add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var; @@ -1397,7 +1328,7 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *
type = hlsl_get_type(ctx->globals, "pass", false, false); if (!(var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL))) - return false; + return; var->annotations = annotations;
var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks)); @@ -1405,21 +1336,10 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * var->state_block_count = 1; var->state_block_capacity = 1;
- if (!hlsl_add_var(ctx, var, false)) - { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); - - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Identifier \"%s\" was already declared in this scope.", var->name); - hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); - hlsl_free_var(var); - return false; - } - - return true; + hlsl_add_var(ctx, var); }
-static bool add_technique(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *scope, +static void add_technique(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *scope, struct hlsl_scope *annotations, const char *typename, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var; @@ -1427,25 +1347,14 @@ static bool add_technique(struct hlsl_ctx *ctx, const char *name, struct hlsl_sc
type = hlsl_get_type(ctx->globals, typename, false, false); if (!(var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL))) - return false; + return; var->scope = scope; var->annotations = annotations;
- if (!hlsl_add_var(ctx, var, false)) - { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); - - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Identifier \"%s\" was already declared in this scope.", var->name); - hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); - hlsl_free_var(var); - return false; - } - - return true; + hlsl_add_var(ctx, var); }
-static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *scope, +static void add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *scope, struct hlsl_scope *annotations, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var; @@ -1453,22 +1362,11 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl
type = hlsl_get_type(ctx->globals, "fxgroup", false, false); if (!(var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL))) - return false; + return; var->scope = scope; var->annotations = annotations;
- if (!hlsl_add_var(ctx, var, false)) - { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); - - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Identifier \"%s\" was already declared in this scope.", var->name); - hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); - hlsl_free_var(var); - return false; - } - - return true; + hlsl_add_var(ctx, var); }
static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, @@ -1580,7 +1478,7 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) { /* Scalar vars can be converted to pretty much everything */ - if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) + if ((t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1) || (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1)) return true;
if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) @@ -1595,13 +1493,13 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) return true;
- return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) - || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); + return (t1->class == HLSL_CLASS_MATRIX && (t1->e.numeric.dimx == 1 || t1->e.numeric.dimy == 1)) + || (t2->class == HLSL_CLASS_MATRIX && (t2->e.numeric.dimx == 1 || t2->e.numeric.dimy == 1)); }
/* Both matrices */ - if ((t1->dimx >= t2->dimx && t1->dimy >= t2->dimy) - || (t1->dimx <= t2->dimx && t1->dimy <= t2->dimy)) + if ((t1->e.numeric.dimx >= t2->e.numeric.dimx && t1->e.numeric.dimy >= t2->e.numeric.dimy) + || (t1->e.numeric.dimx <= t2->e.numeric.dimx && t1->e.numeric.dimy <= t2->e.numeric.dimy)) return true; }
@@ -1661,37 +1559,37 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return false; }
- if (t1->dimx == 1 && t1->dimy == 1) + if (t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1) { *type = t2->class; - *dimx = t2->dimx; - *dimy = t2->dimy; + *dimx = t2->e.numeric.dimx; + *dimy = t2->e.numeric.dimy; } - else if (t2->dimx == 1 && t2->dimy == 1) + else if (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1) { *type = t1->class; - *dimx = t1->dimx; - *dimy = t1->dimy; + *dimx = t1->e.numeric.dimx; + *dimy = t1->e.numeric.dimy; } else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) { *type = HLSL_CLASS_MATRIX; - *dimx = min(t1->dimx, t2->dimx); - *dimy = min(t1->dimy, t2->dimy); + *dimx = min(t1->e.numeric.dimx, t2->e.numeric.dimx); + *dimy = min(t1->e.numeric.dimy, t2->e.numeric.dimy); } else { - if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) + if (t1->e.numeric.dimx * t1->e.numeric.dimy <= t2->e.numeric.dimx * t2->e.numeric.dimy) { *type = t1->class; - *dimx = t1->dimx; - *dimy = t1->dimy; + *dimx = t1->e.numeric.dimx; + *dimy = t1->e.numeric.dimy; } else { *type = t2->class; - *dimx = t2->dimx; - *dimy = t2->dimy; + *dimx = t2->e.numeric.dimx; + *dimy = t2->e.numeric.dimy; } }
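The rewritten expr_common_shape() keeps the same shape rules while reading dimensions from the new e.numeric union member: a scalar adopts the other operand's shape, two matrices take the per-axis minimum, and otherwise the operand with fewer components wins. A hedged, simplified model of those rules (it deliberately conflates type class with dimensions, which the real code does not; dimx is columns and dimy rows, as in hlsl_type):

#include <stdio.h>

struct shape { unsigned int dimx, dimy; };

static struct shape common_shape(struct shape a, struct shape b)
{
    if (a.dimx == 1 && a.dimy == 1) /* scalar adopts the other shape */
        return b;
    if (b.dimx == 1 && b.dimy == 1)
        return a;
    if (a.dimy > 1 && b.dimy > 1) /* two matrices: per-axis minimum */
        return (struct shape){a.dimx < b.dimx ? a.dimx : b.dimx,
                a.dimy < b.dimy ? a.dimy : b.dimy};
    /* otherwise the operand with fewer components wins */
    return a.dimx * a.dimy <= b.dimx * b.dimy ? a : b;
}

int main(void)
{
    struct shape s = common_shape((struct shape){3, 2}, (struct shape){2, 3});

    printf("float2x3 op float3x2 -> %ux%u\n", s.dimy, s.dimx); /* 2x2 */
    return 0;
}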
@@ -1719,7 +1617,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl return NULL; hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < type->dimy * type->dimx; ++i) + for (i = 0; i < type->e.numeric.dimy * type->e.numeric.dimx; ++i) { struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; struct hlsl_block store_block; @@ -1822,7 +1720,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct return arg;
bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->dimx, arg->data_type->dimy); + arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy);
if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) return NULL; @@ -1985,11 +1883,11 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls }
if (arg1->data_type->class == HLSL_CLASS_SCALAR) - dim = arg2->data_type->dimx; + dim = arg2->data_type->e.numeric.dimx; else if (arg2->data_type->class == HLSL_CLASS_SCALAR) - dim = arg1->data_type->dimx; + dim = arg1->data_type->e.numeric.dimx; else - dim = min(arg1->data_type->dimx, arg2->data_type->dimx); + dim = min(arg1->data_type->e.numeric.dimx, arg2->data_type->e.numeric.dimx);
if (dim == 1) op = HLSL_OP2_MUL; @@ -2092,8 +1990,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { if (*writemask & (1 << i)) { - unsigned int s = (*swizzle >> (i * 2)) & 3; - new_swizzle |= s << (bit++ * 2); + unsigned int s = hlsl_swizzle_get_component(*swizzle, i); + hlsl_swizzle_set_component(&new_swizzle, bit++, s); if (new_writemask & (1 << s)) return false; new_writemask |= 1 << s; @@ -2107,9 +2005,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { for (j = 0; j < width; ++j) { - unsigned int s = (new_swizzle >> (j * 2)) & 3; + unsigned int s = hlsl_swizzle_get_component(new_swizzle, j); if (s == i) - inverted |= j << (bit++ * 2); + hlsl_swizzle_set_component(&inverted, bit++, j); } }
@@ -2119,22 +2017,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned return true; }
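invert_swizzle() turns an assignment through a swizzled lvalue into a masked store with a reordered right-hand side; the hunk above only swaps the open-coded two-bits-per-component shifts for the new hlsl_swizzle_get_component()/hlsl_swizzle_set_component() accessors. A standalone sketch of the inversion itself, assuming the old two-bit packing and a full writemask (helper names are hypothetical):

#include <stdint.h>
#include <stdio.h>

static unsigned int swizzle_get(uint32_t swizzle, unsigned int idx)
{
    return (swizzle >> (2 * idx)) & 0x3;
}

static void swizzle_set(uint32_t *swizzle, unsigned int idx, unsigned int c)
{
    *swizzle = (*swizzle & ~(0x3u << (2 * idx))) | (uint32_t)c << (2 * idx);
}

/* "v.zxy = rhs" writes rhs[0] to v.z, rhs[1] to v.x and rhs[2] to v.y;
 * inverted, that is a store through writemask xyz of rhs.yzx. */
int main(void)
{
    uint32_t swizzle = 0, inverted = 0;
    unsigned int i, j, width = 3, bit = 0;

    swizzle_set(&swizzle, 0, 2); /* z */
    swizzle_set(&swizzle, 1, 0); /* x */
    swizzle_set(&swizzle, 2, 1); /* y */

    for (i = 0; i < 4; ++i) /* destination components, in register order */
    {
        for (j = 0; j < width; ++j)
        {
            if (swizzle_get(swizzle, j) == i)
                swizzle_set(&inverted, bit++, j);
        }
    }

    for (i = 0; i < width; ++i)
        printf("v[%u] <- rhs[%u]\n", i, swizzle_get(inverted, i));
    return 0;
}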
-static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) +static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle, + uint32_t *ret_inverted, unsigned int *writemask, unsigned int *ret_width) { - /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. - * components are indexed by their sources. i.e. the first component comes from the first - * component of the rhs. */ - unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; + unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0; + struct hlsl_matrix_swizzle new_swizzle = {0};
/* First, we filter the swizzle to remove components that aren't enabled by writemask. */ for (i = 0; i < 4; ++i) { if (*writemask & (1 << i)) { - unsigned int s = (*swizzle >> (i * 8)) & 0xff; - unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int x = swizzle->components[i].x; + unsigned int y = swizzle->components[i].y; unsigned int idx = x + y * 4; - new_swizzle |= s << (bit++ * 8); + + new_swizzle.components[bit++] = swizzle->components[i]; if (new_writemask & (1 << idx)) return false; new_writemask |= 1 << idx; @@ -2142,33 +2040,34 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un } width = bit;
- /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the - * incoming vector. */ + /* Then we invert the swizzle. The resulting swizzle uses a uint32_t + * vector format, because it's for the incoming vector. */ bit = 0; for (i = 0; i < 16; ++i) { for (j = 0; j < width; ++j) { - unsigned int s = (new_swizzle >> (j * 8)) & 0xff; - unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int x = new_swizzle.components[j].x; + unsigned int y = new_swizzle.components[j].y; unsigned int idx = x + y * 4; if (idx == i) - inverted |= j << (bit++ * 2); + hlsl_swizzle_set_component(&inverted, bit++, j); } }
- *swizzle = inverted; + *ret_inverted = inverted; *writemask = new_writemask; *ret_width = width; return true; }
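The matrix variant now takes the decoded struct hlsl_matrix_swizzle instead of an 8-bits-per-component packed word; each component names a (column x, row y) cell, and the writemask is built over flat indices x + y * 4. A small sketch of that index arithmetic for a hypothetical "m._m21_m00 = rhs" assignment:

#include <stdio.h>

struct component { unsigned int x, y; }; /* mirrors the new swizzle layout */

int main(void)
{
    const struct component swizzle[2] = {{1, 2}, {0, 0}}; /* _m21, _m00 */
    unsigned int writemask = 0, i;

    for (i = 0; i < 2; ++i)
    {
        unsigned int idx = swizzle[i].x + swizzle[i].y * 4;

        writemask |= 1u << idx;
        printf("rhs[%u] -> cell (row %u, col %u), flat index %u\n",
                i, swizzle[i].y, swizzle[i].x, idx);
    }
    printf("writemask %#x\n", writemask); /* 0x201 */
    return 0;
}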
static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) + enum parse_assign_op assign_op, struct hlsl_ir_node *rhs, bool is_function_out_arg) { struct hlsl_type *lhs_type = lhs->data_type; unsigned int writemask = 0, width = 0; bool matrix_writemask = false; + bool first_cast = true;
if (lhs->data_type->class == HLSL_CLASS_ERROR || rhs->data_type->class == HLSL_CLASS_ERROR) { @@ -2193,8 +2092,10 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
if (hlsl_is_numeric_type(lhs_type)) { - writemask = (1 << lhs_type->dimx) - 1; - width = lhs_type->dimx; + unsigned int size = hlsl_type_component_count(lhs_type); + + writemask = (1 << size) - 1; + width = size; }
if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) @@ -2204,35 +2105,59 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { - hlsl_fixme(ctx, &lhs->loc, "Cast on the LHS."); - return false; + struct hlsl_ir_node *cast = lhs; + lhs = hlsl_ir_expr(cast)->operands[0].node; + + if (hlsl_type_component_count(lhs->data_type) != hlsl_type_component_count(cast->data_type)) + { + hlsl_fixme(ctx, &cast->loc, "Size change on the LHS."); + return false; + } + if (hlsl_version_ge(ctx, 4, 0) && (!is_function_out_arg || !first_cast)) + { + hlsl_error(ctx, &cast->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, + "Base type casts are not allowed on the LHS for profiles >= 4."); + return false; + } + + lhs_type = lhs->data_type; + if (lhs_type->class == HLSL_CLASS_VECTOR || (lhs_type->class == HLSL_CLASS_MATRIX && matrix_writemask)) + lhs_type = hlsl_get_vector_type(ctx, lhs->data_type->e.numeric.type, width); + + first_cast = false; } else if (lhs->type == HLSL_IR_SWIZZLE) { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); struct hlsl_ir_node *new_swizzle; - uint32_t s = swizzle->swizzle; + uint32_t s;
VKD3D_ASSERT(!matrix_writemask);
if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) { + struct hlsl_matrix_swizzle ms = swizzle->u.matrix; + if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) { hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); return false; } - if (!invert_swizzle_matrix(&s, &writemask, &width)) + if (!invert_swizzle_matrix(&ms, &s, &writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); return false; } matrix_writemask = true; } - else if (!invert_swizzle(&s, &writemask, &width)) + else { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); - return false; + s = swizzle->u.vector; + if (!invert_swizzle(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); + return false; + } }
if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) @@ -2240,6 +2165,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc hlsl_block_add_instr(block, new_swizzle);
lhs = swizzle->val.node; + lhs_type = hlsl_get_vector_type(ctx, lhs_type->e.numeric.type, width); rhs = new_swizzle; } else @@ -2249,6 +2175,12 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc } }
+ /* lhs casts could have resulted in a discrepancy between the + * rhs->data_type and the type of the variable that will be ultimately + * stored to. This is corrected. */ + if (!(rhs = add_cast(ctx, block, rhs, lhs_type, &rhs->loc))) + return false; + if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) { struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; @@ -2275,13 +2207,13 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim);
- if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) + if (width != resource_type->e.resource.format->e.numeric.dimx * resource_type->e.resource.format->e.numeric.dimy) hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components.");
VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - VKD3D_ASSERT(coords->data_type->dimx == dim_count); + VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count);
if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) { @@ -2298,14 +2230,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
hlsl_init_deref_from_index_chain(ctx, &deref, lhs);
- for (i = 0; i < lhs->data_type->dimy; ++i) + for (i = 0; i < lhs->data_type->e.numeric.dimy; ++i) { - for (j = 0; j < lhs->data_type->dimx; ++j) + for (j = 0; j < lhs->data_type->e.numeric.dimx; ++j) { struct hlsl_ir_node *load; struct hlsl_block store_block; const unsigned int idx = i * 4 + j; - const unsigned int component = i * lhs->data_type->dimx + j; + const unsigned int component = i * lhs->data_type->e.numeric.dimx + j;
if (!(writemask & (1 << idx))) continue; @@ -2335,7 +2267,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
VKD3D_ASSERT(!matrix_writemask);
- for (i = 0; i < mat->data_type->dimx; ++i) + for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) { struct hlsl_ir_node *cell, *load, *store, *c; struct hlsl_deref deref; @@ -2404,7 +2336,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d return false; hlsl_block_add_instr(block, one);
- if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) + if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one, false)) return false;
if (post) @@ -2621,7 +2553,6 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) bool constant_buffer = false; struct hlsl_ir_var *var; struct hlsl_type *type; - bool local = true; char *var_name; unsigned int i;
@@ -2670,33 +2601,37 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Only innermost array size can be implicit."); - v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else if (elem_components == 0) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Cannot declare an implicit size array of a size 0 type."); - v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else if (size == 0) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays need to be initialized."); - v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else if (size % elem_components != 0) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, "Cannot initialize implicit size array with %u components, expected a multiple of %u.", size, elem_components); - v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else { v->arrays.sizes[i] = size / elem_components; } } - type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); + type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i], HLSL_ARRAY_GENERIC); } }
@@ -2744,11 +2679,15 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "packoffset() is only allowed inside constant buffer declarations."); } + else + { + if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, + "Const variable \"%s\" is missing an initializer.", var->name); + }
if (ctx->cur_scope == ctx->globals) { - local = false; - if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); @@ -2811,16 +2750,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) "Static variables cannot have both numeric and resource components."); }
- if (!hlsl_add_var(ctx, var, local)) - { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); - - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Variable \"%s\" was already declared in this scope.", var->name); - hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); - hlsl_free_var(var); - return; - } + hlsl_add_var(ctx, var); }
static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) @@ -2908,7 +2838,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->initializer.args[0] = node_from_block(v->initializer.instrs); }
- initialize_var(ctx, var, &v->initializer, is_default_values_initializer); + if (var->data_type->class != HLSL_CLASS_ERROR) + initialize_var(ctx, var, &v->initializer, is_default_values_initializer);
if (is_default_values_initializer) { @@ -2993,13 +2924,144 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_ return true; }
+static enum hlsl_base_type hlsl_base_type_class(enum hlsl_base_type t) +{ + switch (t) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_DOUBLE: + return HLSL_TYPE_FLOAT; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return HLSL_TYPE_INT; + + case HLSL_TYPE_BOOL: + return HLSL_TYPE_BOOL; + } + + return 0; +} + +static unsigned int hlsl_base_type_width(enum hlsl_base_type t) +{ + switch (t) + { + case HLSL_TYPE_HALF: + return 16; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + return 32; + + case HLSL_TYPE_DOUBLE: + return 64; + } + + return 0; +} + +static uint32_t get_argument_conversion_mask(const struct hlsl_ir_var *parameter, const struct hlsl_ir_node *arg) +{ + enum + { + COMPONENT_COUNT_WIDENING = 1u << 0, + COMPONENT_TYPE_NARROWING = 1u << 1, + COMPONENT_TYPE_MISMATCH = 1u << 2, + COMPONENT_TYPE_CLASS_MISMATCH = 1u << 3, + COMPONENT_COUNT_NARROWING = 1u << 4, + }; + struct + { + enum hlsl_base_type type; + enum hlsl_base_type class; + unsigned int count, width; + } p, a; + uint32_t mask = 0; + + /* TODO: Non-numeric types. */ + if (!hlsl_is_numeric_type(arg->data_type)) + return 0; + + p.type = parameter->data_type->e.numeric.type; + p.class = hlsl_base_type_class(p.type); + p.count = hlsl_type_component_count(parameter->data_type); + p.width = hlsl_base_type_width(p.type); + + a.type = arg->data_type->e.numeric.type; + a.class = hlsl_base_type_class(a.type); + a.count = hlsl_type_component_count(arg->data_type); + a.width = hlsl_base_type_width(a.type); + + /* Component count narrowing. E.g., passing a float4 argument to a float2 + * or int2 parameter. */ + if (a.count > p.count) + mask |= COMPONENT_COUNT_NARROWING; + /* Different component type classes. E.g., passing an int argument to a + * float parameter. */ + if (a.class != p.class) + mask |= COMPONENT_TYPE_CLASS_MISMATCH; + /* Different component types. E.g., passing an int argument to an uint + * parameter. */ + if (a.type != p.type) + mask |= COMPONENT_TYPE_MISMATCH; + /* Component type narrowing. E.g., passing a float argument to a half + * parameter. */ + if (a.width > p.width) + mask |= COMPONENT_TYPE_NARROWING; + /* Component count widening. E.g., passing an int2 argument to an int4 + * parameter. */ + if (a.count < p.count) + mask |= COMPONENT_COUNT_WIDENING; + + return mask; +} + +static int function_compare(const struct hlsl_ir_function_decl *candidate, + const struct hlsl_ir_function_decl *ref, const struct parse_initializer *args) +{ + uint32_t candidate_mask = 0, ref_mask = 0, c, r; + bool any_worse = false, any_better = false; + unsigned int i; + int ret; + + for (i = 0; i < args->args_count; ++i) + { + candidate_mask |= (c = get_argument_conversion_mask(candidate->parameters.vars[i], args->args[i])); + ref_mask |= (r = get_argument_conversion_mask(ref->parameters.vars[i], args->args[i])); + + if (c > r) + any_worse = true; + else if (c < r) + any_better = true; + } + + /* We consider a candidate better if at least one parameter is a better + * match, and none are a worse match. */ + if ((ret = any_better - any_worse)) + return ret; + /* Otherwise, consider the kind of conversions across all parameters. 
*/ + return vkd3d_u32_compare(ref_mask, candidate_mask); +} + static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, const char *name, const struct parse_initializer *args, bool is_compile, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_function_decl *decl, *compatible_match = NULL; + struct hlsl_ir_function_decl *decl; + struct vkd3d_string_buffer *s; struct hlsl_ir_function *func; struct rb_entry *entry; + int compare; + size_t i; + struct + { + struct hlsl_ir_function_decl **candidates; + size_t count, capacity; + } candidates = {0};
if (!(entry = rb_get(&ctx->functions, name))) return NULL; @@ -3007,18 +3069,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx,
LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { - if (func_is_compatible_match(ctx, decl, is_compile, args)) + if (!func_is_compatible_match(ctx, decl, is_compile, args)) + continue; + + if (candidates.count) { - if (compatible_match) + compare = function_compare(decl, candidates.candidates[0], args); + + /* The candidate is worse; skip it. */ + if (compare < 0) + continue; + + /* The candidate is better; replace the current candidates. */ + if (compare > 0) { - hlsl_fixme(ctx, loc, "Prioritize between multiple compatible function overloads."); - break; + candidates.candidates[0] = decl; + candidates.count = 1; + continue; + } + } + + if (!(hlsl_array_reserve(ctx, (void **)&candidates.candidates, + &candidates.capacity, candidates.count + 1, sizeof(decl)))) + { + vkd3d_free(candidates.candidates); + return NULL; + } + candidates.candidates[candidates.count++] = decl; + } + + if (!candidates.count) + return NULL; + + if (candidates.count > 1) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL, "Ambiguous function call."); + if ((s = hlsl_get_string_buffer(ctx))) + { + hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, "Candidates are:"); + for (i = 0; i < candidates.count; ++i) + { + hlsl_dump_ir_function_decl(ctx, s, candidates.candidates[i]); + hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, " %s;", s->buffer); + vkd3d_string_buffer_clear(s); } - compatible_match = decl; + hlsl_release_string_buffer(ctx, s); } }
- return compatible_match; + decl = candidates.candidates[0]; + vkd3d_free(candidates.candidates); + + return decl; }
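Overload resolution now ranks all compatible declarations instead of bailing out with a fixme: each argument gets a conversion mask whose bits run from mildest (component-count widening) to most severe (component-count narrowing), a candidate wins outright if at least one argument converts better and none convert worse, and ties fall back to comparing the OR of the masks, lower being better. A simplified, scalar-only model of that ranking (hypothetical names, not the vkd3d API):

#include <stdint.h>
#include <stdio.h>

enum base { T_HALF, T_FLOAT, T_INT, T_UINT };

static int class_of(enum base t) { return t <= T_FLOAT ? 0 : 1; }
static int width_of(enum base t) { return t == T_HALF ? 16 : 32; }

/* Bits mirror the enum in get_argument_conversion_mask(); a numerically
 * smaller mask is a better match. */
static uint32_t conversion_mask(enum base param, enum base arg)
{
    uint32_t mask = 0;

    if (class_of(arg) != class_of(param))
        mask |= 1u << 3; /* component type class mismatch */
    if (arg != param)
        mask |= 1u << 2; /* component type mismatch */
    if (width_of(arg) > width_of(param))
        mask |= 1u << 1; /* component type narrowing */
    return mask;
}

int main(void)
{
    /* Calling f() with a uint argument, given overloads f(float) and
     * f(int): the int overload only mismatches the type, while the float
     * overload also crosses type classes, so f(int) is preferred. */
    printf("f(float): mask %#x\n", conversion_mask(T_FLOAT, T_UINT)); /* 0xc */
    printf("f(int):   mask %#x\n", conversion_mask(T_INT, T_UINT));   /* 0x4 */
    return 0;
}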
static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) @@ -3050,20 +3152,19 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, break; arg = args->args[k];
- if (!hlsl_types_are_equal(arg->data_type, param->data_type)) - { - struct hlsl_ir_node *cast; - - if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) - return NULL; - args->args[k] = cast; - arg = cast; - } - if (param->storage_modifiers & HLSL_STORAGE_IN) { struct hlsl_ir_node *store;
+ if (!hlsl_types_are_equal(arg->data_type, param->data_type)) + { + struct hlsl_ir_node *cast; + + if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) + return NULL; + arg = cast; + } + if (!(store = hlsl_new_simple_store(ctx, param, arg))) return NULL; hlsl_block_add_instr(args->instrs, store); @@ -3131,7 +3232,7 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, return NULL; hlsl_block_add_instr(args->instrs, &load->node);
- if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) + if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node, true)) return NULL; } } @@ -3164,7 +3265,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, if (!type_is_integer(type->e.numeric.type)) return arg;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); return add_implicit_conversion(ctx, params->instrs, arg, type, loc); }
@@ -3203,13 +3304,13 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * if (arg_type->class == HLSL_CLASS_VECTOR) { vectors = true; - dimx = min(dimx, arg_type->dimx); + dimx = min(dimx, arg_type->e.numeric.dimx); } else if (arg_type->class == HLSL_CLASS_MATRIX) { matrices = true; - dimx = min(dimx, arg_type->dimx); - dimy = min(dimy, arg_type->dimy); + dimx = min(dimx, arg_type->e.numeric.dimx); + dimy = min(dimy, arg_type->e.numeric.dimy); } }
@@ -3254,7 +3355,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; if (type_is_integer(type->e.numeric.type)) - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy);
return convert_args(ctx, params, type, loc); } @@ -3267,7 +3368,7 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
return convert_args(ctx, params, type, loc); } @@ -3334,7 +3435,7 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, const struct hlsl_type *type, enum hlsl_base_type base_type) { - return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); + return hlsl_get_numeric_type(ctx, type->class, base_type, type->e.numeric.dimx, type->e.numeric.dimy); }
static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, @@ -3855,7 +3956,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) return false;
- dim = min(type->dimx, type->dimy); + dim = min(type->e.numeric.dimx, type->e.numeric.dimy); if (dim == 1) return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc);
@@ -3939,7 +4040,7 @@ static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer * return false; type = params->args[0]->data_type; if (!(type->class == HLSL_CLASS_SCALAR - || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4))) + || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 4))) { struct vkd3d_string_buffer *string; if ((string = hlsl_type_to_string(ctx, type))) @@ -3976,7 +4077,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, return false; hlsl_block_add_instr(params->instrs, coeff);
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, arg, loc))) return false;
return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); @@ -4141,6 +4242,19 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, return !!add_user_call(ctx, func, params, false, loc); }
+static bool intrinsic_isinf(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type = params->args[0]->data_type, *bool_type; + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + + bool_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, + type->e.numeric.dimx, type->e.numeric.dimy); + + args[0] = params->args[0]; + return !!add_expr(ctx, params->instrs, HLSL_OP1_ISINF, args, bool_type, loc); +} + static bool intrinsic_ldexp(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4371,15 +4485,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (arg1->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; - cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); + cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->e.numeric.dimx, 1); } if (arg2->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; - cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); + cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->e.numeric.dimx); }
- matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->dimx, cast_type1->dimy); + matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->e.numeric.dimx, cast_type1->e.numeric.dimy);
if (vect_count == 0) { @@ -4387,12 +4501,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } else if (vect_count == 1) { - VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1); - ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy); + VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 || matrix_type->e.numeric.dimy == 1); + ret_type = hlsl_get_vector_type(ctx, base, matrix_type->e.numeric.dimx * matrix_type->e.numeric.dimy); } else { - VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1); + VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 && matrix_type->e.numeric.dimy == 1); ret_type = hlsl_get_scalar_type(ctx, base); }
@@ -4406,23 +4520,23 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, return false; hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < matrix_type->dimx; ++i) + for (i = 0; i < matrix_type->e.numeric.dimx; ++i) { - for (j = 0; j < matrix_type->dimy; ++j) + for (j = 0; j < matrix_type->e.numeric.dimy; ++j) { struct hlsl_ir_node *instr = NULL; struct hlsl_block block;
- for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) + for (k = 0; k < cast_type1->e.numeric.dimx && k < cast_type2->e.numeric.dimy; ++k) { struct hlsl_ir_node *value1, *value2, *mul;
if (!(value1 = hlsl_add_load_component(ctx, params->instrs, - cast1, j * cast1->data_type->dimx + k, loc))) + cast1, j * cast1->data_type->e.numeric.dimx + k, loc))) return false;
if (!(value2 = hlsl_add_load_component(ctx, params->instrs, - cast2, k * cast2->data_type->dimx + i, loc))) + cast2, k * cast2->data_type->e.numeric.dimx + i, loc))) return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) @@ -4439,7 +4553,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } }
- if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) + if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->e.numeric.dimx + i, instr)) return false; hlsl_block_add_block(params->instrs, &block); } @@ -4632,7 +4746,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, static const struct hlsl_constant_value zero_value;
struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, - arg->data_type->dimx, arg->data_type->dimy); + arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy);
if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) return false; @@ -5086,22 +5200,23 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, return true; }
- mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx); + mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->e.numeric.dimy, arg_type->e.numeric.dimx);
if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc))) return false; hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < arg_type->dimx; ++i) + for (i = 0; i < arg_type->e.numeric.dimx; ++i) { - for (j = 0; j < arg_type->dimy; ++j) + for (j = 0; j < arg_type->e.numeric.dimy; ++j) { struct hlsl_block block;
- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, + j * arg->data_type->e.numeric.dimx + i, loc))) return false;
- if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->e.numeric.dimx + j, load)) return false; hlsl_block_add_block(params->instrs, &block); } @@ -5131,7 +5246,8 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; struct hlsl_type *arg_type = arg->data_type;
- if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) + if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR + && arg_type->e.numeric.dimx == 4)) { struct vkd3d_string_buffer *string;
@@ -5187,6 +5303,185 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, return true; }
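One detail of the intrinsic_interlocked() hunk below that is easy to miss is the value-type rule: InterlockedMax() and InterlockedMin() keep the destination's signedness and force floating-point values to int, while every other Interlocked*() converts the value argument to uint. A sketch of just that rule, with hypothetical names:

/* Not vkd3d code; the enum stands in for hlsl_base_type. */
enum base { T_FLOAT, T_INT, T_UINT };

static enum base interlocked_value_type(int is_min_or_max, enum base dst, enum base val)
{
    if (!is_min_or_max)
        return T_UINT;  /* all other Interlocked*() take uint */
    if (val == T_FLOAT)
        return T_INT;   /* floating values are cast to signed int */
    return dst;         /* otherwise match the destination's type */
}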
+static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name) +{ + struct hlsl_ir_node *lhs, *coords, *val, *cmp_val = NULL, *orig_val = NULL; + struct hlsl_ir_node *interlocked, *void_ret; + struct hlsl_type *lhs_type, *val_type; + struct vkd3d_string_buffer *string; + struct hlsl_deref dst_deref; + + if (hlsl_version_lt(ctx, 5, 0)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "Interlocked functions can only be used in shader model 5.0 or higher."); + + if (op != HLSL_INTERLOCKED_CMP_EXCH && op != HLSL_INTERLOCKED_EXCH + && params->args_count != 2 && params->args_count != 3) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Unexpected number of arguments to function '%s': expected 2 or 3, but got %u.", + name, params->args_count); + return false; + } + + lhs = params->args[0]; + lhs_type = lhs->data_type; + + if (op == HLSL_INTERLOCKED_CMP_EXCH) + { + cmp_val = params->args[1]; + val = params->args[2]; + if (params->args_count == 4) + orig_val = params->args[3]; + } + else + { + val = params->args[1]; + if (params->args_count == 3) + orig_val = params->args[2]; + } + + if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT + && lhs_type->e.numeric.type != HLSL_TYPE_INT)) + { + if ((string = hlsl_type_to_string(ctx, lhs_type))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Unexpected type for argument 0 of '%s': expected 'uint' or 'int', but got '%s'.", + name, string->buffer); + hlsl_release_string_buffer(ctx, string); + } + return false; + } + + /* Interlocked*() functions always take uint for the value parameters, + * except for InterlockedMax()/InterlockedMin(). */ + if (op == HLSL_INTERLOCKED_MAX || op == HLSL_INTERLOCKED_MIN) + { + enum hlsl_base_type val_base_type = val->data_type->e.numeric.type; + + /* Floating values are always cast to signed integers. */ + if (val_base_type == HLSL_TYPE_FLOAT || val_base_type == HLSL_TYPE_HALF || val_base_type == HLSL_TYPE_DOUBLE) + val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); + else + val_type = hlsl_get_scalar_type(ctx, lhs_type->e.numeric.type); + } + else + { + val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); + } + + if (cmp_val && !(cmp_val = add_implicit_conversion(ctx, params->instrs, cmp_val, val_type, loc))) + return false; + if (!(val = add_implicit_conversion(ctx, params->instrs, val, val_type, loc))) + return false; + + /* TODO: groupshared variables */ + if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) + { + if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) + { + hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets."); + return false; + } + + if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node)) + return false; + coords = hlsl_ir_index(lhs)->idx.node; + + VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); + VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); + + if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); + return false; + } + } + else + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); + return false; + } + + interlocked = hlsl_new_interlocked(ctx, op, orig_val ? 
lhs_type : NULL, &dst_deref, coords, cmp_val, val, loc); + hlsl_cleanup_deref(&dst_deref); + if (!interlocked) + return false; + hlsl_block_add_instr(params->instrs, interlocked); + + if (orig_val) + { + if (orig_val->data_type->modifiers & HLSL_MODIFIER_CONST) + hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + "Output argument to '%s' is const.", name); + + if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked, true)) + return false; + } + + if (!(void_ret = hlsl_new_void_expr(ctx, loc))) + return false; + hlsl_block_add_instr(params->instrs, void_ret); + + return true; +} + +static bool intrinsic_InterlockedAdd(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_ADD, params, loc, "InterlockedAdd"); +} + +static bool intrinsic_InterlockedAnd(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_AND, params, loc, "InterlockedAnd"); +} + +static bool intrinsic_InterlockedCompareExchange(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareExchange"); +} + +static bool intrinsic_InterlockedCompareStore(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareStore"); +} + +static bool intrinsic_InterlockedExchange(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_EXCH, params, loc, "InterlockedExchange"); +} + +static bool intrinsic_InterlockedMax(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MAX, params, loc, "InterlockedMax"); +} + +static bool intrinsic_InterlockedMin(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MIN, params, loc, "InterlockedMin"); +} + +static bool intrinsic_InterlockedOr(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_OR, params, loc, "InterlockedOr"); +} + +static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor"); +} + static const struct intrinsic_function { const char *name; @@ -5200,6 +5495,15 @@ intrinsic_functions[] = /* Note: these entries should be kept in alphabetical order. 
*/ {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, + {"InterlockedAdd", -1, true, intrinsic_InterlockedAdd}, + {"InterlockedAnd", -1, true, intrinsic_InterlockedAnd}, + {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange}, + {"InterlockedCompareStore", 3, true, intrinsic_InterlockedCompareStore}, + {"InterlockedExchange", 3, true, intrinsic_InterlockedExchange}, + {"InterlockedMax", -1, true, intrinsic_InterlockedMax}, + {"InterlockedMin", -1, true, intrinsic_InterlockedMin}, + {"InterlockedOr", -1, true, intrinsic_InterlockedOr}, + {"InterlockedXor", -1, true, intrinsic_InterlockedXor}, {"abs", 1, true, intrinsic_abs}, {"acos", 1, true, intrinsic_acos}, {"all", 1, true, intrinsic_all}, @@ -5236,6 +5540,7 @@ intrinsic_functions[] = {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, {"fwidth", 1, true, intrinsic_fwidth}, + {"isinf", 1, true, intrinsic_isinf}, {"ldexp", 2, true, intrinsic_ldexp}, {"length", 1, true, intrinsic_length}, {"lerp", 3, true, intrinsic_lerp}, @@ -5394,6 +5699,16 @@ static struct hlsl_block *add_shader_compilation(struct hlsl_ctx *ctx, const cha return NULL; }
+ for (unsigned int i = 0; i < args->args_count; ++i) + { + if (args->args[i]->data_type->class == HLSL_CLASS_ERROR) + { + args->instrs->value = ctx->error_instr; + free(args->args); + return args->instrs; + } + } + if (!(call_to_compile = add_user_call(ctx, decl, args, true, loc))) { free_parse_initializer(args); @@ -5447,6 +5762,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type struct hlsl_ir_load *load; struct hlsl_ir_var *var;
+ if (!hlsl_is_numeric_type(type)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Constructor data type %s is not numeric.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL;
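add_constructor() now validates the constructor type itself, emitting the "is not numeric" diagnostic directly, so the equivalent check can be dropped from the postfix_expr grammar action further below.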
@@ -5483,6 +5809,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_error(ctx, &cond->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Ternary condition type '%s' is not numeric.", string->buffer); hlsl_release_string_buffer(ctx, string); + return false; }
if (first->data_type->class <= HLSL_CLASS_LAST_NUMERIC @@ -5491,21 +5818,22 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, if (!(common_type = get_common_numeric_type(ctx, first, second, &first->loc))) return false;
- if (cond_type->dimx == 1 && cond_type->dimy == 1) + if (cond_type->e.numeric.dimx == 1 && cond_type->e.numeric.dimy == 1) { cond_type = hlsl_get_numeric_type(ctx, common_type->class, - HLSL_TYPE_BOOL, common_type->dimx, common_type->dimy); + HLSL_TYPE_BOOL, common_type->e.numeric.dimx, common_type->e.numeric.dimy); if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false; } else { - if (common_type->dimx == 1 && common_type->dimy == 1) + if (common_type->e.numeric.dimx == 1 && common_type->e.numeric.dimy == 1) { common_type = hlsl_get_numeric_type(ctx, cond_type->class, - common_type->e.numeric.type, cond_type->dimx, cond_type->dimy); + common_type->e.numeric.type, cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); } - else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) + else if (cond_type->e.numeric.dimx != common_type->e.numeric.dimx + || cond_type->e.numeric.dimy != common_type->e.numeric.dimy) { /* This condition looks wrong but is correct. * floatN is compatible with float1xN, but not with floatNx1. */ @@ -5523,7 +5851,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, }
cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, - common_type->dimx, common_type->dimy); + common_type->e.numeric.dimx, common_type->e.numeric.dimy); if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false; } @@ -5551,7 +5879,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, }
cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, - cond_type->dimx, cond_type->dimy); + cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false;
@@ -5923,7 +6251,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc return false; }
- if (read_channel >= object_type->e.resource.format->dimx) + if (read_channel >= object_type->e.resource.format->e.numeric.dimx) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Method %s() requires at least %u channels.", name, read_channel + 1); @@ -5944,6 +6272,87 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc return true; }
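The new GatherCmp* method handlers below mirror the existing Gather* ones, but require the first argument to be a SamplerComparisonState and take an additional compare value, which is implicitly converted to a scalar float before the gather-compare load is emitted.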
+static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = {0}; + unsigned int sampler_dim, offset_dim; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + + sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + + if (!strcmp(name, "GatherCmpGreen")) + load_params.type = HLSL_RESOURCE_GATHER_CMP_GREEN; + else if (!strcmp(name, "GatherCmpBlue")) + load_params.type = HLSL_RESOURCE_GATHER_CMP_BLUE; + else if (!strcmp(name, "GatherCmpAlpha")) + load_params.type = HLSL_RESOURCE_GATHER_CMP_ALPHA; + else + load_params.type = HLSL_RESOURCE_GATHER_CMP_RED; + + if (!strcmp(name, "GatherCmp") || !offset_dim) + { + if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", + name, 4 + !!offset_dim, params->args_count); + return false; + } + } + else if (params->args_count < 3 || params->args_count == 6 || params->args_count > 8) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected 3, 4, 5, 7, or 8, but got %u.", + name, params->args_count); + return false; + } + + if (params->args_count == 5 || params->args_count == 8) + { + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + } + else if (offset_dim && params->args_count > 3) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.", + name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + return false; + + load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); + load_params.resource = object; + load_params.sampler = params->args[0]; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + hlsl_block_add_instr(block, load); + return true; +} + static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) { @@ -5955,7 +6364,7 @@ static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_bloc if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) return false;
- if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) + if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load, false)) return false;
return true; @@ -6111,7 +6520,7 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc return false; hlsl_block_add_instr(block, sample_info);
- if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) + if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info, false)) return false; }
@@ -6311,6 +6720,11 @@ texture_methods[] = { "Gather", add_gather_method_call, "00010101001000" }, { "GatherAlpha", add_gather_method_call, "00010101001000" }, { "GatherBlue", add_gather_method_call, "00010101001000" }, + { "GatherCmp", add_gather_cmp_method_call, "00010101001000" }, + { "GatherCmpAlpha", add_gather_cmp_method_call, "00010101001000" }, + { "GatherCmpBlue", add_gather_cmp_method_call, "00010101001000" }, + { "GatherCmpGreen", add_gather_cmp_method_call, "00010101001000" }, + { "GatherCmpRed", add_gather_cmp_method_call, "00010101001000" }, { "GatherGreen", add_gather_method_call, "00010101001000" }, { "GatherRed", add_gather_method_call, "00010101001000" },
@@ -6475,6 +6889,46 @@ static void check_duplicated_switch_cases(struct hlsl_ctx *ctx, const struct hls } }
+static bool add_switch(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct parse_attribute_list *attributes, struct list *cases, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *selector = node_from_block(block);
+    struct hlsl_ir_node *s;
+
+    if (selector->data_type->class == HLSL_CLASS_ERROR)
+    {
+        destroy_switch_cases(cases);
+        cleanup_parse_attribute_list(attributes);
+        return true;
+    }
+
+    if (!(selector = add_implicit_conversion(ctx, block, selector,
+            hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &selector->loc)))
+    {
+        destroy_switch_cases(cases);
+        destroy_block(block);
+        cleanup_parse_attribute_list(attributes);
+        return false;
+    }
+
+    s = hlsl_new_switch(ctx, selector, cases, loc);
+
+    destroy_switch_cases(cases);
+
+    if (!s)
+    {
+        destroy_block(block);
+        cleanup_parse_attribute_list(attributes);
+        return false;
+    }
+
+    hlsl_block_add_instr(block, s);
+
+    cleanup_parse_attribute_list(attributes);
+    return true;
+}
+
 static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
         struct hlsl_type *format, const struct vkd3d_shader_location* loc)
 {
@@ -6553,6 +7007,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
     struct hlsl_semantic semantic;
     enum hlsl_buffer_type buffer_type;
     enum hlsl_sampler_dim sampler_dim;
+    enum hlsl_so_object_type so_type;
+    enum hlsl_array_type patch_type;
     struct hlsl_attribute *attr;
     struct parse_attribute_list attr_list;
     struct hlsl_ir_switch_case *switch_case;
@@ -6595,16 +7051,20 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
 %token KW_IN
 %token KW_INLINE
 %token KW_INOUT
+%token KW_INPUTPATCH
 %token KW_LINEAR
+%token KW_LINESTREAM
 %token KW_MATRIX
 %token KW_NAMESPACE
 %token KW_NOINTERPOLATION
 %token KW_NOPERSPECTIVE
 %token KW_NULL
 %token KW_OUT
+%token KW_OUTPUTPATCH
 %token KW_PACKOFFSET
 %token KW_PASS
 %token KW_PIXELSHADER
+%token KW_POINTSTREAM
 %token KW_RASTERIZERORDEREDBUFFER
 %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER
 %token KW_RASTERIZERORDEREDTEXTURE1D
@@ -6654,6 +7114,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
 %token KW_TEXTURE3D
 %token KW_TEXTURECUBE
 %token KW_TEXTURECUBEARRAY
+%token KW_TRIANGLESTREAM
 %token KW_TRUE
 %token KW_TYPEDEF
 %token KW_UNSIGNED
@@ -6781,9 +7242,12 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
 %type <reg_reservation> packoffset_reservation
%type <sampler_dim> texture_type texture_ms_type uav_type rov_type +%type <patch_type> patch_type
%type <semantic> semantic
+%type <so_type> so_type + %type <state_block> state_block
%type <state_block_index> state_block_index_opt @@ -6832,8 +7296,7 @@ name_opt: pass: KW_PASS name_opt annotations_opt '{' state_block_start state_block '}' { - if (!add_pass(ctx, $2, $3, $6, &@1)) - YYABORT; + add_pass(ctx, $2, $3, $6, &@1); }
annotations_list: @@ -6884,8 +7347,7 @@ technique9: struct hlsl_scope *scope = ctx->cur_scope; hlsl_pop_scope(ctx);
- if (!add_technique(ctx, $2, scope, $3, "technique", &@1)) - YYABORT; + add_technique(ctx, $2, scope, $3, "technique", &@1); }
technique10: @@ -6894,8 +7356,7 @@ technique10: struct hlsl_scope *scope = ctx->cur_scope; hlsl_pop_scope(ctx);
- if (!add_technique(ctx, $2, scope, $3, "technique10", &@1)) - YYABORT; + add_technique(ctx, $2, scope, $3, "technique10", &@1); }
technique11: @@ -6908,8 +7369,7 @@ technique11: hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "The 'technique11' keyword is invalid for this profile.");
- if (!add_technique(ctx, $2, scope, $3, "technique11", &@1)) - YYABORT; + add_technique(ctx, $2, scope, $3, "technique11", &@1); }
global_technique: @@ -6930,8 +7390,7 @@ effect_group: { struct hlsl_scope *scope = ctx->cur_scope; hlsl_pop_scope(ctx); - if (!(add_effect_group(ctx, $2, scope, $3, &@2))) - YYABORT; + add_effect_group(ctx, $2, scope, $3, &@2); }
buffer_declaration: @@ -7400,10 +7859,9 @@ compound_statement: if (!($$ = make_empty_block(ctx))) YYABORT; } - | '{' scope_start statement_list '}' + | '{' statement_list '}' { - hlsl_pop_scope(ctx); - $$ = $3; + $$ = $2; }
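Note that compound statements no longer open a scope by themselves; the statement rule (further below) now wraps compound_statement in scope_start/hlsl_pop_scope instead. Bare blocks therefore still get their own scope, while rules that use compound_statement directly and already manage a scope of their own, such as function bodies, are presumably spared a second, redundant nested scope.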
 scope_start:
@@ -7649,11 +8107,7 @@ param_list:
         {
             $$ = $1;
             if (!add_func_parameter(ctx, &$$, &$3, &@3))
-            {
-                hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED,
-                        "Parameter \"%s\" is already declared.", $3.name);
                 YYABORT;
-            }
         }
parameter: @@ -7684,8 +8138,11 @@ parameter_decl: { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in function parameters."); + type = ctx->builtin_types.error; + break; } - type = hlsl_new_array_type(ctx, type, $4.sizes[i]); + + type = hlsl_new_array_type(ctx, type, $4.sizes[i], HLSL_ARRAY_GENERIC); } vkd3d_free($4.sizes);
@@ -7805,6 +8262,20 @@ rov_type: $$ = HLSL_SAMPLER_DIM_3D; }
+so_type: + KW_POINTSTREAM + { + $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; + } + | KW_LINESTREAM + { + $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; + } + | KW_TRIANGLESTREAM + { + $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; + } + resource_format: var_modifiers type { @@ -7814,6 +8285,16 @@ resource_format: YYABORT; }
+patch_type: + KW_INPUTPATCH + { + $$ = HLSL_ARRAY_PATCH_INPUT; + } + | KW_OUTPUTPATCH + { + $$ = HLSL_ARRAY_PATCH_OUTPUT; + } + type_no_void: KW_VECTOR '<' type ',' C_INTEGER '>' { @@ -7948,6 +8429,24 @@ type_no_void: validate_uav_type(ctx, $1, $3, &@4); $$ = hlsl_new_uav_type(ctx, $1, $3, true); } + | so_type '<' type '>' + { + $$ = hlsl_new_stream_output_type(ctx, $1, $3); + } + | patch_type '<' type ',' C_INTEGER '>' + { + struct hlsl_type *type; + + if ($5 < 1) + { + hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Control point size %d is not positive.", $5); + YYABORT; + } + + type = hlsl_new_array_type(ctx, $3, $5, $1); + $$ = hlsl_type_clone(ctx, type, 0, HLSL_MODIFIER_CONST); + } | KW_RWBYTEADDRESSBUFFER { $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); @@ -8088,14 +8587,9 @@ typedef: }
if (modifiers) - { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Storage modifiers are not allowed on typedefs."); - LIST_FOR_EACH_ENTRY_SAFE(v, v_next, $4, struct parse_variable_def, entry) - vkd3d_free(v); - vkd3d_free($4); - YYABORT; - } + if (!add_typedef(ctx, type, $4)) YYABORT; } @@ -8623,7 +9117,11 @@ statement_list: statement: declaration_statement | expr_statement - | compound_statement + | scope_start compound_statement + { + hlsl_pop_scope(ctx); + $$ = $2; + } | jump_statement | selection_statement | loop_statement @@ -8753,25 +9251,25 @@ if_body: loop_statement: attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement { - $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); + $$ = create_loop(ctx, HLSL_LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' { - $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); + $$ = create_loop(ctx, HLSL_LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } @@ -8779,33 +9277,10 @@ loop_statement: switch_statement: attribute_list_optional switch_scope_start KW_SWITCH '(' expr ')' '{' switch_cases '}' { - struct hlsl_ir_node *selector = node_from_block($5); - struct hlsl_ir_node *s; - - if (!(selector = add_implicit_conversion(ctx, $5, selector, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &@5))) - { - destroy_switch_cases($8); - destroy_block($5); - cleanup_parse_attribute_list(&$1); - YYABORT; - } - - s = hlsl_new_switch(ctx, selector, $8, &@3); - - destroy_switch_cases($8); - - if (!s) - { - destroy_block($5); - cleanup_parse_attribute_list(&$1); - YYABORT; - } - $$ = $5; - hlsl_block_add_instr($$, s); - + if (!add_switch(ctx, $$, &$1, $8, &@3)) + YYABORT; hlsl_pop_scope(ctx); - cleanup_parse_attribute_list(&$1); }
switch_case: @@ -8979,17 +9454,24 @@ primary_expr: struct hlsl_ir_load *load; struct hlsl_ir_var *var;
-        if (!(var = hlsl_get_var(ctx->cur_scope, $1)))
+        if ((var = hlsl_get_var(ctx->cur_scope, $1)))
+        {
+            vkd3d_free($1);
+
+            if (!(load = hlsl_new_var_load(ctx, var, &@1)))
+                YYABORT;
+            if (!($$ = make_block(ctx, &load->node)))
+                YYABORT;
+        }
+        else
         {
             hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
                     "Variable \"%s\" is not defined.", $1);
             vkd3d_free($1);
-            YYABORT;
+
+            if (!($$ = make_empty_block(ctx)))
+                YYABORT;
+            $$->value = ctx->error_instr;
         }
-        vkd3d_free($1);
-        if (!(load = hlsl_new_var_load(ctx, var, &@1)))
-            YYABORT;
-        if (!($$ = make_block(ctx, &load->node)))
-            YYABORT;
     }
     | '(' expr ')'
     {
@@ -9149,23 +9631,8 @@ postfix_expr:
     | var_modifiers type '(' initializer_expr_list ')'
         {
            if ($1)
-            {
                hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
                        "Modifiers are not allowed on constructors.");
-                free_parse_initializer(&$4);
-                YYABORT;
-            }
-            if (!hlsl_is_numeric_type($2))
-            {
-                struct vkd3d_string_buffer *string;
-
-                if ((string = hlsl_type_to_string(ctx, $2)))
-                    hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-                            "Constructor data type %s is not numeric.", string->buffer);
-                hlsl_release_string_buffer(ctx, string);
-                free_parse_initializer(&$4);
-                YYABORT;
-            }
if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { @@ -9233,11 +9700,8 @@ unary_expr: | '(' var_modifiers type arrays ')' unary_expr { if ($2) - { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Modifiers are not allowed on casts."); - YYABORT; - }
if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) { @@ -9381,13 +9845,10 @@ assignment_expr: struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3);
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); - YYABORT; - } hlsl_block_add_block($3, $1); destroy_block($1); - if (!add_assignment(ctx, $3, lhs, $2, rhs)) + if (!add_assignment(ctx, $3, lhs, $2, rhs, false)) YYABORT; $$ = $3; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index d11ff481f6b..2afd3e1e1e5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -19,9 +19,14 @@ */
#include "hlsl.h" +#include "vkd3d_shader_private.h" +#include "d3dcommon.h" #include <stdio.h> #include <math.h>
+/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ +#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 + /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_type *type, struct hlsl_ir_node *base_offset, struct hlsl_ir_node *idx, @@ -269,7 +274,15 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls if (ctx->profile->major_version < 4) return true;
- if (type1->dimx != type2->dimx) + if (hlsl_type_is_patch_array(type1)) + { + return hlsl_type_is_patch_array(type2) + && type1->e.array.array_type == type2->e.array.array_type + && type1->e.array.elements_count == type2->e.array.elements_count + && types_are_semantic_equivalent(ctx, type1->e.array.type, type2->e.array.type); + } + + if (type1->e.numeric.dimx != type2->e.numeric.dimx) return false;
return base_type_get_semantic_equivalent(type1->e.numeric.type) @@ -282,15 +295,25 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir { struct hlsl_semantic new_semantic; struct hlsl_ir_var *ext_var; + const char *prefix; char *new_name;
- if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) + if (hlsl_type_is_patch_array(type)) + prefix = type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT ? "inputpatch" : "outputpatch"; + else + prefix = output ? "output" : "input"; + + if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", prefix, semantic->name, index))) return NULL;
LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { if (!ascii_strcasecmp(ext_var->name, new_name)) { + VKD3D_ASSERT(hlsl_type_is_patch_array(ext_var->data_type) + || ext_var->data_type->class <= HLSL_CLASS_VECTOR); + VKD3D_ASSERT(hlsl_type_is_patch_array(type) || type->class <= HLSL_CLASS_VECTOR); + if (output) { if (index >= semantic->reported_duplicated_output_next_index) @@ -362,7 +385,8 @@ static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t fie return field_modifiers; }
-static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, +static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_block *block, struct hlsl_ir_var *top_var, uint32_t patch_index, struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) { struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; @@ -396,27 +420,54 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec struct hlsl_ir_var *input; struct hlsl_ir_load *load;
- if (!(input = add_semantic_var(ctx, func, var, vector_type_src, - modifiers, semantic, semantic_index + i, false, force_align, loc))) - return; + if (hlsl_type_is_patch_array(top_var->data_type)) + { + struct hlsl_type *top_type = top_var->data_type; + struct hlsl_type *patch_type; + struct hlsl_deref patch_deref; + struct hlsl_ir_node *idx;
- if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) - return; - list_add_after(&lhs->node.entry, &load->node.entry); + if (!(patch_type = hlsl_new_array_type(ctx, vector_type_src, top_type->e.array.elements_count, + top_type->e.array.array_type))) + return; + + if (!(input = add_semantic_var(ctx, func, var, patch_type, + modifiers, semantic, semantic_index + i, false, force_align, loc))) + return; + hlsl_init_simple_deref_from_var(&patch_deref, input); + + if (!(idx = hlsl_new_uint_constant(ctx, patch_index, &var->loc))) + return; + hlsl_block_add_instr(block, idx); + + if (!(load = hlsl_new_load_index(ctx, &patch_deref, idx, loc))) + return; + hlsl_block_add_instr(block, &load->node); + } + else + { + if (!(input = add_semantic_var(ctx, func, var, vector_type_src, + modifiers, semantic, semantic_index + i, false, force_align, loc))) + return; + + if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) + return; + hlsl_block_add_instr(block, &load->node); + }
if (!(cast = hlsl_new_cast(ctx, &load->node, vector_type_dst, &var->loc))) return; - list_add_after(&load->node.entry, &cast->entry); + hlsl_block_add_instr(block, cast);
if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_after(&cast->entry, &c->entry); + hlsl_block_add_instr(block, c);
if (!(store = hlsl_new_store_index(ctx, &lhs->src, c, cast, 0, &var->loc))) return; - list_add_after(&c->entry, &store->entry); + hlsl_block_add_instr(block, store); } else { @@ -424,14 +475,14 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) return; - list_add_after(&cast->entry, &store->entry); + hlsl_block_add_instr(block, store); } } }
-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, - struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers, - struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_block *block, struct hlsl_ir_var *top_var, uint32_t patch_index, struct hlsl_ir_load *lhs, + uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) { struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_type *type = lhs->node.data_type; @@ -455,6 +506,9 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; element_modifiers = modifiers; force_align = true; + + if (hlsl_type_is_patch_array(type)) + patch_index = i; } else { @@ -474,20 +528,20 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx,
if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_after(&lhs->node.entry, &c->entry); + hlsl_block_add_instr(block, c);
/* This redundant load is expected to be deleted later by DCE. */ if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) return; - list_add_after(&c->entry, &element_load->node.entry); + hlsl_block_add_instr(block, &element_load->node);
- prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, - semantic, elem_semantic_index, force_align); + prepend_input_copy_recurse(ctx, func, block, top_var, patch_index, element_load, + element_modifiers, semantic, elem_semantic_index, force_align); } } else { - prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index, force_align); + prepend_input_copy(ctx, func, block, var, patch_index, lhs, modifiers, semantic, semantic_index, force_align); } }
@@ -496,13 +550,19 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) { struct hlsl_ir_load *load; + struct hlsl_block block; + + hlsl_block_init(&block);
/* This redundant load is expected to be deleted later by DCE. */ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_head(&func->body.instrs, &load->node.entry); + hlsl_block_add_instr(&block, &load->node); + + prepend_input_copy_recurse(ctx, func, &block, var, 0, load, + var->storage_modifiers, &var->semantic, var->semantic.index, false);
- prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); + list_move_head(&func->body.instrs, &block.instrs); }
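prepend_input_var_copy() now collects the generated loads, casts and stores in a local block and splices them to the head of the function body in a single step, rather than chaining list_add_after() calls. A minimal sketch of the pattern, with the node creation elided:

    struct hlsl_block block;

    hlsl_block_init(&block);
    /* Create each node and append it with hlsl_block_add_instr(&block, node). */
    list_move_head(&func->body.instrs, &block.instrs);

Building into a temporary block also means that a mid-way allocation failure leaves the function body untouched.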
static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, @@ -731,6 +791,10 @@ static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *in res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); return res;
+ case HLSL_IR_INTERLOCKED: + res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr); + return res; + default: return false; } @@ -1028,10 +1092,10 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * return true; }
-static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, - const struct vkd3d_shader_location *loc) +static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { - unsigned int dim_count = index->data_type->dimx; + unsigned int dim_count = index->data_type->e.numeric.dimx; struct hlsl_ir_node *store, *zero; struct hlsl_ir_load *coords_load; struct hlsl_deref coords_deref; @@ -1046,23 +1110,107 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h hlsl_init_simple_deref_from_var(&coords_deref, coords); if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) return NULL; - list_add_after(&index->entry, &store->entry); + hlsl_block_add_instr(block, store);
if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) return NULL; - list_add_after(&store->entry, &zero->entry); + hlsl_block_add_instr(block, zero);
if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc))) return NULL; - list_add_after(&zero->entry, &store->entry); + hlsl_block_add_instr(block, store);
if (!(coords_load = hlsl_new_var_load(ctx, coords, loc))) return NULL; - list_add_after(&store->entry, &coords_load->node.entry); + hlsl_block_add_instr(block, &coords_load->node);
return &coords_load->node; }
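add_zero_mipmap_level() now takes the destination block explicitly. The helper widens an index by one component that selects the mip level: for a 2D resource, for example, a coordinate (x, y) becomes (x, y, 0), so the lowered load always reads from the top mip.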
+static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + unsigned int src_comp_count, dst_comp_count; + struct hlsl_type *src_type, *dst_type; + struct hlsl_deref var_deref; + bool broadcast, matrix_cast; + struct hlsl_ir_load *load; + struct hlsl_ir_node *arg; + struct hlsl_ir_var *var; + unsigned int dst_idx; + + if (instr->type != HLSL_IR_EXPR) + return false; + + if (hlsl_ir_expr(instr)->op != HLSL_OP1_CAST) + return false; + + arg = hlsl_ir_expr(instr)->operands[0].node; + dst_type = instr->data_type; + src_type = arg->data_type; + + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR) + return false; + + src_comp_count = hlsl_type_component_count(src_type); + dst_comp_count = hlsl_type_component_count(dst_type); + broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1; + matrix_cast = !broadcast && dst_comp_count != src_comp_count + && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; + + VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); + if (matrix_cast) + { + VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx); + VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy); + } + + if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, &instr->loc))) + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + + for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) + { + struct hlsl_ir_node *component_load, *cast; + struct hlsl_type *dst_comp_type; + struct hlsl_block store_block; + unsigned int src_idx; + + if (broadcast) + { + src_idx = 0; + } + else if (matrix_cast) + { + unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx; + + src_idx = y * src_type->e.numeric.dimx + x; + } + else + { + src_idx = dst_idx; + } + + dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); + + if (!(component_load = hlsl_add_load_component(ctx, block, arg, src_idx, &arg->loc))) + return false; + + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, &arg->loc))) + return false; + hlsl_block_add_instr(block, cast); + + if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) + return false; + hlsl_block_add_block(block, &store_block); + } + + if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) + return false; + hlsl_block_add_instr(block, &load->node); + + return true; +} + /* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that * represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of * an assignment or as a value made from different components of the matrix. The former cases should @@ -1075,7 +1223,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins struct hlsl_deref var_deref; struct hlsl_type *matrix_type; struct hlsl_ir_var *var; - unsigned int x, y, k, i; + unsigned int k, i;
if (instr->type != HLSL_IR_SWIZZLE) return false; @@ -1088,14 +1236,12 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins return false; hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < instr->data_type->dimx; ++i) + for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) { struct hlsl_block store_block; struct hlsl_ir_node *load;
- y = (swizzle->swizzle >> (8 * i + 4)) & 0xf; - x = (swizzle->swizzle >> 8 * i) & 0xf; - k = y * matrix_type->dimx + x; + k = swizzle->u.matrix.components[i].y * matrix_type->e.numeric.dimx + swizzle->u.matrix.components[i].x;
if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) return false; @@ -1140,9 +1286,9 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - VKD3D_ASSERT(coords->data_type->dimx == dim_count); + VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count);
- if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) + if (!(coords = add_zero_mipmap_level(ctx, block, coords, &instr->loc))) return false;
params.type = HLSL_RESOURCE_LOAD; @@ -1176,7 +1322,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_init_simple_deref_from_var(&row_deref, var);
- for (i = 0; i < mat->data_type->dimx; ++i) + for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) { struct hlsl_ir_node *c;
@@ -1225,7 +1371,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->e.numeric.dimx == 1) { struct hlsl_ir_node *new_cast, *swizzle;
@@ -1236,9 +1382,10 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s return false; hlsl_block_add_instr(block, new_cast);
- if (dst_type->dimx != 1) + if (dst_type->e.numeric.dimx != 1) { - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, new_cast, &cast->node.loc))) + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), + dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) return false; hlsl_block_add_instr(block, swizzle); } @@ -1358,8 +1505,10 @@ struct copy_propagation_var_def
struct copy_propagation_state { - struct rb_tree var_defs; - struct copy_propagation_state *parent; + struct rb_tree *scope_var_defs; + size_t scope_count, scopes_capacity; + struct hlsl_ir_node *stop; + bool stopped; };
static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) @@ -1381,6 +1530,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte vkd3d_free(var_def); }
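The copy-propagation state is thus no longer a chain of parent-linked states but a single state holding a stack of scopes; lookups in copy_propagation_get_value() below walk the stack from innermost to outermost scope:

    for (size_t i = state->scope_count - 1; i < state->scope_count; i--)

Note that the loop condition relies on unsigned wraparound: once i is decremented past zero it becomes larger than scope_count and the loop terminates. The new stop/stopped fields let a caller run propagation only up to a designated instruction; copy_propagation_transform_block() checks for the stop instruction and bails out early, and the if/loop/switch helpers propagate that early exit.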
+static size_t copy_propagation_push_scope(struct copy_propagation_state *state, struct hlsl_ctx *ctx) +{ + if (!(hlsl_array_reserve(ctx, (void **)&state->scope_var_defs, &state->scopes_capacity, + state->scope_count + 1, sizeof(*state->scope_var_defs)))) + return false; + + rb_init(&state->scope_var_defs[state->scope_count++], copy_propagation_var_def_compare); + + return state->scope_count; +} + +static size_t copy_propagation_pop_scope(struct copy_propagation_state *state) +{ + rb_destroy(&state->scope_var_defs[--state->scope_count], copy_propagation_var_def_destroy, NULL); + + return state->scope_count; +} + +static bool copy_propagation_state_init(struct copy_propagation_state *state, struct hlsl_ctx *ctx) +{ + memset(state, 0, sizeof(*state)); + + return copy_propagation_push_scope(state, ctx); +} + +static void copy_propagation_state_destroy(struct copy_propagation_state *state) +{ + while (copy_propagation_pop_scope(state)); + + vkd3d_free(state->scope_var_defs); +} + static struct copy_propagation_value *copy_propagation_get_value_at_time( struct copy_propagation_component_trace *trace, unsigned int time) { @@ -1398,9 +1579,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state, const struct hlsl_ir_var *var, unsigned int component, unsigned int time) { - for (; state; state = state->parent) + for (size_t i = state->scope_count - 1; i < state->scope_count; i--) { - struct rb_entry *entry = rb_get(&state->var_defs, var); + struct rb_tree *tree = &state->scope_var_defs[i]; + struct rb_entry *entry = rb_get(tree, var); if (entry) { struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); @@ -1426,7 +1608,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->var_defs, var); + struct rb_tree *tree = &state->scope_var_defs[state->scope_count - 1]; + struct rb_entry *entry = rb_get(tree, var); struct copy_propagation_var_def *var_def; unsigned int component_count = hlsl_type_component_count(var->data_type); int res; @@ -1439,7 +1622,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h
var_def->var = var;
- res = rb_put(&state->var_defs, var, &var_def->entry); + res = rb_put(tree, var, &var_def->entry); VKD3D_ASSERT(!res);
return var_def; @@ -1596,7 +1779,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count)); return false; } - ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i); + hlsl_swizzle_set_component(&ret_swizzle, i, value->component); }
TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", @@ -1678,6 +1861,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break;
@@ -1719,10 +1903,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, return false; load = hlsl_ir_load(swizzle->val.node);
- if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->swizzle, &swizzle->node)) + if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node)) return true;
- if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node)) + if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node)) return true;
return false; @@ -1792,6 +1976,15 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, return progress; }
+static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx, + struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state) +{ + bool progress = false; + + progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index); + return progress; +} + static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, struct copy_propagation_state *state) { @@ -1818,18 +2011,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s } }
-static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state, - struct copy_propagation_state *parent) -{ - rb_init(&state->var_defs, copy_propagation_var_def_compare); - state->parent = parent; -} - -static void copy_propagation_state_destroy(struct copy_propagation_state *state) -{ - rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL); -} - static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_block *block, unsigned int time) { @@ -1898,16 +2079,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, struct copy_propagation_state *state) { - struct copy_propagation_state inner_state; bool progress = false;
- copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &iff->then_block, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state);
- copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &iff->else_block, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state);
/* Ideally we'd invalidate the outer state looking at what was * touched in the two inner states, but this doesn't work for @@ -1922,14 +2106,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, struct copy_propagation_state *state) { - struct copy_propagation_state inner_state; bool progress = false;
copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index); + copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index);
- copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &loop->body, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state);
return progress; } @@ -1937,15 +2123,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, struct copy_propagation_state *state) { - struct copy_propagation_state inner_state; struct hlsl_ir_switch_case *c; bool progress = false;
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &c->body, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &c->body, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state); }
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) @@ -1964,6 +2151,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) { + if (instr == state->stop) + { + state->stopped = true; + return progress; + } + switch (instr->type) { case HLSL_IR_LOAD: @@ -1998,9 +2191,15 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state); break;
+            case HLSL_IR_INTERLOCKED:
+                progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state);
+                break;
+
             default:
                 break;
         }
+
+        if (state->stopped)
+            return progress;
     }
return progress; @@ -2013,7 +2212,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc
index_instructions(block, 2);
- copy_propagation_state_init(ctx, &state, NULL); + copy_propagation_state_init(&state, ctx);
progress = copy_propagation_transform_block(ctx, block, &state);
@@ -2053,10 +2252,10 @@ static enum validation_result validate_component_index_range_from_deref(struct h
         switch (type->class)
         {
             case HLSL_CLASS_VECTOR:
-                if (idx >= type->dimx)
+                if (idx >= type->e.numeric.dimx)
                 {
                     hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
-                            "Vector index is out of bounds. %u/%u", idx, type->dimx);
+                            "Vector index is out of bounds. %u/%u", idx, type->e.numeric.dimx);
                     return DEREF_VALIDATION_OUT_OF_BOUNDS;
                 }
                 break;
@@ -2178,6 +2377,24 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
             validate_component_index_range_from_deref(ctx, &store->lhs);
             break;
         }
+        case HLSL_IR_INTERLOCKED:
+        {
+            struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
+
+            if (!interlocked->dst.var->is_uniform)
+            {
+                hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
+                        "Accessed resource must have a single uniform source.");
+            }
+            else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT)
+            {
+                hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
+                        "Accessed resource from \"%s\" must be determinable at compile time.",
+                        interlocked->dst.var->name);
+                note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource");
+            }
+            break;
+        }
         default:
             break;
     }
@@ -2187,7 +2404,7 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
static bool is_vec1(const struct hlsl_type *type) { - return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); + return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); }
static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -2364,18 +2581,20 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR + && dst_type->e.numeric.dimx < src_type->e.numeric.dimx) { struct hlsl_ir_node *new_cast, *swizzle;
- dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx); + dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->e.numeric.dimx); /* We need to preserve the cast since it might be doing more than just * narrowing the vector. */ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) return false; hlsl_block_add_instr(block, new_cast);
- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), + dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) return false; hlsl_block_add_instr(block, swizzle);
@@ -2401,11 +2620,12 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *new_swizzle; uint32_t combined_swizzle;
- combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, - swizzle->swizzle, instr->data_type->dimx); + combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector, + swizzle->u.vector, instr->data_type->e.numeric.dimx); next_instr = hlsl_ir_swizzle(next_instr)->val.node;
- if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) + if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, + instr->data_type->e.numeric.dimx, next_instr, &instr->loc))) return false;
list_add_before(&instr->entry, &new_swizzle->entry); @@ -2425,11 +2645,11 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return false; swizzle = hlsl_ir_swizzle(instr);
- if (instr->data_type->dimx != swizzle->val.node->data_type->dimx) + if (instr->data_type->e.numeric.dimx != swizzle->val.node->data_type->e.numeric.dimx) return false;
- for (i = 0; i < instr->data_type->dimx; ++i) - if (hlsl_swizzle_get_component(swizzle->swizzle, i) != i) + for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) + if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i) return false;
hlsl_replace_node(instr, swizzle->val.node); @@ -2589,6 +2809,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) { struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; + unsigned int width = type->e.numeric.dimx; struct hlsl_constant_value value; struct hlsl_ir_load *vector_load; enum hlsl_ir_expr_op op; @@ -2597,7 +2818,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir return false; hlsl_block_add_instr(block, &vector_load->node);
- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc))) return false; hlsl_block_add_instr(block, swizzle);
@@ -2605,14 +2826,14 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir value.u[1].u = 1; value.u[2].u = 2; value.u[3].u = 3; - if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc))) + if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, width), &value, &instr->loc))) return false; hlsl_block_add_instr(block, c);
operands[0] = swizzle; operands[1] = c; if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, - hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) + hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, width), &instr->loc))) return false; hlsl_block_add_instr(block, eq);
@@ -2621,7 +2842,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir hlsl_block_add_instr(block, eq);
op = HLSL_OP2_DOT; - if (type->dimx == 1) + if (width == 1) op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL;
/* Note: We may be creating a DOT for bool vectors here, which we need to lower to @@ -2748,7 +2969,8 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n return false; hlsl_block_add_instr(block, equals);
- if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc))) + if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), + var->data_type->e.numeric.dimx, equals, &cut_index->loc))) return false; hlsl_block_add_instr(block, equals);
@@ -2788,6 +3010,116 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n
     return true;
 }
+
+static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type)
+{
+    struct hlsl_type *sampler_type;
+
+    if (type->class == HLSL_CLASS_ARRAY)
+    {
+        if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type)))
+            return NULL;
+
+        return hlsl_new_array_type(ctx, sampler_type, type->e.array.elements_count, HLSL_ARRAY_GENERIC);
+    }
+
+    return ctx->builtin_types.sampler[type->sampler_dim];
+}
+
+static bool deref_offset_is_zero(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
+{
+    enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
+    unsigned int index;
+
+    if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index))
+        return false;
+    return index == 0;
+}
+
+/* Lower samples from separate texture and sampler variables to samples from
+ * synthesized combined samplers. That is, translate SM4-style samples in the
+ * source to SM1-style samples in the bytecode. */
+static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+    struct hlsl_ir_var *var, *resource, *sampler;
+    struct hlsl_ir_resource_load *load;
+    struct vkd3d_string_buffer *name;
+    struct hlsl_type *sampler_type;
+
+    if (instr->type != HLSL_IR_RESOURCE_LOAD)
+        return false;
+    load = hlsl_ir_resource_load(instr);
+
+    if (load->load_type != HLSL_RESOURCE_SAMPLE
+            && load->load_type != HLSL_RESOURCE_SAMPLE_GRAD
+            && load->load_type != HLSL_RESOURCE_SAMPLE_LOD
+            && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS)
+        return false;
+
+    if (!load->sampler.var)
+        return false;
+    resource = load->resource.var;
+    sampler = load->sampler.var;
+
+    VKD3D_ASSERT(hlsl_type_is_resource(resource->data_type));
+    VKD3D_ASSERT(hlsl_type_is_resource(sampler->data_type));
+    if (sampler->data_type->class == HLSL_CLASS_ARRAY && !deref_offset_is_zero(ctx, &load->sampler))
+    {
+        /* Not supported by d3dcompiler. */
+        hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED,
+                "Lowering separate samples with sampler arrays is not implemented.");
+        return false;
+    }
+    if (!resource->is_uniform)
+        return false;
+    if (!sampler->is_uniform)
+        return false;
+
+    if (!(name = hlsl_get_string_buffer(ctx)))
+        return false;
+    vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name);
+
+    if (load->texel_offset.node)
+    {
+        hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+                "Texel offsets are not supported on profiles lower than 4.0.");
+        hlsl_release_string_buffer(ctx, name);
+        return false;
+    }
+
+    TRACE("Lowering to combined sampler %s.\n", debugstr_a(name->buffer));
+
+    if (!(var = hlsl_get_var(ctx->globals, name->buffer)))
+    {
+        if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type)))
+        {
+            hlsl_release_string_buffer(ctx, name);
+            return false;
+        }
+
+        if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, sampler_type, &instr->loc, false)))
+        {
+            hlsl_release_string_buffer(ctx, name);
+            return false;
+        }
+        var->storage_modifiers |= HLSL_STORAGE_UNIFORM;
+        var->is_combined_sampler = true;
+        var->is_uniform = 1;
+
+        list_remove(&var->scope_entry);
+        list_add_after(&sampler->scope_entry, &var->scope_entry);
+
+        list_add_after(&sampler->extern_entry, &var->extern_entry);
+    }
+    hlsl_release_string_buffer(ctx, name);
+
+    /* Only change the deref's var, keep the path. */
+    load->resource.var = var;
+    hlsl_cleanup_deref(&load->sampler);
+    load->sampler.var = NULL;
+
+    return true;
+}
+
 /* Lower combined samples and sampler variables to synthesized separated textures and samplers.
* That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -2808,6 +3140,10 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in case HLSL_RESOURCE_GATHER_GREEN: case HLSL_RESOURCE_GATHER_BLUE: case HLSL_RESOURCE_GATHER_ALPHA: + case HLSL_RESOURCE_GATHER_CMP_RED: + case HLSL_RESOURCE_GATHER_CMP_GREEN: + case HLSL_RESOURCE_GATHER_CMP_BLUE: + case HLSL_RESOURCE_GATHER_CMP_ALPHA: case HLSL_RESOURCE_RESINFO: case HLSL_RESOURCE_SAMPLE_CMP: case HLSL_RESOURCE_SAMPLE_CMP_LZ: @@ -2848,7 +3184,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in for (i = 0; i < load->resource.path_len; ++i) { VKD3D_ASSERT(arr_type->class == HLSL_CLASS_ARRAY); - texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); + texture_array_type = hlsl_new_array_type(ctx, texture_array_type, + arr_type->e.array.elements_count, HLSL_ARRAY_GENERIC); arr_type = arr_type->e.array.type; }
@@ -2899,6 +3236,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl list_add_tail(list, &to_add->extern_entry); }
+static bool sort_synthetic_combined_samplers_first(struct hlsl_ctx *ctx) +{ + struct list separated_resources; + struct hlsl_ir_var *var, *next; + + list_init(&separated_resources); + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_combined_sampler) + { + list_remove(&var->extern_entry); + insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_SAMPLERS); + } + } + + list_move_head(&ctx->extern_vars, &separated_resources); + + return false; +} + static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) { struct list separated_resources; @@ -2920,11 +3278,24 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) return false; }
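lower_separate_samples() above names the synthetic variable by joining the sampler and texture names:

    vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name);

so, for instance, sampling a texture tex through a sampler samp on a SM1 target ends up reading a combined sampler uniform named "samp+tex". The hlsl_get_var() lookup reuses an existing synthetic variable, so each (sampler, texture) pair maps to exactly one combined sampler, and sort_synthetic_combined_samplers_first() then moves these variables to the front of the extern list in decreasing bind-count order, presumably ahead of register allocation.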
-/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). */
+/* Turn CAST to int or uint as follows:
+ *
+ * CAST(x) = x - FRACT(x) + extra
+ *
+ * where
+ *
+ * extra = FRACT(x) > 0 && x < 0
+ *
+ * and the comparisons in the extra term are performed using CMP or SLT
+ * depending on whether this is a pixel or vertex shader, respectively.
+ * For example, CAST(-2.5) = -2.5 - 0.5 + 1 = -2, matching the expected
+ * truncation towards zero.
+ *
+ * A REINTERPRET (which is written as a mere MOV) is also applied to the final
+ * result for type consistency.
+ */
 static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 };
-    struct hlsl_ir_node *arg, *floor, *res;
+    struct hlsl_ir_node *arg, *res;
     struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) @@ -2939,24 +3310,95 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) return false;
- if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) - return false; - hlsl_block_add_instr(block, floor); + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + struct hlsl_ir_node *fract, *neg_fract, *has_fract, *floor, *extra, *zero, *one; + struct hlsl_constant_value zero_value, one_value;
- memset(operands, 0, sizeof(operands)); - operands[0] = floor; - if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, res); + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, arg->data_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero);
- return true; -} + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, arg->data_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one);
-/* Lower DIV to RCP + MUL. */ -static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -{ - struct hlsl_ir_node *rcp, *mul; - struct hlsl_ir_expr *expr; + if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) + return false; + hlsl_block_add_instr(block, fract); + + if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg_fract); + + if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one))) + return false; + hlsl_block_add_instr(block, has_fract); + + if (!(extra = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, arg, zero, has_fract))) + return false; + hlsl_block_add_instr(block, extra); + + if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) + return false; + hlsl_block_add_instr(block, floor); + + if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, floor, extra))) + return false; + hlsl_block_add_instr(block, res); + } + else + { + struct hlsl_ir_node *neg_arg, *is_neg, *fract, *neg_fract, *has_fract, *floor; + + if (!(neg_arg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg_arg); + + if (!(is_neg = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg, neg_arg))) + return false; + hlsl_block_add_instr(block, is_neg); + + if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) + return false; + hlsl_block_add_instr(block, fract); + + if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg_fract); + + if (!(has_fract = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg_fract, fract))) + return false; + hlsl_block_add_instr(block, has_fract); + + if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) + return false; + hlsl_block_add_instr(block, floor); + + if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor))) + return false; + hlsl_block_add_instr(block, res); + } + + memset(operands, 0, sizeof(operands)); + operands[0] = res; + if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, res); + + return true; +} + +/* Lower DIV to RCP + MUL. */ +static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *rcp, *mul; + struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR) return false; @@ -3010,7 +3452,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DOT) return false; - if (arg1->data_type->dimx != 2) + if (arg1->data_type->e.numeric.dimx != 2) return false;
if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) @@ -3034,11 +3476,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h return false; hlsl_block_add_instr(block, mul);
- if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) + if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), + instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) return false; hlsl_block_add_instr(block, add_x);
- if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) + if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), + instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) return false; hlsl_block_add_instr(block, add_y);
@@ -3202,7 +3646,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct type = arg->data_type;
/* Reduce the range of the input angles to [-pi, pi]. */ - for (i = 0; i < type->dimx; ++i) + for (i = 0; i < type->e.numeric.dimx; ++i) { half_value.u[i].f = 0.5; two_pi_value.u[i].f = 2.0 * M_PI; @@ -3230,7 +3674,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return false; hlsl_block_add_instr(block, reduced);
- if (type->dimx == 1) + if (type->e.numeric.dimx == 1) { if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) return false; @@ -3243,7 +3687,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct struct hlsl_deref var_deref; struct hlsl_ir_load *var_load;
- for (i = 0; i < type->dimx; ++i) + for (i = 0; i < type->e.numeric.dimx; ++i) { uint32_t s = hlsl_swizzle_from_writemask(1 << i);
@@ -3256,7 +3700,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return false; hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < type->dimx; ++i) + for (i = 0; i < type->e.numeric.dimx; ++i) { struct hlsl_block store_block;
@@ -3292,7 +3736,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st return false;
arg = expr->operands[0].node; - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx);
/* If this happens, it means we failed to cast the argument to boolean somewhere. */ VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); @@ -3354,7 +3798,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL);
type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, - instr->data_type->dimx, instr->data_type->dimy); + instr->data_type->e.numeric.dimx, instr->data_type->e.numeric.dimy);
if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) return false; @@ -3375,6 +3819,51 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return true; }
+static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *swizzle, *store; + struct hlsl_ir_resource_load *load; + struct hlsl_ir_load *tmp_load; + struct hlsl_ir_var *tmp_var; + struct hlsl_deref deref; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + if (load->load_type != HLSL_RESOURCE_SAMPLE_LOD + && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) + return false; + + if (!load->lod.node) + return false; + + if (!(tmp_var = hlsl_new_synthetic_var(ctx, "coords-with-lod", + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), &instr->loc))) + return false; + + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), 4, load->lod.node, &load->lod.node->loc))) + return false; + list_add_before(&instr->entry, &swizzle->entry); + + if (!(store = hlsl_new_simple_store(ctx, tmp_var, swizzle))) + return false; + list_add_before(&instr->entry, &store->entry); + + hlsl_init_simple_deref_from_var(&deref, tmp_var); + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load->coords.node, 0, &instr->loc))) + return false; + list_add_before(&instr->entry, &store->entry); + + if (!(tmp_load = hlsl_new_var_load(ctx, tmp_var, &instr->loc))) + return false; + list_add_before(&instr->entry, &tmp_load->node.entry); + + hlsl_src_remove(&load->coords); + hlsl_src_from_node(&load->coords, &tmp_load->node); + hlsl_src_remove(&load->lod); + return true; +} + static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -3393,7 +3882,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node
arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) return false; @@ -3519,7 +4008,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h
arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) return false; @@ -3579,7 +4068,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h if (expr->op != HLSL_OP3_CMP) return false;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
for (i = 0; i < 3; ++i) { @@ -3649,7 +4138,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false;
/* Narrowing casts should have already been lowered. */ - VKD3D_ASSERT(type->dimx == arg_type->dimx); + VKD3D_ASSERT(type->e.numeric.dimx == arg_type->e.numeric.dimx);
zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); if (!zero) @@ -3675,7 +4164,8 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc
if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) { - cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, + cond_type->e.numeric.dimx, cond_type->e.numeric.dimy);
if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) return NULL; @@ -3711,13 +4201,13 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; if (type->e.numeric.type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) return false; hlsl_block_add_instr(block, xor);
- for (i = 0; i < type->dimx; ++i) + for (i = 0; i < type->e.numeric.dimx; ++i) high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; @@ -3777,9 +4267,9 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; if (type->e.numeric.type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
- for (i = 0; i < type->dimx; ++i) + for (i = 0; i < type->e.numeric.dimx; ++i) high_bit_value.u[i].u = 0x80000000; if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) return false; @@ -3870,8 +4360,8 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru { arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; - VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx); - dimx = arg1->data_type->dimx; + VKD3D_ASSERT(arg1->data_type->e.numeric.dimx == arg2->data_type->e.numeric.dimx); + dimx = arg1->data_type->e.numeric.dimx; is_bool = type->e.numeric.type == HLSL_TYPE_BOOL;
if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) @@ -3920,7 +4410,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; if (type->e.numeric.type != HLSL_TYPE_FLOAT) return false; - btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); + btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy);
if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; @@ -3942,7 +4432,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) return false;
- for (i = 0; i < type->dimx; ++i) + for (i = 0; i < type->e.numeric.dimx; ++i) one_value.u[i].f = 1.0f; if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) return false; @@ -4000,7 +4490,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst if (!arg) continue;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx); if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) return false; hlsl_block_add_instr(block, arg_cast); @@ -4008,7 +4498,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst operands[i] = arg_cast; }
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) return false; hlsl_block_add_instr(block, float_expr); @@ -4049,7 +4539,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
operands[0] = jump->condition.node; operands[1] = zero; - cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); + cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, + arg_type->e.numeric.dimx, arg_type->e.numeric.dimy); if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) return false; hlsl_block_add_instr(&block, cmp); @@ -4093,7 +4584,7 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v return false;
cond = jump->condition.node; - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx); + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->e.numeric.dimx);
hlsl_block_init(&block);
@@ -4158,13 +4649,11 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_LOOP: case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: + case HLSL_IR_INTERLOCKED: break; case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); - case HLSL_IR_VSIR_INSTRUCTION_REF: - /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ - vkd3d_unreachable(); }
return false; @@ -4304,9 +4793,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); - case HLSL_IR_VSIR_INSTRUCTION_REF: - /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ - vkd3d_unreachable();
case HLSL_IR_STORE: { @@ -4410,6 +4896,19 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop index->idx.node->last_read = last_read; break; } + case HLSL_IR_INTERLOCKED: + { + struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); + + var = interlocked->dst.var; + var->last_read = max(var->last_read, last_read); + deref_mark_last_read(&interlocked->dst, last_read); + interlocked->coords.node->last_read = last_read; + interlocked->value.node->last_read = last_read; + if (interlocked->cmp_value.node) + interlocked->cmp_value.node->last_read = last_read; + break; + } case HLSL_IR_JUMP: { struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); @@ -4494,6 +4993,9 @@ struct register_allocator
/* Two allocations with different modes can't share the same register. */ int mode; + /* If an allocation is VIP, no new allocations can be made in the + * register unless they are VIP as well. */ + bool vip; } *allocations; size_t count, capacity;
@@ -4513,7 +5015,7 @@ struct register_allocator };
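In other words, a VIP allocation makes the whole register VIP-only for the overlapping lifetime: later allocations may share it only if they are VIP themselves. A compact sketch of the acceptance test against a single existing allocation (hypothetical types, mirroring the logic in get_available_writemask() below):

    #include <stdbool.h>

    struct alloc_info
    {
        unsigned int writemask; /* components already taken */
        bool vip;               /* the existing allocation is VIP */
    };

    /* Components a new allocation may still use in this register. */
    unsigned int free_components(const struct alloc_info *a, bool new_is_vip)
    {
        unsigned int mask = 0xfu & ~a->writemask;

        if (a->vip && !new_is_vip)
            mask = 0; /* a VIP register rejects non-VIP newcomers entirely */
        return mask;
    }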
static unsigned int get_available_writemask(const struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) + unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip) { unsigned int writemask = VKD3DSP_WRITEMASK_ALL; size_t i; @@ -4532,6 +5034,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all writemask &= ~allocation->writemask; if (allocation->mode != mode) writemask = 0; + if (allocation->vip && !vip) + writemask = 0; }
if (!writemask) @@ -4542,7 +5046,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all }
static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, - unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) + unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip) { struct allocation *allocation;
@@ -4556,16 +5060,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a allocation->first_write = first_write; allocation->last_read = last_read; allocation->mode = mode; + allocation->vip = vip;
allocator->reg_count = max(allocator->reg_count, reg_idx + 1); }
-/* reg_size is the number of register components to be reserved, while component_count is the number - * of components for the register's writemask. In SM1, floats and vectors allocate the whole - * register, even if they don't use it completely. */ +/* Allocates a register (or some components of it) within the register allocator. + * 'reg_size' is the number of register components to be reserved. + * 'component_count' is the number of components for the hlsl_reg's + * writemask, which can be smaller than 'reg_size'. For instance, sm1 + * floats and vectors allocate the whole register even if they are not + * using all components. + * 'mode' can be provided to avoid allocating in a register that already has an + * allocation with a different mode. + * 'force_align' can be used so that the allocation always starts in '.x'. + * 'vip' can be used so that no new allocations can be made in the given register + * unless they are 'vip' as well. */ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size, - unsigned int component_count, int mode, bool force_align) + unsigned int component_count, int mode, bool force_align, bool vip) { struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; unsigned int required_size = force_align ? 4 : reg_size; @@ -4579,7 +5092,7 @@ for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) { unsigned int available_writemask = get_available_writemask(allocator, - first_write, last_read, reg_idx, mode); + first_write, last_read, reg_idx, mode, vip);
if (vkd3d_popcount(available_writemask) >= pref) { @@ -4589,7 +5102,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = reg_idx; ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count)); - record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); + + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip); return ret; } } @@ -4598,13 +5112,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, - vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); + vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip); return ret; }
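The interplay of reg_size, component_count and force_align is easiest to see with a worked example: a force-aligned float3 searches for a fully free register (required_size 4), yet only the first three components end up in the hlsl_reg's writemask. A quick check of the mask arithmetic, assuming vkd3d_write_mask_from_component_count() returns the mask of the first N components:

    #include <stdio.h>

    /* Mask of the first 'count' components, e.g. 3 -> 0x7 (.xyz). */
    unsigned int write_mask_from_component_count(unsigned int count)
    {
        return (1u << count) - 1;
    }

    int main(void)
    {
        /* A force-aligned float3: a whole register (0xf) must be free,
         * but only .xyz (0x7) is recorded as written. */
        printf("searched 0x%x, written 0x%x\n",
                write_mask_from_component_count(4),
                write_mask_from_component_count(3));
        return 0;
    }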
/* Allocate a register with writemask, while reserving reg_writemask. */ -static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) +static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, + struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, + uint32_t reg_writemask, uint32_t writemask, int mode, bool vip) { struct hlsl_reg ret = {0}; uint32_t reg_idx; @@ -4614,11 +5129,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct for (reg_idx = 0;; ++reg_idx) { if ((get_available_writemask(allocator, first_write, last_read, - reg_idx, mode) & reg_writemask) == reg_writemask) + reg_idx, mode, vip) & reg_writemask) == reg_writemask) break; }
- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); + record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip);
ret.id = reg_idx; ret.allocation_size = 1; @@ -4628,7 +5143,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct }
static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, - unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) + unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) { unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; unsigned int writemask; @@ -4636,18 +5151,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig
for (i = 0; i < (reg_size / 4); ++i) { - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); + writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip); if (writemask != VKD3DSP_WRITEMASK_ALL) return false; } - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); + writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip); if ((writemask & last_reg_mask) != last_reg_mask) return false; return true; }
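A range request checks reg_size / 4 whole registers plus a partial mask in the one after them, so e.g. reg_size = 6 needs one fully free register followed by .xy free in the next. The mask arithmetic, mirroring is_range_available():

    #include <stdio.h>

    int main(void)
    {
        unsigned int reg_size = 6;

        printf("%u full register(s), last mask 0x%x\n",
                reg_size / 4, (1u << (reg_size % 4)) - 1);
        /* Prints "1 full register(s), last mask 0x3". */
        return 0;
    }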
static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) + unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip) { struct hlsl_reg ret = {0}; uint32_t reg_idx; @@ -4655,15 +5170,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo
for (reg_idx = 0;; ++reg_idx) { - if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) + if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip)) break; }
for (i = 0; i < reg_size / 4; ++i) - record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip); if (reg_size % 4) record_allocation(ctx, allocator, reg_idx + (reg_size / 4), - (1u << (reg_size % 4)) - 1, first_write, last_read, mode); + (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip);
ret.id = reg_idx; ret.allocation_size = align(reg_size, 4) / 4; @@ -4679,9 +5194,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, /* FIXME: We could potentially pack structs or arrays more efficiently... */
if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); + return allocate_register(ctx, allocator, first_write, last_read, + type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false); else - return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); + return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); }
static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) @@ -4804,6 +5320,10 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); break;
+ case HLSL_IR_INTERLOCKED: + register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst); + break; + default: break; } @@ -4859,8 +5379,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, }
if (reg_writemask) - instr->reg = allocate_register_with_masks(ctx, allocator, - instr->index, instr->last_read, reg_writemask, dst_writemask, 0); + instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, + instr->last_read, reg_writemask, dst_writemask, 0, false); else instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); @@ -5006,13 +5526,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
VKD3D_ASSERT(hlsl_is_numeric_type(type)); - VKD3D_ASSERT(type->dimy == 1); + VKD3D_ASSERT(type->e.numeric.dimy == 1); VKD3D_ASSERT(constant->reg.writemask);
for (x = 0, i = 0; x < 4; ++x) { const union hlsl_constant_value_component *value; - float f; + float f = 0;
if (!(constant->reg.writemask & (1u << x))) continue; @@ -5040,9 +5560,6 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: FIXME("Double constant.\n"); return; - - default: - vkd3d_unreachable(); }
record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); @@ -5084,7 +5601,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, } }
-static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) +static void sort_uniform_by_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort, enum hlsl_regset regset) { struct hlsl_ir_var *var;
@@ -5092,8 +5609,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_
LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) { - uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; - uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; + uint32_t to_sort_size = to_sort->bind_count[regset]; + uint32_t var_size = var->bind_count[regset];
if (to_sort_size > var_size) { @@ -5105,7 +5622,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ list_add_tail(sorted, &to_sort->extern_entry); }
-static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) +static void sort_uniforms_by_bind_count(struct hlsl_ctx *ctx, enum hlsl_regset regset) { struct list sorted = LIST_INIT(sorted); struct hlsl_ir_var *var, *next; @@ -5113,7 +5630,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform) - sort_uniform_by_numeric_bind_count(&sorted, var); + sort_uniform_by_bind_count(&sorted, var, regset); } list_move_tail(&ctx->extern_vars, &sorted); } @@ -5161,7 +5678,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var;
- sort_uniforms_by_numeric_bind_count(ctx); + sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC);
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -5181,14 +5698,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi { if (i < bind_count) { - if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) + if (get_available_writemask(&allocator_used, 1, UINT_MAX, + reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Overlapping register() reservations on 'c%u'.", reg_idx + i); } - record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); } - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); }
var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; @@ -5211,7 +5729,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); + var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } @@ -5254,7 +5772,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { - record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, + var->first_write, var->last_read, 0, false); break; } } @@ -5266,7 +5785,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun return allocator.reg_count; }
-enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers) +static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, + unsigned int storage_modifiers) { unsigned int i;
@@ -5297,7 +5817,7 @@ enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type }
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct register_allocator *allocator, bool output, bool optimize, bool is_patch_constant_func) + struct register_allocator *allocator, bool output, bool optimize) { static const char *const shader_names[] = { @@ -5309,8 +5829,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var [VKD3D_SHADER_TYPE_COMPUTE] = "Compute", };
+ bool is_patch = hlsl_type_is_patch_array(var->data_type); enum vkd3d_shader_register_type type; struct vkd3d_shader_version version; + bool special_interpolation = false; + bool vip_allocation = false; uint32_t reg; bool builtin;
@@ -5346,8 +5869,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var enum vkd3d_shader_sysval_semantic semantic; bool has_idx;
- if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, - ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func)) + if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, ctx->domain, + var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -5363,6 +5886,14 @@ * domains, it is allocated as if it were 'float[1]'. */ var->force_align = true; } + + if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX + || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX + || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) + vip_allocation = true; + + if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX) + special_interpolation = true; }
if (builtin) @@ -5372,12 +5903,17 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } else { + unsigned int component_count = is_patch + ? var->data_type->e.array.type->e.numeric.dimx : var->data_type->e.numeric.dimx; int mode = (ctx->profile->major_version < 4) ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - unsigned int reg_size = optimize ? var->data_type->dimx : 4; + unsigned int reg_size = optimize ? component_count : 4; + + if (special_interpolation) + mode = VKD3DSIM_NONE;
- var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, - UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); + var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, + reg_size, component_count, mode, var->force_align, vip_allocation);
TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); @@ -5386,10 +5922,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { + struct register_allocator in_patch_allocator = {0}, patch_constant_out_patch_allocator = {0}; struct register_allocator input_allocator = {0}, output_allocator = {0}; bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; - bool is_patch_constant_func = entry_func == ctx->patch_constant_func; struct hlsl_ir_var *var;
input_allocator.prioritize_smaller_writemasks = true; @@ -5398,9 +5934,25 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_input_semantic) - allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader, is_patch_constant_func); + { + if (hlsl_type_is_patch_array(var->data_type)) + { + bool is_patch_constant_output_patch = ctx->is_patch_constant_func && + var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT; + + if (is_patch_constant_output_patch) + allocate_semantic_register(ctx, var, &patch_constant_out_patch_allocator, false, + !is_vertex_shader); + else + allocate_semantic_register(ctx, var, &in_patch_allocator, false, + !is_vertex_shader); + } + else + allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader); + } + if (var->is_output_semantic) - allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader, is_patch_constant_func); + allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader); }
vkd3d_free(input_allocator.allocations); @@ -5831,7 +6383,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl switch (type->class) { case HLSL_CLASS_VECTOR: - if (idx >= type->dimx) + if (idx >= type->e.numeric.dimx) return false; *start += idx; break; @@ -5840,9 +6392,9 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl if (idx >= hlsl_type_major_size(type)) return false; if (hlsl_type_is_row_major(type)) - *start += idx * type->dimx; + *start += idx * type->e.numeric.dimx; else - *start += idx * type->dimy; + *start += idx * type->e.numeric.dimy; break;
case HLSL_CLASS_ARRAY: @@ -5950,6 +6502,9 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref
*offset = deref->const_offset;
+ if (hlsl_type_is_patch_array(deref->var->data_type)) + return false; + if (offset_node) { /* We should always have generated a cast to UINT. */ @@ -6329,6 +6884,77 @@ static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct h } }
+static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) +{ + unsigned int control_point_count = var->data_type->e.array.elements_count; + enum hlsl_array_type array_type = var->data_type->e.array.array_type; + struct hlsl_type *control_point_type = var->data_type->e.array.type; + const struct hlsl_profile_info *profile = ctx->profile; + + if (array_type == HLSL_ARRAY_PATCH_INPUT) + { + if (profile->type != VKD3D_SHADER_TYPE_HULL + && !(profile->type == VKD3D_SHADER_TYPE_GEOMETRY && hlsl_version_ge(ctx, 5, 0))) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "InputPatch parameters can only be used in hull shaders, " + "and geometry shaders with shader model 5.0 or higher."); + return; + } + } + else + { + if (!ctx->is_patch_constant_func && profile->type != VKD3D_SHADER_TYPE_DOMAIN) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "OutputPatch parameters can only be used in " + "hull shader patch constant functions and domain shaders."); + return; + } + } + + if (control_point_count > 32) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, + "Control point count %u exceeds 32.", control_point_count); + return; + } + VKD3D_ASSERT(control_point_count > 0); + + if (ctx->is_patch_constant_func && array_type == HLSL_ARRAY_PATCH_OUTPUT) + { + if (control_point_count != ctx->output_control_point_count) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, + "Output control point count %u does not match the count %u specified in the control point function.", + control_point_count, ctx->output_control_point_count); + + if (!hlsl_types_are_equal(control_point_type, ctx->output_control_point_type)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Output control point type does not match the output type of the control point function."); + + return; + } + + if (ctx->input_control_point_count != UINT_MAX) + { + VKD3D_ASSERT(ctx->is_patch_constant_func); + + if (control_point_count != ctx->input_control_point_count) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, + "Input control point count %u does not match the count %u specified in the control point function.", + control_point_count, ctx->input_control_point_count); + + if (!hlsl_types_are_equal(control_point_type, ctx->input_control_point_type)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Input control point type does not match the input type specified in the control point function."); + + return; + } + + ctx->input_control_point_count = control_point_count; + ctx->input_control_point_type = control_point_type; +} + static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct hlsl_ir_node *instr, *next; @@ -6394,6 +7020,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { bool progress;
+ lower_ir(ctx, lower_complex_casts, body); lower_ir(ctx, lower_matrix_swizzles, body);
lower_ir(ctx, lower_broadcasts, body); @@ -6419,6 +7046,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); progress |= hlsl_copy_propagation_execute(ctx, body); progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); @@ -6428,10 +7056,11 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) }
static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, - struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) + struct shader_signature *signature, bool output, struct hlsl_ir_var *var) { + enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID; enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; - enum vkd3d_shader_component_type component_type; + bool is_patch = hlsl_type_is_patch_array(var->data_type); unsigned int register_index, mask, use_mask; const char *name = var->semantic.name; enum vkd3d_shader_register_type type; @@ -6440,10 +7069,11 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog if (hlsl_version_ge(ctx, 4, 0)) { struct vkd3d_string_buffer *string; + enum hlsl_base_type numeric_type; bool has_idx, ret;
ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, - ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch); VKD3D_ASSERT(ret); if (sysval == ~0u) return; @@ -6451,7 +7081,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) { register_index = has_idx ? var->semantic.index : ~0u; - mask = (1u << var->data_type->dimx) - 1; + mask = (1u << var->data_type->e.numeric.dimx) - 1; } else { @@ -6462,7 +7092,12 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog
use_mask = mask; /* FIXME: retrieve use mask accurately. */
- switch (var->data_type->e.numeric.type) + if (var->data_type->class == HLSL_CLASS_ARRAY) + numeric_type = var->data_type->e.array.type->e.numeric.type; + else + numeric_type = var->data_type->e.numeric.type; + + switch (numeric_type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -6478,12 +7113,11 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog component_type = VKD3D_SHADER_COMPONENT_UINT; break;
- default: + case HLSL_TYPE_DOUBLE: if ((string = hlsl_type_to_string(ctx, var->data_type))) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid data type %s for semantic variable %s.", string->buffer, var->name); hlsl_release_string_buffer(ctx, string); - component_type = VKD3D_SHADER_COMPONENT_VOID; break; }
@@ -6519,19 +7153,19 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog sysval = VKD3D_SHADER_SV_POSITION; }
- mask = (1 << var->data_type->dimx) - 1; + mask = (1 << var->data_type->e.numeric.dimx) - 1;
if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) { - if (var->data_type->dimx > 1) + if (var->data_type->e.numeric.dimx > 1) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "PSIZE output must have only 1 component in this shader model."); /* For some reason the writemask has all components set. */ mask = VKD3DSP_WRITEMASK_ALL; } if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3 - && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1) + && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->e.numeric.dimx > 1) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "FOG output must have only 1 component in this shader model.");
@@ -6570,26 +7204,40 @@ static void generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_function_decl *func) { bool is_domain = program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; - bool is_patch_constant_func = func == ctx->patch_constant_func; struct hlsl_ir_var *var;
+ ctx->is_patch_constant_func = func == ctx->patch_constant_func; + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_input_semantic) { - if (is_patch_constant_func) - generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, true, var); + bool is_patch = hlsl_type_is_patch_array(var->data_type); + + if (ctx->is_patch_constant_func) + { + if (!is_patch) + generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, var); + } else if (is_domain) - generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, false, var); + { + if (is_patch) + generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); + else + generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, var); + } else - generate_vsir_signature_entry(ctx, program, &program->input_signature, false, false, var); + { + generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); + } } + if (var->is_output_semantic) { - if (is_patch_constant_func) - generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, true, var); + if (ctx->is_patch_constant_func) + generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, var); else - generate_vsir_signature_entry(ctx, program, &program->output_signature, true, false, var); + generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); } } } @@ -6636,7 +7284,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d
swizzle = hlsl_swizzle_from_writemask(src_writemask); swizzle = hlsl_map_swizzle(swizzle, dst_writemask); - swizzle = vsir_swizzle_from_hlsl(swizzle); return swizzle; }
@@ -6726,7 +7373,8 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, break;
case HLSL_SAMPLER_DIM_GENERIC: - /* These can appear in sm4-style combined sample instructions. */ + /* These can appear in sm4-style separate sample + * instructions that haven't been lowered. */ hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); continue;
@@ -6765,6 +7413,39 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, } }
+static enum vkd3d_shader_register_type sm4_get_semantic_register_type(enum vkd3d_shader_type shader_type, + bool is_patch_constant_func, const struct hlsl_ir_var *var) +{ + if (hlsl_type_is_patch_array(var->data_type)) + { + VKD3D_ASSERT(var->is_input_semantic); + + switch (shader_type) + { + case VKD3D_SHADER_TYPE_HULL: + if (is_patch_constant_func) + { + bool is_inputpatch = var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT; + + return is_inputpatch ? VKD3DSPR_INCONTROLPOINT : VKD3DSPR_OUTCONTROLPOINT; + } + return VKD3DSPR_INPUT; + + case VKD3D_SHADER_TYPE_DOMAIN: + return VKD3DSPR_INCONTROLPOINT; + + default: + return VKD3DSPR_INPUT; + } + } + + if (var->is_output_semantic) + return VKD3DSPR_OUTPUT; + if (shader_type == VKD3D_SHADER_TYPE_DOMAIN) + return VKD3DSPR_PATCHCONST; + return VKD3DSPR_INPUT; +} + static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( struct hlsl_ctx *ctx, struct vsir_program *program, const struct vkd3d_shader_location *loc, enum vkd3d_shader_opcode opcode, @@ -6812,7 +7493,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src }
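For reference, sm4_get_semantic_register_type() boils down to the following mapping for patch-array inputs (derived directly from the function above):

    shader type   context                             register type
    hull          patch constant func, InputPatch     VKD3DSPR_INCONTROLPOINT
    hull          patch constant func, OutputPatch    VKD3DSPR_OUTCONTROLPOINT
    hull          control point func                  VKD3DSPR_INPUT
    domain        any                                 VKD3DSPR_INCONTROLPOINT
    other         any                                 VKD3DSPR_INPUT

Non-patch variables keep the previous behaviour: output semantics map to VKD3DSPR_OUTPUT, domain shader inputs to VKD3DSPR_PATCHCONST, and everything else to VKD3DSPR_INPUT.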
static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, - struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) { struct hlsl_ir_constant *constant;
@@ -6821,7 +7502,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, /* In SM4 constants are inlined */ constant = hlsl_ir_constant(instr); vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, - vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); + vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->e.numeric.dimx, map_writemask); } else { @@ -6832,107 +7513,360 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, } }
-static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, - struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) -{ - VKD3D_ASSERT(instr->reg.allocated); - vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); - dst->reg.idx[0].offset = instr->reg.id; - dst->reg.dimension = VSIR_DIMENSION_VEC4; - dst->write_mask = instr->reg.writemask; -} - -static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_constant *constant) +static struct vkd3d_shader_src_param *sm4_generate_vsir_new_idx_src(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_node *rel_offset) { - struct hlsl_ir_node *instr = &constant->node; - struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_instruction *ins; - - VKD3D_ASSERT(instr->reg.allocated); - VKD3D_ASSERT(constant->reg.allocated); - - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) - return; + struct vkd3d_shader_src_param *idx_src;
- src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = constant->reg.id; - src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + if (!(idx_src = vsir_program_get_src_params(program, 1))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + }
- dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - dst_param->reg.idx[0].offset = instr->reg.id; - dst_param->write_mask = instr->reg.writemask; + memset(idx_src, 0, sizeof(*idx_src)); + vsir_src_from_hlsl_node(idx_src, ctx, rel_offset, VKD3DSP_WRITEMASK_ALL); + return idx_src; }
-static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_expr *expr) +static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) { - struct vkd3d_shader_src_param *src_param; - struct hlsl_ir_node *instr = &expr->node; - struct vkd3d_shader_instruction *ins; - - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) - return; - ins->flags = VKD3DSI_SAMPLE_INFO_UINT; - - vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); - - src_param = &ins->src[0]; - vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); - src_param->reg.dimension = VSIR_DIMENSION_VEC4; - src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -} + const struct hlsl_ir_var *var = deref->var; + unsigned int offset_const_deref;
-/* Translate ops that can be mapped to a single vsir instruction with only one dst register. */ -static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, - uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) -{ - struct hlsl_ir_node *instr = &expr->node; - struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_instruction *ins; - unsigned int i, src_count = 0; + reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4;
- VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
- for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + if (!var->indexable) { - if (expr->operands[i].node) - src_count = i + 1; + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); + reg->idx[0].offset += offset_const_deref / 4; + reg->idx_count = 1; } - VKD3D_ASSERT(!src_mod || src_count == 1); - - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) - return; - - dst_param = &ins->dst[0]; - vsir_dst_from_hlsl_node(dst_param, ctx, instr); - dst_param->modifiers = dst_mod; - - for (i = 0; i < src_count; ++i) + else { - struct hlsl_ir_node *operand = expr->operands[i].node; + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2;
- src_param = &ins->src[i]; - vsir_src_from_hlsl_node(src_param, ctx, operand, - map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); - src_param->modifiers = src_mod; + if (deref->rel_offset.node) + { + if (!(reg->idx[1].rel_addr = sm4_generate_vsir_new_idx_src(ctx, program, deref->rel_offset.node))) + return false; + } } + + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); + return true; }
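The final writemask computation is worth a concrete example: component offset 6 advances the register index by 6 / 4 = 1 and leaves .zw available within that register. A worked check of the same expression:

    #include <stdio.h>

    int main(void)
    {
        unsigned int offset = 6; /* component offset into the variable */

        /* Mirrors the code above: the register index advances by
         * offset / 4, and the writemask keeps the components at or
         * after offset % 4. */
        printf("reg +%u, mask 0x%x\n", offset / 4, 0xfu & (0xfu << (offset % 4)));
        return 0;
    }

This prints "reg +1, mask 0xc", i.e. the second register of the range with .zw writable.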
-/* Translate ops that have 1 src and need one instruction for each component in - * the d3dbc backend. */ -static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode) +static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) { - struct hlsl_ir_node *operand = expr->operands[0].node; - struct hlsl_ir_node *instr = &expr->node; - struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) + { + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); + + if (regset == HLSL_REGSET_TEXTURES) + { + reg->type = VKD3DSPR_RESOURCE; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { + reg->type = VKD3DSPR_UAV; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { + reg->type = VKD3DSPR_SAMPLER; + reg->dimension = VSIR_DIMENSION_NONE; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ + reg->idx[2].offset = offset / 4; + reg->idx_count = 3; + } + else + { + reg->idx[0].offset = var->buffer->reg.index; + reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + } + *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) + { + bool is_patch = hlsl_type_is_patch_array(var->data_type); + bool has_idx; + + if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, 
&has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + VKD3D_ASSERT(!is_patch); + + if (has_idx) + { + reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); + + reg->type = sm4_get_semantic_register_type(version->type, ctx->is_patch_constant_func, var); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[is_patch ? 1 : 0].offset = hlsl_reg.id; + reg->idx_count = is_patch ? 2 : 1; + *writemask = hlsl_reg.writemask; + } + + if (is_patch) + { + reg->idx[0].offset = deref->const_offset / 4; + if (deref->rel_offset.node) + { + if (!(reg->idx[0].rel_addr = sm4_generate_vsir_new_idx_src(ctx, program, deref->rel_offset.node))) + return false; + } + } + } + else if (var->is_output_semantic) + { + bool has_idx; + + if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); + reg->type = VKD3DSPR_OUTPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else + { + return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); + } + return true; +} + +static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, + unsigned int dst_writemask, const struct vkd3d_shader_location *loc) +{ + uint32_t writemask; + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) + return false; + src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + return true; +} + +static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc, unsigned int writemask) +{ + uint32_t reg_writemask; + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, &reg_writemask, deref)) + return false; + dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); + return true; +} + +static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +{ + VKD3D_ASSERT(instr->reg.allocated); + vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + dst->reg.idx[0].offset = instr->reg.id; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = instr->reg.writemask; +} + +static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_constant *constant) +{ + struct hlsl_ir_node *instr = &constant->node; + struct 
vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(constant->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = constant->reg.id; + src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; +} + +static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr) +{ + struct vkd3d_shader_src_param *src_param; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) + return; + ins->flags = VKD3DSI_SAMPLE_INFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + src_param = &ins->src[0]; + vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +} + +/* Translate ops that can be mapped to a single vsir instruction with only one dst register. */ +static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, + uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) +{ + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int i, src_count = 0; + + VKD3D_ASSERT(instr->reg.allocated); + + for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + { + if (expr->operands[i].node) + src_count = i + 1; + } + VKD3D_ASSERT(!src_mod || src_count == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + dst_param->modifiers = dst_mod; + + for (i = 0; i < src_count; ++i) + { + struct hlsl_ir_node *operand = expr->operands[i].node; + + src_param = &ins->src[i]; + vsir_src_from_hlsl_node(src_param, ctx, operand, + map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = src_mod; + } +} + +/* Translate ops that have 1 src and need one instruction for each component in + * the d3dbc backend. */ +static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode) +{ + struct hlsl_ir_node *operand = expr->operands[0].node; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_instruction *ins; uint32_t src_swizzle; unsigned int i, c; @@ -7014,7 +7948,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, dst_type = instr->data_type;
/* Narrowing casts were already lowered. */ - VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx);
switch (dst_type->e.numeric.type) { @@ -7040,9 +7974,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "The 'double' type is not supported for the %s profile.", ctx->profile->name); break; - - default: - vkd3d_unreachable(); } break;
@@ -7059,19 +7990,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_INT: case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true;
- case HLSL_TYPE_BOOL: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); - break; - case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); break; - - default: - vkd3d_unreachable(); } break;
@@ -7096,7 +8021,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_BOOL: /* Casts to bool should have already been lowered. */ - default: hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); break; @@ -7178,7 +8102,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break;
case HLSL_OP2_DOT: - switch (expr->operands[0].node->data_type->dimx) + switch (expr->operands[0].node->data_type->e.numeric.dimx) { case 3: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); @@ -7276,7 +8200,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, register_index = reg.id; } else - writemask = (1u << deref->var->data_type->dimx) - 1; + writemask = (1u << deref->var->data_type->e.numeric.dimx) - 1;
if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) @@ -7334,7 +8258,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, deref->var->semantic.index, false, &type, &register_index)) { - writemask = (1 << deref->var->data_type->dimx) - 1; + writemask = (1 << deref->var->data_type->e.numeric.dimx) - 1; } else {
swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); - swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); + swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx); swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); - swizzle = vsir_swizzle_from_hlsl(swizzle);
src_param = &ins->src[0]; VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); @@ -7539,7 +8462,7 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); return; } - VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1);
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2))) return; @@ -7624,31 +8547,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo }
static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) + uint64_t config_flags, struct vsir_program *program) { struct vkd3d_shader_version version = {0}; - struct vkd3d_bytecode_buffer buffer = {0}; struct hlsl_block block;
version.major = ctx->profile->major_version; version.minor = ctx->profile->minor_version; version.type = ctx->profile->type; - if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) { ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; return; }
- write_sm1_uniforms(ctx, &buffer); - if (buffer.status) - { - vkd3d_free(buffer.data); - ctx->result = buffer.status; - return; - } - ctab->code = buffer.data; - ctab->size = buffer.size; - generate_vsir_signature(ctx, program, entry_func);
hlsl_block_init(&block); @@ -7659,45 +8571,413 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl sm1_generate_vsir_block(ctx, &entry_func->body, program); }
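The swizzle hunk above now composes the value's register swizzle directly with the instruction's vector swizzle (swizzle_instr->u.vector) before mapping it through the destination write mask. A minimal standalone sketch of that composition, assuming the usual 2-bits-per-component swizzle encoding; the helper names here are illustrative, not the patch's:

#include <stdint.h>

/* Select component i (0..3) from a 2-bits-per-component swizzle. */
static unsigned int swizzle_get(uint32_t swizzle, unsigned int i)
{
    return (swizzle >> (2 * i)) & 0x3;
}

/* Component i of the result takes, from "first", the component that
 * "second" selects at position i, as in hlsl_combine_swizzles(). */
static uint32_t combine_swizzles(uint32_t first, uint32_t second, unsigned int dim)
{
    uint32_t ret = 0;
    unsigned int i;

    for (i = 0; i < dim; ++i)
        ret |= swizzle_get(first, swizzle_get(second, i)) << (2 * i);
    return ret;
}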
-static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) +D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) { - struct vkd3d_shader_location *loc; - struct hlsl_ir_node *vsir_instr; - - loc = &program->instructions.elements[program->instructions.count - 1].location; - - if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) + switch (type->class) { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; + case HLSL_CLASS_ARRAY: + return hlsl_sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPC_OBJECT; + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; } - hlsl_block_add_instr(block, vsir_instr); -}
-static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_node *instr) + vkd3d_unreachable(); +} + +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler) { - struct vkd3d_shader_location *loc; - struct hlsl_ir_node *vsir_instr; + enum hlsl_type_class class = type->class;
- loc = &program->instructions.elements[program->instructions.count - 1].location; + if (is_combined_sampler) + class = HLSL_CLASS_TEXTURE;
- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, - program->instructions.count - 1, instr->data_type, &instr->reg, loc))) + switch (class) { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + /* Actually double behaves differently depending on DLL version: + * For <= 36, it maps to D3DXPT_FLOAT. + * For 37-40, it maps to zero (D3DXPT_VOID). + * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* + * values are mostly compatible with D3DXPT_*). + * However, the latter two cases look like bugs, and a reasonable + * application certainly wouldn't know what to do with them. + * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ + case HLSL_TYPE_DOUBLE: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + } + break; + + case HLSL_CLASS_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + + case HLSL_CLASS_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + + case HLSL_CLASS_ARRAY: + return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler); + + case HLSL_CLASS_STRUCT: + return D3DXPT_VOID; + + case HLSL_CLASS_STRING: + return D3DXPT_STRING; + + case HLSL_CLASS_PIXEL_SHADER: + return D3DXPT_PIXELSHADER; + + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPT_VERTEXSHADER; + + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } + + vkd3d_unreachable(); +} + +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, + struct hlsl_type *type, bool is_combined_sampler, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + unsigned int array_size = hlsl_get_multiarray_size(type); + struct hlsl_struct_field *field; + size_t i; + + if (type->bytecode_offset) return; + + if (array_type->class == HLSL_CLASS_STRUCT) + { + unsigned int field_count = array_type->e.record.field_count; + size_t fields_offset; + + for (i = 0; i < field_count; ++i) + { + field = &array_type->e.record.fields[i]; + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm1_type(buffer, field->type, false, ctab_start); + } + + 
fields_offset = bytecode_align(buffer) - ctab_start; + + for (i = 0; i < field_count; ++i) + { + field = &array_type->e.record.fields[i]; + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3DXPC_STRUCT, D3DXPT_VOID)); + put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); + } + else + { + type->bytecode_offset = put_u32(buffer, + vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); + if (hlsl_is_numeric_type(array_type)) + put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx)); + else + put_u32(buffer, vkd3d_make_u32(1, 1)); + put_u32(buffer, vkd3d_make_u32(array_size, 0)); + put_u32(buffer, 1); + } +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) + sm1_sort_extern(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + size_t ctab_start, vars_offset, vars_start, creator_offset, offset; + unsigned int uniform_count = 0, r; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + char *new_name; + + if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) + return; + vkd3d_free((char *)var->name); + var->name = new_name; + } + } + } + + sm1_sort_externs(ctx); + + ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. 
*/ + creator_offset = put_u32(buffer, 0); + if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + put_u32(buffer, D3DVS_VERSION(ctx->profile->major_version, ctx->profile->minor_version)); + else + put_u32(buffer, D3DPS_VERSION(ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + vars_offset = put_u32(buffer, 0); + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */ + + vars_start = bytecode_align(buffer); + set_u32(buffer, vars_offset, vars_start - ctab_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + put_u32(buffer, 0); /* name */ + if (r == HLSL_REGSET_NUMERIC) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); + put_u32(buffer, var->bind_count[r]); + } + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); + put_u32(buffer, var->bind_count[r]); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + size_t var_offset, name_offset; + + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); + + write_sm1_type(buffer, var->data_type, var->is_combined_sampler, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + + if (var->default_values) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + unsigned int k; + + default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); + set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); + + for (k = 0; k < comp_count; ++k) + { + struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); + unsigned int comp_offset; + enum hlsl_regset regset; + + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset); + if (regset == HLSL_REGSET_NUMERIC) + { + union + { + uint32_t u; + float f; + } uni = {0}; + + switch (comp_type->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + if (ctx->double_as_float_alias) + uni.u = var->default_values[k].number.u; + else + uni.u = 0; + break; + + case HLSL_TYPE_INT: + uni.f = var->default_values[k].number.i; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + uni.f = var->default_values[k].number.u; + break; + + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + uni.u = var->default_values[k].number.u; + break; + } + + set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); + } + } + } + + ++uniform_count; + } }
- list_add_before(&instr->entry, &vsir_instr->entry); - hlsl_replace_node(instr, vsir_instr); + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); +} + +static void sm1_generate_ctab(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ctab) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + + write_sm1_uniforms(ctx, &buffer); + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; + return; + } + ctab->code = buffer.data; + ctab->size = buffer.size; }
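For orientation, the header that write_sm1_uniforms() emits above follows the d3dx9 constant-table layout: seven dwords, then five-dword per-uniform records, with every stored offset relative to ctab_start. A sketch of that layout and of the default-value conversion; the field names follow the public d3dx9 headers, and the assert is illustrative:

#include <assert.h>
#include <stdint.h>

struct ctab_header                  /* cf. D3DXSHADER_CONSTANTTABLE */
{
    uint32_t size;                  /* 7 * sizeof(uint32_t) */
    uint32_t creator;               /* offset of the creator string */
    uint32_t version;               /* D3DVS_VERSION()/D3DPS_VERSION() */
    uint32_t constants;             /* uniform_count */
    uint32_t constant_info;         /* offset of the constant records */
    uint32_t flags;
    uint32_t target;                /* offset of the target string */
};

int main(void)
{
    /* Int, uint, and bool defaults are stored as their float
     * representation, per the HLSL_TYPE_INT/UINT/BOOL cases above:
     * an int default of 3 is written as 3.0f. */
    union { uint32_t u; float f; } uni = {0};

    uni.f = 3;
    assert(uni.u == 0x40400000u);
    return 0;
}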
static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, - const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, - const struct vkd3d_shader_location *loc) + const struct hlsl_ir_var *var, struct hlsl_block *block, const struct vkd3d_shader_location *loc) { const struct vkd3d_shader_version *version = &program->shader_version; + const bool is_patch = hlsl_type_is_patch_array(var->data_type); const bool output = var->is_output_semantic; enum vkd3d_shader_sysval_semantic semantic; struct vkd3d_shader_dst_param *dst_param; @@ -7708,8 +8988,8 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs uint32_t write_mask; bool has_idx;
- sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, - ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, ctx->domain, + var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch); if (semantic == ~0u) semantic = VKD3D_SHADER_SV_NONE;
@@ -7732,14 +9012,19 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs break;
default: - opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV; + if (version->type == VKD3D_SHADER_TYPE_PIXEL) + opcode = VKD3DSIH_DCL_INPUT_PS_SIV; + else if (is_patch) + opcode = VKD3DSIH_DCL_INPUT; + else + opcode = VKD3DSIH_DCL_INPUT_SIV; break; } } else { - if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) + if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL + || version->type == VKD3D_SHADER_TYPE_HULL) opcode = VKD3DSIH_DCL_OUTPUT; else opcode = VKD3DSIH_DCL_OUTPUT_SIV; @@ -7749,17 +9034,11 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs { if (has_idx) idx = var->semantic.index; - write_mask = (1u << var->data_type->dimx) - 1; + write_mask = (1u << var->data_type->e.numeric.dimx) - 1; } else { - if (output) - type = VKD3DSPR_OUTPUT; - else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) - type = VKD3DSPR_PATCHCONST; - else - type = VKD3DSPR_INPUT; - + type = sm4_get_semantic_register_type(version->type, ctx->is_patch_constant_func, var); has_idx = true; idx = var->regs[HLSL_REGSET_NUMERIC].id; write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; @@ -7770,13 +9049,13 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs
if (opcode == VKD3DSIH_DCL_OUTPUT) { - VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE - || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT); + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET + || version->type == VKD3D_SHADER_TYPE_HULL || type != VKD3DSPR_OUTPUT); dst_param = &ins->declaration.dst; } else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS) { - VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || is_patch); dst_param = &ins->declaration.dst; } else @@ -7787,7 +9066,14 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs dst_param = &ins->declaration.register_semantic.reg; }
- if (has_idx) + if (is_patch) + { + VKD3D_ASSERT(has_idx); + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 2); + dst_param->reg.idx[0].offset = var->data_type->e.array.elements_count; + dst_param->reg.idx[1].offset = idx; + } + else if (has_idx) { vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); dst_param->reg.idx[0].offset = idx; @@ -7806,8 +9092,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs
if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - - add_last_vsir_instr_to_block(ctx, program, block); }
static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, @@ -7819,8 +9103,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ return;
ins->declaration.count = temp_count; - - add_last_vsir_instr_to_block(ctx, program, block); }
static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, @@ -7838,8 +9120,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; ins->declaration.indexable_temp.component_count = comp_count; ins->declaration.indexable_temp.has_function_scope = false; - - add_last_vsir_instr_to_block(ctx, program, block); }
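The declaration above later receives its register count from align(reg_size, 4) / 4 at the call site in sm4_generate_vsir_add_function() further below. A small sketch of that computation, assuming array elements are aligned to vec4 boundaries with the trailing element packed; the helper name is illustrative:

/* Register count for an indexable temp declaration; element_reg_size is
 * in scalar components (a sketch under the vec4-alignment assumption). */
static unsigned int indexable_temp_size(unsigned int elements, unsigned int element_reg_size)
{
    unsigned int reg_size = (elements - 1) * ((element_reg_size + 3) & ~3u) + element_reg_size;

    return (reg_size + 3) / 4;
}

/* float arr[5]:  (5 - 1) * 4 + 1 = 17 scalars -> 5 registers.
 * float4 arr[8]: (8 - 1) * 4 + 4 = 32 scalars -> 8 registers. */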
static bool type_is_float(const struct hlsl_type *type) @@ -7891,7 +9171,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, } one = { .f = 1.0 };
/* Narrowing casts were already lowered. */ - VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx);
switch (dst_type->e.numeric.type) { @@ -7919,9 +9199,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); return false; - - default: - vkd3d_unreachable(); } break;
@@ -7945,9 +9222,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); return false; - - default: - vkd3d_unreachable(); } break;
@@ -7971,9 +9245,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); return false; - - default: - vkd3d_unreachable(); } break;
@@ -7983,9 +9254,10 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_BOOL: /* Casts to bool should have already been lowered. */ - default: - vkd3d_unreachable(); + break; } + + vkd3d_unreachable(); }
static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, @@ -8040,7 +9312,7 @@ static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, value.u[2].f = 1.0f; value.u[3].f = 1.0f; vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, - VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask); + VKD3D_DATA_FLOAT, instr->data_type->e.numeric.dimx, dst_param->write_mask);
vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); } @@ -8270,7 +9542,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, switch (dst_type->e.numeric.type) { case HLSL_TYPE_FLOAT: - switch (expr->operands[0].node->data_type->dimx) + switch (expr->operands[0].node->data_type->e.numeric.dimx) { case 4: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); @@ -8505,188 +9777,2045 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, } }
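sm4_generate_vsir_rcp_using_div() above emits the reciprocal as a division of an inline all-ones vec4 by the operand; its effect, modelled in scalar form:

/* Scalar model of the fallback: rcp(x) emitted as div(1.0f, x),
 * replicated over the destination write mask. */
static float rcp_via_div(float x)
{
    return 1.0f / x;
}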
-static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_store *store) { - struct vkd3d_string_buffer *dst_type_string; - struct hlsl_ir_node *instr, *next; - struct hlsl_ir_switch_case *c; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins;
- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { - hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; - } - } + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return false;
- switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); + dst_param = &ins->dst[0]; + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + dst_param, &store->lhs, &instr->loc, store->writemask)) + return false;
- case HLSL_IR_CONSTANT: - /* In SM4 all constants are inlined. */ - break; + src_param = &ins->src[0]; + vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask);
- case HLSL_IR_EXPR: - if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) - break; + return true; +}
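The store path above leans on the write-mask arithmetic used throughout these generators: a dimx-component value occupies a contiguous run of bits, and sm4_generate_vsir_init_dst_param_from_deref() narrows the register's mask by the store's mask via hlsl_combine_writemasks(). A standalone sketch; the helper names are illustrative:

#include <assert.h>
#include <stdint.h>

/* Mask for a dimx-component value starting at component offset % 4. */
static uint32_t component_writemask(unsigned int dimx, unsigned int offset)
{
    return ((1u << dimx) - 1) << (offset % 4);
}

/* Keep the n-th set bit of "first" iff bit n of "second" is set,
 * as in hlsl_combine_writemasks(). */
static uint32_t combine_writemasks(uint32_t first, uint32_t second)
{
    uint32_t ret = 0;
    unsigned int i, j = 0;

    for (i = 0; i < 4; ++i)
    {
        if (first & (1u << i))
        {
            if (second & (1u << j++))
                ret |= 1u << i;
        }
    }
    return ret;
}

int main(void)
{
    assert(component_writemask(3, 1) == 0xe);    /* float3 at .y -> .yzw */
    assert(combine_writemasks(0xe, 0x3) == 0x6); /* first two of .yzw -> .yz */
    return 0;
}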
- if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) - replace_instr_with_last_vsir_instr(ctx, program, instr); +/* Does this variable's data come directly from the API user, rather than + * being temporary or from a previous shader stage? I.e. is it a uniform or + * VS input? */ +static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) +{ + if (var->is_uniform) + return true;
- hlsl_release_string_buffer(ctx, dst_type_string); - break; + return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; +}
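var_is_user_input() above feeds the bool normalisation in sm4_generate_vsir_instr_load() just below; its effect, modelled in scalar form (the helper name is illustrative):

#include <stdint.h>

/* A user-supplied bool may be any bit pattern; internal bools must be
 * exactly ~0u (true) or 0u (false). The MOVC emitted below selects
 * between two inline constants: movc dst, src, 0xffffffff, 0. */
static uint32_t normalise_user_bool(uint32_t user_value)
{
    return user_value ? ~0u : 0u;
}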
- case HLSL_IR_IF: - sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); - sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); - break; +static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *type = load->node.data_type; + struct vkd3d_shader_dst_param *dst_param; + struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + struct hlsl_constant_value value;
- case HLSL_IR_LOOP: - sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); - break; + VKD3D_ASSERT(hlsl_is_numeric_type(type)); + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) + { + /* Uniform bools can be specified as anything, but internal bools + * always have 0 for false and ~0 for true. Normalise that here. */
- case HLSL_IR_SWITCH: - LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) - sm4_generate_vsir_block(ctx, &c->body, program); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) + return false; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; + + memset(&value, 0xff, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, + VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask); + memset(&value, 0x00, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, + VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask); + } + else + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return false; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; + } + return true; +} + +static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_resource_store *store) +{ + struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); + struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_instruction *ins; + unsigned int writemask; + + if (!store->resource.var->is_uniform) + { + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + return false; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); + return false; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) + return false; + + writemask = vkd3d_write_mask_from_component_count(value->data_type->e.numeric.dimx); + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + &ins->dst[0], &store->resource, &instr->loc, writemask)) + return false; + } + else + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) + return false; + + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) + return false; + } + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); + + return true; +} + +static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset) +{ + struct hlsl_ir_constant *offset; + + VKD3D_ASSERT(texel_offset); + if (texel_offset->type != HLSL_IR_CONSTANT) + return false; + offset = hlsl_ir_constant(texel_offset); + + if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) + return false; + if (offset->node.data_type->e.numeric.dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) + return false; + if (offset->node.data_type->e.numeric.dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) + return false; + return true; +} + +static void 
sm4_generate_vsir_encode_texel_offset_as_aoffimmi( + struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset) +{ + struct hlsl_ir_constant *offset; + + if (!texel_offset) + return; + offset = hlsl_ir_constant(texel_offset); + + ins->texel_offset.u = offset->value.u[0].i; + ins->texel_offset.v = 0; + ins->texel_offset.w = 0; + if (offset->node.data_type->e.numeric.dimx > 1) + ins->texel_offset.v = offset->value.u[1].i; + if (offset->node.data_type->e.numeric.dimx > 2) + ins->texel_offset.w = offset->value.u[2].i; +} + +static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource); + bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS); + const struct vkd3d_shader_version *version = &program->shader_version; + bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + enum hlsl_sampler_dim dim = load->sampling_dim; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + bool multisampled; + + VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD); + + multisampled = resource_type->class == HLSL_CLASS_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + + if (uav) + opcode = VKD3DSIH_LD_UAV_TYPED; + else if (raw) + opcode = VKD3DSIH_LD_RAW; + else + opcode = multisampled ? VKD3DSIH_LD2DMS : VKD3DSIH_LD; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled))) + return false; + + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return false; + } + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + if (!uav) + { + /* Mipmap level is in the last component in the IR, but needs to be in + * the W component in the instruction. 
*/ + unsigned int dim_count = hlsl_sampler_dim_count(dim); + + if (dim_count == 1) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; + if (dim_count == 2) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; + } + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (multisampled) + { + if (sample_index->type == HLSL_IR_CONSTANT) + vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, + &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0); + else if (version->major == 4 && version->minor == 0) + hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + else + vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL); + } + return true; +} + +static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + unsigned int src_count; + + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + opcode = VKD3DSIH_SAMPLE; + src_count = 3; + break; + + case HLSL_RESOURCE_SAMPLE_CMP: + opcode = VKD3DSIH_SAMPLE_C; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + opcode = VKD3DSIH_SAMPLE_C_LZ; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_LOD: + opcode = VKD3DSIH_SAMPLE_LOD; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + opcode = VKD3DSIH_SAMPLE_B; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + opcode = VKD3DSIH_SAMPLE_GRAD; + src_count = 5; + break; + + default: + vkd3d_unreachable(); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return false; + + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return false; + } + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], + resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2], + sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + + if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); + } + else if (opcode == VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL); + } + else if (opcode == VKD3DSIH_SAMPLE_GRAD) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL); + } + return 
true; +} + +static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, + const struct hlsl_ir_resource_load *load, uint32_t swizzle, bool compare) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + enum vkd3d_shader_opcode opcode = VKD3DSIH_GATHER4; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *instr = &load->node; + unsigned int src_count = 3, current_arg = 0; + struct vkd3d_shader_instruction *ins; + + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + if (!vkd3d_shader_ver_ge(version, 5, 0)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return false; + } + opcode = VKD3DSIH_GATHER4_PO; + ++src_count; + } + + if (compare) + { + opcode = opcode == VKD3DSIH_GATHER4 ? VKD3DSIH_GATHER4_C : VKD3DSIH_GATHER4_PO_C; + ++src_count; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return false; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, coords, VKD3DSP_WRITEMASK_ALL); + + if (opcode == VKD3DSIH_GATHER4_PO || opcode == VKD3DSIH_GATHER4_PO_C) + vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); + else + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[current_arg++], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[current_arg], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + ins->src[current_arg].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[current_arg].swizzle = swizzle; + current_arg++; + + if (compare) + vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, load->cmp.node, VKD3DSP_WRITEMASK_0); + + return true; +} + +static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + struct hlsl_type *type = instr->data_type; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) + return false; + + if (type->e.numeric.type == HLSL_TYPE_UINT) + ins->flags = VKD3DSI_SAMPLE_INFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + return true; +} + +static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + struct hlsl_type *type = instr->data_type; + struct vkd3d_shader_instruction *ins; + + if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER + || 
resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers."); + return false; + } + + VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2))) + return false; + + if (type->e.numeric.type == HLSL_TYPE_UINT) + ins->flags = VKD3DSI_RESINFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + return true; +} + +static uint32_t get_gather_swizzle(enum hlsl_resource_load_type type) +{ + switch (type) + { + case HLSL_RESOURCE_GATHER_RED: + case HLSL_RESOURCE_GATHER_CMP_RED: + return VKD3D_SHADER_SWIZZLE(X, X, X, X); + + case HLSL_RESOURCE_GATHER_GREEN: + case HLSL_RESOURCE_GATHER_CMP_GREEN: + return VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y); + + case HLSL_RESOURCE_GATHER_BLUE: + case HLSL_RESOURCE_GATHER_CMP_BLUE: + return VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); + + case HLSL_RESOURCE_GATHER_ALPHA: + case HLSL_RESOURCE_GATHER_CMP_ALPHA: + return VKD3D_SHADER_SWIZZLE(W, W, W, W); + default: + return 0; + } + + return 0; +} + +static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + if (load->sampler.var && !load->sampler.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return false; + } + + if (!load->resource.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + return false; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + return sm4_generate_vsir_instr_ld(ctx, program, load); + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: + /* Combined sample expressions were lowered. 
*/ + VKD3D_ASSERT(load->sampler.var); + return sm4_generate_vsir_instr_sample(ctx, program, load); + + case HLSL_RESOURCE_GATHER_RED: + case HLSL_RESOURCE_GATHER_GREEN: + case HLSL_RESOURCE_GATHER_BLUE: + case HLSL_RESOURCE_GATHER_ALPHA: + return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), false); + + case HLSL_RESOURCE_GATHER_CMP_RED: + case HLSL_RESOURCE_GATHER_CMP_GREEN: + case HLSL_RESOURCE_GATHER_CMP_BLUE: + case HLSL_RESOURCE_GATHER_CMP_ALPHA: + return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), true); + + case HLSL_RESOURCE_SAMPLE_INFO: + return sm4_generate_vsir_instr_sample_info(ctx, program, load); + + case HLSL_RESOURCE_RESINFO: + return sm4_generate_vsir_instr_resinfo(ctx, program, load); + + case HLSL_RESOURCE_SAMPLE_PROJ: + vkd3d_unreachable(); + + default: + return false; + } +} + +static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_interlocked *interlocked) +{ + + static const enum vkd3d_shader_opcode opcodes[] = + { + [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD, + [HLSL_INTERLOCKED_AND] = VKD3DSIH_ATOMIC_AND, + [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_ATOMIC_CMP_STORE, + [HLSL_INTERLOCKED_MAX] = VKD3DSIH_ATOMIC_UMAX, + [HLSL_INTERLOCKED_MIN] = VKD3DSIH_ATOMIC_UMIN, + [HLSL_INTERLOCKED_OR] = VKD3DSIH_ATOMIC_OR, + [HLSL_INTERLOCKED_XOR] = VKD3DSIH_ATOMIC_XOR, + }; + + static const enum vkd3d_shader_opcode imm_opcodes[] = + { + [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD, + [HLSL_INTERLOCKED_AND] = VKD3DSIH_IMM_ATOMIC_AND, + [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_IMM_ATOMIC_CMP_EXCH, + [HLSL_INTERLOCKED_EXCH] = VKD3DSIH_IMM_ATOMIC_EXCH, + [HLSL_INTERLOCKED_MAX] = VKD3DSIH_IMM_ATOMIC_UMAX, + [HLSL_INTERLOCKED_MIN] = VKD3DSIH_IMM_ATOMIC_UMIN, + [HLSL_INTERLOCKED_OR] = VKD3DSIH_IMM_ATOMIC_OR, + [HLSL_INTERLOCKED_XOR] = VKD3DSIH_IMM_ATOMIC_XOR, + }; + + struct hlsl_ir_node *cmp_value = interlocked->cmp_value.node, *value = interlocked->value.node; + struct hlsl_ir_node *coords = interlocked->coords.node; + struct hlsl_ir_node *instr = &interlocked->node; + bool is_imm = interlocked->node.reg.allocated; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + + opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op]; + + if (value->data_type->e.numeric.type == HLSL_TYPE_INT) + { + if (opcode == VKD3DSIH_ATOMIC_UMAX) + opcode = VKD3DSIH_ATOMIC_IMAX; + else if (opcode == VKD3DSIH_ATOMIC_UMIN) + opcode = VKD3DSIH_ATOMIC_IMIN; + else if (opcode == VKD3DSIH_IMM_ATOMIC_UMAX) + opcode = VKD3DSIH_IMM_ATOMIC_IMAX; + else if (opcode == VKD3DSIH_IMM_ATOMIC_UMIN) + opcode = VKD3DSIH_IMM_ATOMIC_IMIN; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, + is_imm ? 2 : 1, cmp_value ? 3 : 2))) + return false; + + if (is_imm) + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + dst_param = is_imm ? 
&ins->dst[1] : &ins->dst[0]; + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, dst_param, &interlocked->dst, &instr->loc, 0)) + return false; + dst_param->reg.dimension = VSIR_DIMENSION_NONE; + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + if (cmp_value) + { + vsir_src_from_hlsl_node(&ins->src[1], ctx, cmp_value, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[2], ctx, value, VKD3DSP_WRITEMASK_ALL); + } + else + { + vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); + } + + return true; +} + +static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_jump *jump) +{ + const struct hlsl_ir_node *instr = &jump->node; + struct vkd3d_shader_instruction *ins; + + switch (jump->type) + { + case HLSL_IR_JUMP_BREAK: + return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0); + + case HLSL_IR_JUMP_CONTINUE: + return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0); + + case HLSL_IR_JUMP_DISCARD_NZ: + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1))) + return false; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL); + return true; + + case HLSL_IR_JUMP_RETURN: + vkd3d_unreachable(); + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return false; + } +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); + +static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) +{ + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(iff->condition.node->data_type->e.numeric.dimx == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) + return; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL); + + sm4_generate_vsir_block(ctx, &iff->then_block, program); + + if (!list_empty(&iff->else_block.instrs)) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) + return; + sm4_generate_vsir_block(ctx, &iff->else_block, program); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) + return; +} + +static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_loop *loop) +{ + struct hlsl_ir_node *instr = &loop->node; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0))) + return; + + sm4_generate_vsir_block(ctx, &loop->body, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0))) + return; +} + +static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_switch *swi) +{ + const struct hlsl_ir_node *selector = swi->selector.node; + struct hlsl_ir_node *instr = &swi->node; + struct vkd3d_shader_instruction *ins; + struct hlsl_ir_switch_case *cas; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1))) + return; + 
vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL); + + LIST_FOR_EACH_ENTRY(cas, &swi->cases, struct hlsl_ir_switch_case, entry) + { + if (cas->is_default) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0))) + return; + } + else + { + struct hlsl_constant_value value = {.u[0].u = cas->value}; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1))) + return; + vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL); + } + + sm4_generate_vsir_block(ctx, &cas->body, program); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0))) + return; +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +{ + struct vkd3d_string_buffer *dst_type_string; + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + /* In SM4 all constants are inlined. */ + break; + + case HLSL_IR_EXPR: + if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) + break; + sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer); + hlsl_release_string_buffer(ctx, dst_type_string); + break; + + case HLSL_IR_IF: + sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); + break; + + case HLSL_IR_LOAD: + sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); + break; + + case HLSL_IR_LOOP: + sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: + sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_JUMP: + sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_STORE: + sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWITCH: + sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr)); + break; + + case HLSL_IR_SWIZZLE: + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + + case HLSL_IR_INTERLOCKED: + sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr)); + break; + + default: + break; + } + } +} + +static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +{ + struct hlsl_block block = {0}; + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + uint32_t temp_count; + + ctx->is_patch_constant_func = func == ctx->patch_constant_func; + + compute_liveness(ctx, func); + mark_indexable_vars(ctx, func); + temp_count = allocate_temp_registers(ctx, func); + if (ctx->result) + return; + program->temp_count = max(program->temp_count, temp_count); + + hlsl_block_init(&block); + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && 
var->last_read) + || (var->is_output_semantic && var->first_write)) + sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, &block, &var->loc); + } + + if (temp_count) + sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + continue; + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + continue; + + if (var->indexable) + { + unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; + unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + + sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); + } + } + } + + list_move_head(&func->body.instrs, &block.instrs); + + hlsl_block_cleanup(&block); + + sm4_generate_vsir_block(ctx, &func->body, program); + + generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); +} + +static int sm4_compare_extern_resources(const void *a, const void *b) +{ + const struct extern_resource *aa = a; + const struct extern_resource *bb = b; + int r; + + if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) + return r; + + if ((r = vkd3d_u32_compare(aa->space, bb->space))) + return r; + + return vkd3d_u32_compare(aa->index, bb->index); +} + +static const char *string_skip_tag(const char *string) +{ + if (!strncmp(string, "<resource>", strlen("<resource>"))) + return string + strlen("<resource>"); + return string; +} + +static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + { + vkd3d_free(extern_resources[i].name); + } + vkd3d_free(extern_resources); +} + +static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +{ + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; + struct extern_resource *extern_resources = NULL; + const struct hlsl_ir_var *var; + struct hlsl_buffer *buffer; + enum hlsl_regset regset; + size_t capacity = 0; + char *name; + + *count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (separate_components) + { + unsigned int component_count = hlsl_type_component_count(var->data_type); + unsigned int k, regset_offset; + + for (k = 0; k < component_count; ++k) + { + struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); + struct vkd3d_string_buffer *name_buffer; + + if (!hlsl_type_is_resource(component_type)) + continue; + + regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset); + if (regset_offset > var->regs[regset].allocation_size) + continue; + + if (!var->objects_usage[regset][regset_offset].used) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, + &capacity, *count + 1, sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + hlsl_release_string_buffer(ctx, name_buffer); + return NULL; + } + hlsl_release_string_buffer(ctx, name_buffer); + +
extern_resources[*count].var = NULL; + extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; + extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + + extern_resources[*count].component_type = component_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].space = var->regs[regset].space; + extern_resources[*count].index = var->regs[regset].index + regset_offset; + extern_resources[*count].bind_count = 1; + extern_resources[*count].loc = var->loc; + + ++*count; + } + } + else + { + unsigned int r; + + if (!hlsl_type_is_resource(var->data_type)) + continue; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (!var->regs[r].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, + &capacity, *count + 1, sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = var; + extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; + /* For some reason 5.1 resources aren't marked as + * user-packed, but cbuffers still are. */ + extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) + && !!var->reg_reservation.reg_type; + + extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + + extern_resources[*count].regset = r; + extern_resources[*count].id = var->regs[r].id; + extern_resources[*count].space = var->regs[r].space; + extern_resources[*count].index = var->regs[r].index; + extern_resources[*count].bind_count = var->bind_count[r]; + extern_resources[*count].loc = var->loc; + + ++*count; + } + } + } + + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!buffer->reg.allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, + &capacity, *count + 1, sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name = hlsl_strdup(ctx, buffer->name))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = NULL; + extern_resources[*count].buffer = buffer; + + extern_resources[*count].name = name; + extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; + + extern_resources[*count].component_type = NULL; + + extern_resources[*count].regset = HLSL_REGSET_NUMERIC; + extern_resources[*count].id = buffer->reg.id; + extern_resources[*count].space = buffer->reg.space; + extern_resources[*count].index = buffer->reg.index; + extern_resources[*count].bind_count = 1; + extern_resources[*count].loc = buffer->loc; + + ++*count; + } + + if (extern_resources) + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); + + return extern_resources; +} + +static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program) +{ + struct extern_resource *extern_resources; + unsigned int extern_resources_count; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + for (unsigned int i = 0; i < extern_resources_count; ++i) + { + if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) + 
program->features.rovs = true; + } + sm4_free_extern_resources(extern_resources, extern_resources_count); + + /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, + * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ +} + +static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_function_decl *entry_func) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + struct extern_resource *extern_resources; + unsigned int extern_resources_count, i; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + if (version->major == 4) + { + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + const struct hlsl_type *type = resource->component_type; + + if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { + program->global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; break; + } + } + } + + sm4_free_extern_resources(extern_resources, extern_resources_count); + + if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) + program->global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; +} + +static void sm4_generate_vsir_add_dcl_constant_buffer(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_buffer *cbuffer) +{ + unsigned int array_first = cbuffer->reg.index; + unsigned int array_last = cbuffer->reg.index; /* FIXME: array end. */ + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &cbuffer->loc, VKD3DSIH_DCL_CONSTANT_BUFFER, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + ins->declaration.cb.size = cbuffer->size; + + src_param = &ins->declaration.cb.src; + vsir_src_param_init(src_param, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 0); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + + ins->declaration.cb.range.space = cbuffer->reg.space; + ins->declaration.cb.range.first = array_first; + ins->declaration.cb.range.last = array_last; + + src_param->reg.idx[0].offset = cbuffer->reg.id; + src_param->reg.idx[1].offset = array_first; + src_param->reg.idx[2].offset = array_last; + src_param->reg.idx_count = 3; +} + +static void sm4_generate_vsir_add_dcl_sampler(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct extern_resource *resource) +{ + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int i; + + VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); + VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); + + for (i = 0; i < resource->bind_count; ++i) + { + unsigned int array_first = resource->index + i; + unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ + + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, VKD3DSIH_DCL_SAMPLER, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + ins->flags |= VKD3DSI_SAMPLER_COMPARISON_MODE; + + src_param = &ins->declaration.sampler.src; + vsir_src_param_init(src_param, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 0); + + ins->declaration.sampler.range.first = array_first; + ins->declaration.sampler.range.last = array_last; + ins->declaration.sampler.range.space = resource->space; + + src_param->reg.idx[0].offset = resource->id; + src_param->reg.idx[1].offset = array_first; + src_param->reg.idx[2].offset = array_last; + src_param->reg.idx_count = 3; + } +} + +static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return VKD3D_SHADER_RESOURCE_TEXTURE_1D; + case HLSL_SAMPLER_DIM_2D: + return VKD3D_SHADER_RESOURCE_TEXTURE_2D; + case HLSL_SAMPLER_DIM_3D: + return VKD3D_SHADER_RESOURCE_TEXTURE_3D; + case HLSL_SAMPLER_DIM_CUBE: + return VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return VKD3D_SHADER_RESOURCE_TEXTURE_2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_RAW_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return VKD3D_SHADER_RESOURCE_BUFFER; + default: + vkd3d_unreachable(); + } +} + +static enum vkd3d_data_type sm4_generate_vsir_get_format_type(const struct hlsl_type *type) +{ + const struct hlsl_type *format = type->e.resource.format; + + switch (format->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + return VKD3D_DATA_DOUBLE; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (format->modifiers & HLSL_MODIFIER_UNORM) + return VKD3D_DATA_UNORM; + if (format->modifiers & HLSL_MODIFIER_SNORM) + return VKD3D_DATA_SNORM; + return VKD3D_DATA_FLOAT; + + case HLSL_TYPE_INT: + return VKD3D_DATA_INT; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + return VKD3D_DATA_UINT; + } + + vkd3d_unreachable(); +} + +static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct extern_resource *resource, + bool uav) +{ + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + struct vkd3d_shader_structured_resource *structured_resource; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_semantic *semantic; + struct vkd3d_shader_instruction *ins; + struct hlsl_type *component_type; + enum vkd3d_shader_opcode opcode; + bool multisampled; + unsigned int i, j; + + VKD3D_ASSERT(resource->regset == regset); + VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); + + component_type = resource->component_type; + + for (i = 0; i < resource->bind_count; ++i) + { + unsigned int array_first = resource->index + i; + unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ + + if (resource->var && !resource->var->objects_usage[regset][i].used) + continue; + + if (uav) + { + switch (component_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + opcode = VKD3DSIH_DCL_UAV_STRUCTURED; + break; + case HLSL_SAMPLER_DIM_RAW_BUFFER: + opcode = VKD3DSIH_DCL_UAV_RAW; + break; + default: + opcode = VKD3DSIH_DCL_UAV_TYPED; + break; + } + } + else + { + switch (component_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_RAW_BUFFER: + opcode = VKD3DSIH_DCL_RESOURCE_RAW; + break; + default: + opcode = VKD3DSIH_DCL; + break; + } + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, opcode, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + semantic = &ins->declaration.semantic; + structured_resource = &ins->declaration.structured_resource; + dst_param = &semantic->resource.reg; + vsir_dst_param_init(dst_param, uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 0); + + if (uav && component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + structured_resource->byte_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; + if (uav && component_type->e.resource.rasteriser_ordered) + ins->flags = VKD3DSUF_RASTERISER_ORDERED_VIEW; + + multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + + if (!hlsl_version_ge(ctx, 4, 1) && multisampled && !component_type->sample_count) + { + hlsl_error(ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %u.%u.", + ctx->profile->major_version, ctx->profile->minor_version); + } + + for (j = 0; j < 4; ++j) + semantic->resource_data_type[j] = sm4_generate_vsir_get_format_type(component_type); + + semantic->resource.range.first = array_first; + semantic->resource.range.last = array_last; + semantic->resource.range.space = resource->space; + + dst_param->reg.idx[0].offset = resource->id; + dst_param->reg.idx[1].offset = array_first; + dst_param->reg.idx[2].offset = array_last; + dst_param->reg.idx_count = 3; + + ins->resource_type = sm4_generate_vsir_get_resource_type(resource->component_type); + if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + ins->raw = true; + if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + ins->structured = true; + ins->resource_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; + } + + if (multisampled) + semantic->sample_count = component_type->sample_count; + } +} + +/* OBJECTIVE: Translate all the information from ctx and entry_func to the + * vsir_program, so it can be used as input to tpf_compile() without relying + * on ctx and entry_func. 
*/ +static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + uint64_t config_flags, struct vsir_program *program) +{ + struct vkd3d_shader_version version = {0}; + struct extern_resource *extern_resources; + unsigned int extern_resources_count; + const struct hlsl_buffer *cbuffer; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + generate_vsir_signature(ctx, program, func); + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_signature(ctx, program, ctx->patch_constant_func); + + if (version.type == VKD3D_SHADER_TYPE_COMPUTE) + { + program->thread_group_size.x = ctx->thread_count[0]; + program->thread_group_size.y = ctx->thread_count[1]; + program->thread_group_size.z = ctx->thread_count[2]; + } + else if (version.type == VKD3D_SHADER_TYPE_HULL) + { + program->input_control_point_count = ctx->input_control_point_count == UINT_MAX + ? 1 : ctx->input_control_point_count; + program->output_control_point_count = ctx->output_control_point_count; + program->tess_domain = ctx->domain; + program->tess_partitioning = ctx->partitioning; + program->tess_output_primitive = ctx->output_primitive; + } + else if (version.type == VKD3D_SHADER_TYPE_DOMAIN) + { + program->input_control_point_count = ctx->input_control_point_count == UINT_MAX + ? 0 : ctx->input_control_point_count; + program->tess_domain = ctx->domain; + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + sm4_generate_vsir_add_dcl_constant_buffer(ctx, program, cbuffer); + } + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + for (unsigned int i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + + if (resource->regset == HLSL_REGSET_SAMPLERS) + sm4_generate_vsir_add_dcl_sampler(ctx, program, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) + sm4_generate_vsir_add_dcl_texture(ctx, program, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) + sm4_generate_vsir_add_dcl_texture(ctx, program, resource, true); + } + sm4_free_extern_resources(extern_resources, extern_resources_count); + + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); + sm4_generate_vsir_add_function(ctx, func, config_flags, program); + if (version.type == VKD3D_SHADER_TYPE_HULL) + { + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); + sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); + } + + generate_vsir_scan_required_features(ctx, program); + generate_vsir_scan_global_flags(ctx, program, func); +} + +/* For some reason, for matrices, values from default value initializers end + * up in different components than from regular initializers. Default value + * initializers fill the matrix in vertical reading order + * (left-to-right top-to-bottom) instead of regular reading order + * (top-to-bottom left-to-right), so they have to be adjusted. An exception is + * that the order of matrix initializers for function parameters are row-major + * (top-to-bottom left-to-right). 
*/ +static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) +{ + unsigned int element_comp_count, element, x, y, i; + unsigned int base = 0; + + switch (type->class) + { + case HLSL_CLASS_MATRIX: + x = index / type->e.numeric.dimy; + y = index % type->e.numeric.dimy; + return y * type->e.numeric.dimx + x; + + case HLSL_CLASS_ARRAY: + element_comp_count = hlsl_type_component_count(type->e.array.type); + element = index / element_comp_count; + base = element * element_comp_count; + return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); + + case HLSL_CLASS_STRUCT: + for (i = 0; i < type->e.record.field_count; ++i) + { + struct hlsl_type *field_type = type->e.record.fields[i].type; + + element_comp_count = hlsl_type_component_count(field_type); + if (index - base < element_comp_count) + return base + get_component_index_from_default_initializer_index(field_type, index - base); + base += element_comp_count; + } + break; + + default: + return index; + } + + vkd3d_unreachable(); +}
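The matrix case of get_component_index_from_default_initializer_index() is easiest to see with a worked example. The following standalone sketch is not part of the patch; dimx/dimy follow the hlsl_type convention of columns/rows. It reproduces the permutation for a float2x3, i.e. 2 rows by 3 columns:

    #include <stdio.h>

    /* Map a default-value initializer index to the regular (row-major)
     * component index, as the HLSL_CLASS_MATRIX case above does. */
    static unsigned int map_matrix_index(unsigned int dimx, unsigned int dimy,
            unsigned int index)
    {
        unsigned int x = index / dimy; /* column */
        unsigned int y = index % dimy; /* row */

        return y * dimx + x;
    }

    int main(void)
    {
        unsigned int i;

        /* Prints 0 3 1 4 2 5: the six default values fill the matrix
         * column by column instead of row by row. */
        for (i = 0; i < 6; ++i)
            printf("%u ", map_matrix_index(3, 2, i));
        printf("\n");
        return 0;
    }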
- case HLSL_IR_SWIZZLE: - generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); - replace_instr_with_last_vsir_instr(ctx, program, instr); - break; +static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3D_SRV_DIMENSION_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3D_SRV_DIMENSION_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3D_SRV_DIMENSION_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3D_SRV_DIMENSION_TEXTURECUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return D3D_SRV_DIMENSION_TEXTURE1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return D3D_SRV_DIMENSION_TEXTURE2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_RAW_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return D3D_SRV_DIMENSION_BUFFER; + default: + break; + }
- default: - break; - } + vkd3d_unreachable(); +} + +static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type) +{ + const struct hlsl_type *format = type->e.resource.format; + + switch (format->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + return D3D_RETURN_TYPE_DOUBLE; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (format->modifiers & HLSL_MODIFIER_UNORM) + return D3D_RETURN_TYPE_UNORM; + if (format->modifiers & HLSL_MODIFIER_SNORM) + return D3D_RETURN_TYPE_SNORM; + return D3D_RETURN_TYPE_FLOAT; + + case HLSL_TYPE_INT: + return D3D_RETURN_TYPE_SINT; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + return D3D_RETURN_TYPE_UINT; } + + vkd3d_unreachable(); }
-static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { - bool is_patch_constant_func = func == ctx->patch_constant_func; - struct hlsl_block block = {0}; - struct hlsl_scope *scope; - struct hlsl_ir_var *var; - uint32_t temp_count; + switch (type->class) + { + case HLSL_CLASS_SAMPLER: + return D3D_SIT_SAMPLER; + case HLSL_CLASS_TEXTURE: + return D3D_SIT_TEXTURE; + case HLSL_CLASS_UAV: + return D3D_SIT_UAV_RWTYPED; + default: + break; + }
- compute_liveness(ctx, func); - mark_indexable_vars(ctx, func); - temp_count = allocate_temp_registers(ctx, func); - if (ctx->result) + vkd3d_unreachable(); +} + +static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_MATRIX: + VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3D_SVC_MATRIX_COLUMNS; + else + return D3D_SVC_MATRIX_ROWS; + case HLSL_CLASS_SCALAR: + return D3D_SVC_SCALAR; + case HLSL_CLASS_VECTOR: + return D3D_SVC_VECTOR; + + case HLSL_CLASS_ARRAY: + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } + + vkd3d_unreachable(); +} + +static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +{ + switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + return D3D_SVT_BOOL; + case HLSL_TYPE_DOUBLE: + return D3D_SVT_DOUBLE; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3D_SVT_FLOAT; + case HLSL_TYPE_INT: + return D3D_SVT_INT; + case HLSL_TYPE_UINT: + return D3D_SVT_UINT; + } + + vkd3d_unreachable(); +} + +static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + const char *name = array_type->name ? array_type->name : "<unnamed>"; + const struct hlsl_profile_info *profile = ctx->profile; + unsigned int array_size = 0; + size_t name_offset = 0; + size_t i; + + if (type->bytecode_offset) return; - program->temp_count = max(program->temp_count, temp_count);
- hlsl_block_init(&block); + if (profile->major_version >= 5) + name_offset = put_string(buffer, name);
- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + if (type->class == HLSL_CLASS_ARRAY) + array_size = hlsl_get_multiarray_size(type); + + if (array_type->class == HLSL_CLASS_STRUCT) { - if ((var->is_input_semantic && var->last_read) - || (var->is_output_semantic && var->first_write)) - sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); + unsigned int field_count = 0; + size_t fields_offset = 0; + + for (i = 0; i < array_type->e.record.field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) + continue; + + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm4_type(ctx, buffer, field->type); + ++field_count; + } + + fields_offset = bytecode_align(buffer); + + for (i = 0; i < array_type->e.record.field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) + continue; + + put_u32(buffer, field->name_bytecode_offset); + put_u32(buffer, field->type->bytecode_offset); + put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); + } + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); + put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); + } + else + { + VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, 0)); + put_u32(buffer, 1); }
- if (temp_count) - sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); + if (profile->major_version >= 5) + { + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, 0); /* FIXME: unknown */ + put_u32(buffer, name_offset); + } +}
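For a non-struct type, write_sm4_type() packs the type description into 16-bit halves with vkd3d_make_u32(low, high). A minimal sketch of the first three words it would emit for a hypothetical "column_major float4x3 m[6]" variable, assuming the D3D_SVC_MATRIX_COLUMNS = 3 and D3D_SVT_FLOAT = 3 values from d3dcommon.h:

    #include <assert.h>
    #include <stdint.h>

    /* Same packing as vkd3d_make_u32(): low half in bits 0-15. */
    static uint32_t make_u32(uint16_t low, uint16_t high)
    {
        return low | ((uint32_t)high << 16);
    }

    int main(void)
    {
        assert(make_u32(3, 3) == 0x00030003); /* class | base type */
        assert(make_u32(4, 3) == 0x00030004); /* dimy (rows) | dimx (columns) */
        assert(make_u32(6, 0) == 0x00000006); /* array size | 0 */
        return 0;
    }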
- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) +static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) +{ + uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + unsigned int cbuffer_count = 0, extern_resources_count, i, j; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; + struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + + static const uint16_t target_types[] = { - LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + 0xffff, /* PIXEL */ + 0xfffe, /* VERTEX */ + 0x4753, /* GEOMETRY */ + 0x4853, /* HULL */ + 0x4453, /* DOMAIN */ + 0x4353, /* COMPUTE */ + }; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + ++cbuffer_count; + } + + put_u32(&buffer, cbuffer_count); + cbuffer_position = put_u32(&buffer, 0); + put_u32(&buffer, extern_resources_count); + resource_position = put_u32(&buffer, 0); + put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), + target_types[profile->type])); + put_u32(&buffer, 0); /* FIXME: compilation flags */ + creator_position = put_u32(&buffer, 0); + + if (profile->major_version >= 5) + { + put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ + put_u32(&buffer, binding_desc_size); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ + put_u32(&buffer, 0); /* unknown; possibly a null terminator */ + } + + /* Bound resources. */ + + resources_offset = bytecode_align(&buffer); + set_u32(&buffer, resource_position, resources_offset); + + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + uint32_t flags = 0; + + if (resource->is_user_packed) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + if (resource->buffer) + put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + else + put_u32(&buffer, sm4_resource_type(resource->component_type)); + if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) { - if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + unsigned int dimx = resource->component_type->e.resource.format->e.numeric.dimx; + + put_u32(&buffer, sm4_data_type(resource->component_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } + else + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); + put_u32(&buffer, 0); + } + put_u32(&buffer, resource->index); + put_u32(&buffer, resource->bind_count); + put_u32(&buffer, flags); + + if (hlsl_version_ge(ctx, 5, 1)) + { + put_u32(&buffer, resource->space); + put_u32(&buffer, resource->id); + } + } + + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + + string_offset = put_string(&buffer, resource->name); + set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); + } + + /* Buffers. */ + + cbuffers_offset = bytecode_align(&buffer); + set_u32(&buffer, cbuffer_position, cbuffers_offset); + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + unsigned int var_count = 0; + + if (!cbuffer->reg.allocated) + continue; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) + ++var_count; + } + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var_count); + put_u32(&buffer, 0); /* variable offset */ + put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); + put_u32(&buffer, 0); /* FIXME: flags */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + size_t vars_start = bytecode_align(&buffer); + + if (!cbuffer->reg.allocated) + continue; + + set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + uint32_t flags = 0; + + if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) continue; - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + + if (var->is_read) + flags |= D3D_SVF_USED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var->buffer_offset * sizeof(float)); + put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); + put_u32(&buffer, flags); + put_u32(&buffer, 0); /* type */ + put_u32(&buffer, 0); /* default value */ + + if (profile->major_version >= 5) + { + put_u32(&buffer, 0); /* texture start */ + put_u32(&buffer, 0); /* texture count */ + put_u32(&buffer, 0); /* sampler start */ + put_u32(&buffer, 0); /* sampler count */ + } + } + + j = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); + size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); + + if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) continue;
- if (var->indexable) - { - unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; - unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + string_offset = put_string(&buffer, var->name); + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + + if (var->default_values) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + unsigned int k; + + default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); + set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); + + for (k = 0; k < comp_count; ++k) + { + struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); + unsigned int comp_offset, comp_index; + enum hlsl_regset regset; + + if (comp_type->class == HLSL_CLASS_STRING) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot write string default value."); + continue; + } + + comp_index = get_component_index_from_default_initializer_index(var->data_type, k); + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set); + if (regset == HLSL_REGSET_NUMERIC) + { + if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) + hlsl_fixme(ctx, &var->loc, "Write double default values."); + + set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), + var->default_values[k].number.u); + } + } + } + + ++j; + } + } + + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + + sm4_free_extern_resources(extern_resources, extern_resources_count); + + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; + return; + } + rdef->code = buffer.data; + rdef->size = buffer.size; +} + +static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, + bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *const_node, *store; + + if (!(const_node = hlsl_new_bool_constant(ctx, val, loc))) + return false; + hlsl_block_add_instr(block, const_node); + + if (!(store = hlsl_new_simple_store(ctx, var, const_node))) + return false; + hlsl_block_add_instr(block, store); + + return true; +} + +static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued); + +static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) +{ + struct hlsl_ir_jump *jump; + struct hlsl_ir_var *var; + struct hlsl_block draft; + struct hlsl_ir_if *iff; + + if (node->type == HLSL_IR_IF) + { + iff = hlsl_ir_if(node); + if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued)) + return true; + if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued)) + return true; + return false; + } + + if (node->type == HLSL_IR_JUMP) + { + jump = hlsl_ir_jump(node); + if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK) + return false; + + hlsl_block_init(&draft);
- sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); - } - } - } + if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE) + var = loop_continued; + else + var = loop_broken;
- list_move_head(&func->body.instrs, &block.instrs); + if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc)) + return false;
- hlsl_block_cleanup(&block); + list_move_before(&jump->node.entry, &draft.instrs); + list_remove(&jump->node.entry); + hlsl_free_instr(&jump->node);
- sm4_generate_vsir_block(ctx, &func->body, program); + return true; + } + + return false; }
-/* OBJECTIVE: Translate all the information from ctx and entry_func to the - * vsir_program, so it can be used as input to tpf_compile() without relying - * on ctx and entry_func. */ -static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, - uint64_t config_flags, struct vsir_program *program) +static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx, + struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc) { - struct vkd3d_shader_version version = {0}; + struct hlsl_ir_node *cond, *iff; + struct hlsl_block then_block; + struct hlsl_ir_load *load;
- version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; + hlsl_block_init(&then_block);
- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; + hlsl_block_add_instr(dst, &load->node);
- generate_vsir_signature(ctx, program, func); - if (version.type == VKD3D_SHADER_TYPE_HULL) - generate_vsir_signature(ctx, program, ctx->patch_constant_func); + if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc))) + return NULL; + hlsl_block_add_instr(dst, cond);
- if (version.type == VKD3D_SHADER_TYPE_COMPUTE) - { - program->thread_group_size.x = ctx->thread_count[0]; - program->thread_group_size.y = ctx->thread_count[1]; - program->thread_group_size.z = ctx->thread_count[2]; - } + if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc))) + return NULL; + hlsl_block_add_instr(dst, iff);
- sm4_generate_vsir_add_function(ctx, func, config_flags, program); - if (version.type == VKD3D_SHADER_TYPE_HULL) - sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); + return hlsl_ir_if(iff); }
-static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, - struct hlsl_block **found_block) +static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) { - struct hlsl_ir_node *node; + struct hlsl_ir_node *node, *next;
- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry) { - if (node == stop_point) - return NULL; + struct hlsl_ir_if *broken_check, *continued_check; + struct hlsl_block draft;
- if (node->type == HLSL_IR_IF) - { - struct hlsl_ir_if *iff = hlsl_ir_if(node); - struct hlsl_ir_jump *jump = NULL; + if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued)) + continue;
- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) - return jump; - if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) - return jump; - } - else if (node->type == HLSL_IR_JUMP) - { - struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + if (&next->entry == &block->instrs) + return true;
- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) - { - *found_block = block; - return jump; - } - } + hlsl_block_init(&draft); + + broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc); + continued_check = loop_unrolling_generate_var_check(ctx, + &broken_check->then_block, loop_continued, &next->loc); + + list_move_before(&next->entry, &draft.instrs); + + list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs)); + + return true; }
- return NULL; + return false; +} + +static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) +{ + while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued)); }
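Taken together, these helpers rewrite every break/continue as a store to the synthetic "broken"/"continued" bools and move whatever followed the jump under a guard on those bools. A runnable sketch of the resulting shape for one loop body iteration; cond() and body() are stand-ins invented for this illustration:

    #include <assert.h>
    #include <stdbool.h>

    static bool cond(int i) { return i == 3; }
    static int body(int i) { return i; }

    /* One iteration of "if (cond(i)) break; sum += body(i);" after jump
     * removal: the break is now a store, the tail is guarded. */
    static int one_iteration(int i, bool *broken, bool *continued)
    {
        int sum = 0;

        if (cond(i))
            *broken = true;    /* was: break */
        if (!*broken)          /* guard generated for the removed jump */
        {
            if (!*continued)
                sum += body(i);
        }
        return sum;
    }

    int main(void)
    {
        bool broken = false, continued = false;
        int i, sum = 0;

        for (i = 0; i < 8 && !broken; ++i)
        {
            continued = false;
            sum += one_iteration(i, &broken, &continued);
        }
        assert(sum == 0 + 1 + 2); /* matches the original loop */
        return 0;
    }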
static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) @@ -8696,7 +11825,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return loop->unroll_limit;
/* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ - if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) return 1024;
/* SM4 limits implicit unrolling to 254 iterations. */ @@ -8707,167 +11836,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return 1024; }
-static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) +static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state, unsigned int *index) +{ + size_t scopes_depth = state->scope_count - 1; + unsigned int current_index; + bool progress; + + do + { + state->stopped = false; + for (size_t i = state->scope_count; scopes_depth < i; --i) + copy_propagation_pop_scope(state); + copy_propagation_push_scope(state, ctx); + + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); + + current_index = index_instructions(block, *index); + progress |= copy_propagation_transform_block(ctx, block, state); + + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL); + } while (progress); + + *index = current_index; +} + +static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var) +{ + struct copy_propagation_value *v; + + if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX)) + || v->node->type != HLSL_IR_CONSTANT) + return false; + + return hlsl_ir_constant(v->node)->value.u[0].u; +} + +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop) { - unsigned int max_iterations, i; + struct hlsl_block draft, tmp_dst, loop_body; + struct hlsl_ir_var *broken, *continued; + unsigned int max_iterations, i, index; + struct copy_propagation_state state; + struct hlsl_ir_if *target_if; + + if (!(broken = hlsl_new_synthetic_var(ctx, "broken", + hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) + goto fail; + + if (!(continued = hlsl_new_synthetic_var(ctx, "continued", + hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) + goto fail; + + hlsl_block_init(&draft); + hlsl_block_init(&tmp_dst);
max_iterations = loop_unrolling_get_max_iterations(ctx, loop); + copy_propagation_state_init(&state, ctx); + index = 2; + state.stop = &loop->node; + loop_unrolling_simplify(ctx, block, &state, &index); + state.stopped = false; + index = loop->node.index; + + if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc)) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst); + + if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst); + + if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) + goto fail; + state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry); + hlsl_block_add_block(&draft, &tmp_dst); + + copy_propagation_push_scope(&state, ctx); + loop_unrolling_simplify(ctx, &draft, &state, &index); + + /* As an optimization, we only remove jumps from the loop's body once. */ + if (!hlsl_clone_block(ctx, &loop_body, &loop->body)) + goto fail; + loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued);
for (i = 0; i < max_iterations; ++i) { - struct hlsl_block tmp_dst, *jump_block; - struct hlsl_ir_jump *jump = NULL; + copy_propagation_push_scope(&state, ctx);
- if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) - return false; - list_move_before(&loop->node.entry, &tmp_dst.instrs); - hlsl_block_cleanup(&tmp_dst); + if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) + goto fail; + hlsl_block_add_block(&target_if->then_block, &tmp_dst);
- hlsl_run_const_passes(ctx, block); + if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body)) + goto fail; + hlsl_block_add_block(&target_if->then_block, &tmp_dst);
- if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) - { - enum hlsl_ir_jump_type type = jump->type; + loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index);
- if (jump_block != loop_parent) - { - if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) - hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, - "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); - return false; - } + if (loop_unrolling_check_val(&state, broken)) + break;
- list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); - hlsl_block_cleanup(&tmp_dst); + if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst);
- if (type == HLSL_IR_JUMP_BREAK) - break; - } - } + if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter)) + goto fail; + hlsl_block_add_block(&target_if->then_block, &tmp_dst); + }
/* Native will not emit an error if max_iterations has been reached with an * explicit limit. It also will not insert a loop if there are iterations left, * e.g. [unroll(4)] for (i = 0; i < 8; ++i). */ if (!loop->unroll_limit && i == max_iterations) { - if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); - return false; + goto fail; }
+ hlsl_block_cleanup(&loop_body); + copy_propagation_state_destroy(&state); + + list_move_before(&loop->node.entry, &draft.instrs); + hlsl_block_cleanup(&draft); list_remove(&loop->node.entry); hlsl_free_instr(&loop->node);
return true; + +fail: + hlsl_block_cleanup(&loop_body); + copy_propagation_state_destroy(&state); + hlsl_block_cleanup(&draft); + + return false; }
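The iteration loop above therefore emits a flat run of "if (!broken)" blocks, each beginning with a clone of the iter block (except the first), until copy propagation proves "broken" constant-true or the limit is reached. A minimal C rendering of that shape for "for (i = 0; i < 3; ++i) sum += i;" unrolled four times; illustrative only, and it assumes the loop condition's break has already become a store to "broken":

    #include <assert.h>
    #include <stdbool.h>

    int main(void)
    {
        bool broken = false, continued = false;
        int i = 0, sum = 0;

        if (!broken) { continued = false; if (!(i < 3)) broken = true; if (!broken && !continued) sum += i; }
        if (!broken) { ++i; continued = false; if (!(i < 3)) broken = true; if (!broken && !continued) sum += i; }
        if (!broken) { ++i; continued = false; if (!(i < 3)) broken = true; if (!broken && !continued) sum += i; }
        if (!broken) { ++i; continued = false; if (!(i < 3)) broken = true; if (!broken && !continued) sum += i; }

        assert(sum == 0 + 1 + 2);
        return 0;
    }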
-/* - * loop_unrolling_find_unrollable_loop() is not the normal way to do things; - * normal passes simply iterate over the whole block and apply a transformation - * to every relevant instruction. However, loop unrolling can fail, and we want - * to leave the loop in its previous state in that case. That isn't a problem by - * itself, except that loop unrolling needs copy-prop in order to work properly, - * and copy-prop state at the time of the loop depends on the rest of the program - * up to that point. This means we need to clone the whole program, and at that - * point we have to search it again anyway to find the clone of the loop we were - * going to unroll. - * - * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop - * up until the loop instruction, clone just that loop, then use copyprop again - * with the saved state after unrolling. However, copyprop currently isn't built - * for that yet [notably, it still relies on indices]. Note also this still doesn't - * really let us use transform_ir() anyway [since we don't have a good way to say - * "copyprop from the beginning of the program up to the instruction we're - * currently processing" from the callback]; we'd have to use a dedicated - * recursive function instead. */ -static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_block **containing_block) +static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) { - struct hlsl_ir_node *instr; + struct hlsl_block *program = context; + struct hlsl_ir_loop *loop;
- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + if (node->type != HLSL_IR_LOOP) + return true; + + loop = hlsl_ir_loop(node); + + if (loop->unroll_type != HLSL_LOOP_UNROLL && loop->unroll_type != HLSL_LOOP_FORCE_UNROLL) + return true; + + if (!loop_unrolling_unroll_loop(ctx, program, loop)) + loop->unroll_type = HLSL_LOOP_FORCE_LOOP; + + return true; +} + +/* We could handle this at parse time. However, loop unrolling often needs to + * know the value of variables modified in the "iter" block. It is possible to + * detect that all exit paths of a loop body modify such variables in the same + * way, but difficult, and d3dcompiler does not attempt to do so. + * In fact, d3dcompiler is capable of unrolling the following loop: + * for (int i = 0; i < 10; ++i) + * { + * if (some_uniform > 4) + * continue; + * } + * but cannot unroll the same loop with "++i" moved to each exit path: + * for (int i = 0; i < 10;) + * { + * if (some_uniform > 4) + * { + * ++i; + * continue; + * } + * ++i; + * } + */ +static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) +{ + struct hlsl_ir_loop *loop; + + if (node->type != HLSL_IR_LOOP) + return true; + + loop = hlsl_ir_loop(node); + + hlsl_block_add_block(&loop->body, &loop->iter); + return true; +} + +static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop) +{ + struct hlsl_ir_node *node; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) { - switch (instr->type) + switch (node->type) { case HLSL_IR_LOOP: { - struct hlsl_ir_loop *nested_loop; - struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - - if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) - return nested_loop; - - if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) - { - *containing_block = block; - return loop; - } + struct hlsl_ir_loop *loop = hlsl_ir_loop(node);
+ resolve_continues(ctx, &loop->body, loop); break; } case HLSL_IR_IF: { - struct hlsl_ir_loop *loop; - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - - if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) - return loop; - if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) - return loop; - + struct hlsl_ir_if *iff = hlsl_ir_if(node); + resolve_continues(ctx, &iff->then_block, last_loop); + resolve_continues(ctx, &iff->else_block, last_loop); break; } case HLSL_IR_SWITCH: { - struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch *s = hlsl_ir_switch(node); struct hlsl_ir_switch_case *c; - struct hlsl_ir_loop *loop;
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) - return loop; + resolve_continues(ctx, &c->body, last_loop); }
break; } + case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + + if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) + break; + + if (last_loop->type == HLSL_LOOP_FOR) + { + struct hlsl_block draft; + + if (!hlsl_clone_block(ctx, &draft, &last_loop->iter)) + return; + + list_move_before(&node->entry, &draft.instrs); + hlsl_block_cleanup(&draft); + } + + jump->type = HLSL_IR_JUMP_CONTINUE; + break; + } default: break; } } - - return NULL; }
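resolve_continues() makes the iter block explicit on every continue path, which is what lets resolve_loops() simply append the iter block to the body afterwards. A runnable sketch of the transformation for a for loop; skip() is an invented stand-in:

    #include <assert.h>
    #include <stdbool.h>

    static bool skip(int i) { return i % 2 != 0; }

    /* Source form. */
    static int with_for(void)
    {
        int i, sum = 0;

        for (i = 0; i < 10; ++i)
        {
            if (skip(i))
                continue;
            sum += i;
        }
        return sum;
    }

    /* Lowered form: the iter block ("++i") is cloned in front of the
     * unresolved continue, and appended to the end of the body by
     * resolve_loops().  Illustrative C, not vkd3d output. */
    static int with_generic_loop(void)
    {
        int i = 0, sum = 0;

        while (i < 10)
        {
            if (skip(i))
            {
                ++i;      /* cloned iter block */
                continue; /* now HLSL_IR_JUMP_CONTINUE */
            }
            sum += i;
            ++i;          /* iter merged into the body */
        }
        return sum;
    }

    int main(void)
    {
        assert(with_for() == with_generic_loop());
        return 0;
    }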
-static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) +static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) { - while (true) - { - struct hlsl_block clone, *containing_block; - struct hlsl_ir_loop *loop, *cloned_loop; - - if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) - return; - - if (!hlsl_clone_block(ctx, &clone, block)) - return; - - cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); - VKD3D_ASSERT(cloned_loop); + bool progress;
- if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) - { - hlsl_block_cleanup(&clone); - loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; - continue; - } + /* These are required by copy propagation, which in turn is required for + * unrolling. */ + do + { + progress = hlsl_transform_ir(ctx, split_array_copies, block, NULL); + progress |= hlsl_transform_ir(ctx, split_struct_copies, block, NULL); + } while (progress); + hlsl_transform_ir(ctx, split_matrix_copies, block, NULL);
- hlsl_block_cleanup(block); - hlsl_block_init(block); - hlsl_block_add_block(block, &clone); - } + hlsl_transform_ir(ctx, unroll_loops, block, block); + resolve_continues(ctx, block, NULL); + hlsl_transform_ir(ctx, resolve_loops, block, NULL); }
static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) @@ -9026,9 +12267,99 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru return true; }
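The lower_isinf() pass added just below relies, for SM4, on the IEEE 754 single-precision encoding: +/-INF are the only values whose exponent field is all ones and whose mantissa is zero, so masking off the sign bit leaves exactly 0x7f800000. A standalone check of that bit test, mirroring asuint() with memcpy():

    #include <assert.h>
    #include <math.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    static bool is_inf_bits(float x)
    {
        uint32_t u;

        memcpy(&u, &x, sizeof(u)); /* asuint(x) */
        return (u & 0x7fffffff) == 0x7f800000;
    }

    int main(void)
    {
        assert(is_inf_bits(INFINITY));
        assert(is_inf_bits(-INFINITY));
        assert(!is_inf_bits(0.0f));
        assert(!is_inf_bits(nanf(""))); /* NaN has a nonzero mantissa */
        assert(!is_inf_bits(3.4e38f));  /* large but finite */
        return 0;
    }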
+static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +{ + struct hlsl_ir_node *call, *rhs, *store; + struct hlsl_ir_function_decl *func; + unsigned int component_count; + struct hlsl_ir_load *load; + struct hlsl_ir_expr *expr; + struct hlsl_ir_var *lhs; + const char *template; + char *body; + + static const char template_sm2[] = + "typedef bool%u boolX;\n" + "typedef float%u floatX;\n" + "boolX isinf(floatX x)\n" + "{\n" + " floatX v = 1 / x;\n" + " v = v * v;\n" + " return v <= 0;\n" + "}\n"; + + static const char template_sm3[] = + "typedef bool%u boolX;\n" + "typedef float%u floatX;\n" + "boolX isinf(floatX x)\n" + "{\n" + " floatX v = 1 / x;\n" + " return v <= 0;\n" + "}\n"; + + static const char template_sm4[] = + "typedef bool%u boolX;\n" + "typedef float%u floatX;\n" + "boolX isinf(floatX x)\n" + "{\n" + " return (asuint(x) & 0x7fffffff) == 0x7f800000;\n" + "}\n"; + + static const char template_int[] = + "typedef bool%u boolX;\n" + "typedef float%u floatX;\n" + "boolX isinf(floatX x)\n" + "{\n" + " return false;\n" + "}"; + + if (node->type != HLSL_IR_EXPR) + return false; + + expr = hlsl_ir_expr(node); + + if (expr->op != HLSL_OP1_ISINF) + return false; + + rhs = expr->operands[0].node; + + if (hlsl_version_lt(ctx, 3, 0)) + template = template_sm2; + else if (hlsl_version_lt(ctx, 4, 0)) + template = template_sm3; + else if (type_is_integer(rhs->data_type)) + template = template_int; + else + template = template_sm4; + + component_count = hlsl_type_component_count(rhs->data_type); + if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) + return false; + + if (!(func = hlsl_compile_internal_function(ctx, "isinf", body))) + return false; + + lhs = func->parameters.vars[0]; + + if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) + return false; + hlsl_block_add_instr(block, store); + + if (!(call = hlsl_new_call(ctx, func, &node->loc))) + return false; + hlsl_block_add_instr(block, call); + + if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) + return false; + hlsl_block_add_instr(block, &load->node); + + return true; +} + static void process_entry_function(struct hlsl_ctx *ctx, const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) { + const struct hlsl_ir_var *input_patch = NULL, *output_patch = NULL; const struct hlsl_profile_info *profile = ctx->profile; struct hlsl_block static_initializers, global_uniforms; struct hlsl_block *const body = &entry_func->body; @@ -9036,6 +12367,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct hlsl_ir_var *var; unsigned int i;
+ ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func; + if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers)) return; list_move_head(&body->instrs, &static_initializers.instrs); @@ -9059,10 +12392,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_f32tof16, body); }
+ lower_ir(ctx, lower_isinf, body); + lower_return(ctx, entry_func, body, false);
while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
+ lower_ir(ctx, lower_complex_casts, body); lower_ir(ctx, lower_matrix_swizzles, body); lower_ir(ctx, lower_index_loads, body);
@@ -9076,12 +12412,48 @@ static void process_entry_function(struct hlsl_ctx *ctx, } else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { - if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func) + if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && ctx->is_patch_constant_func) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Patch constant function parameter "%s" cannot be uniform.", var->name); else prepend_uniform_copy(ctx, body, var); } + else if (hlsl_type_is_patch_array(var->data_type)) + { + if (var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT) + { + if (input_patch) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, + "Found multiple InputPatch parameters."); + hlsl_note(ctx, &input_patch->loc, VKD3D_SHADER_LOG_ERROR, + "The InputPatch parameter was previously declared here."); + continue; + } + input_patch = var; + } + else + { + if (output_patch) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, + "Found multiple OutputPatch parameters."); + hlsl_note(ctx, &output_patch->loc, VKD3D_SHADER_LOG_ERROR, + "The OutputPatch parameter was previously declared here."); + continue; + } + output_patch = var; + } + + validate_and_record_patch_type(ctx, var); + if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) + { + hlsl_fixme(ctx, &var->loc, "InputPatch/OutputPatch parameters in geometry shaders."); + continue; + } + + prepend_input_var_copy(ctx, entry_func, var); + } else { if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT @@ -9095,7 +12467,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, if (var->storage_modifiers & HLSL_STORAGE_IN) prepend_input_var_copy(ctx, entry_func, var); if (var->storage_modifiers & HLSL_STORAGE_OUT) - append_output_var_copy(ctx, entry_func, var); + { + if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Output parameters are not supported in hull shader control point functions."); + else + append_output_var_copy(ctx, entry_func, var); + } } } if (entry_func->return_var) @@ -9105,18 +12483,27 @@ static void process_entry_function(struct hlsl_ctx *ctx, "Entry point "%s" is missing a return value semantic.", entry_func->func->name);
append_output_var_copy(ctx, entry_func, entry_func->return_var); + + if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func) + ctx->output_control_point_type = entry_func->return_var->data_type; + } + else + { + if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func) + hlsl_fixme(ctx, &entry_func->loc, "Passthrough hull shader control point function."); }
- if (profile->major_version >= 4) + if (hlsl_version_ge(ctx, 4, 0)) { hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); } else { hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); + hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); }
- transform_unroll_loops(ctx, body); + loop_unrolling_execute(ctx, body); hlsl_run_const_passes(ctx, body);
remove_unreachable_code(ctx, body); @@ -9126,9 +12513,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body);
+ if (hlsl_version_lt(ctx, 4, 0)) + hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); + hlsl_transform_ir(ctx, validate_dereferences, body, NULL); hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - if (profile->major_version >= 4) + + if (hlsl_version_ge(ctx, 4, 0)) hlsl_transform_ir(ctx, lower_combined_samples, body, NULL);
do @@ -9136,7 +12527,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, while (hlsl_transform_ir(ctx, dce, body, NULL));
hlsl_transform_ir(ctx, track_components_usage, body, NULL); - sort_synthetic_separated_samplers_first(ctx); + if (hlsl_version_lt(ctx, 4, 0)) + sort_synthetic_combined_samplers_first(ctx); + else + sort_synthetic_separated_samplers_first(ctx);
if (profile->major_version < 4) { @@ -9241,14 +12635,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry mark_indexable_vars(ctx, entry_func); allocate_temp_registers(ctx, entry_func); allocate_const_registers(ctx, entry_func); + sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); } else { allocate_buffers(ctx); allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); } - allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx); @@ -9265,7 +12661,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry struct vsir_program program; int result;
- sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); + sm1_generate_ctab(ctx, &ctab); + if (ctx->result) + return ctx->result; + + sm1_generate_vsir(ctx, entry_func, config_flags, &program); if (ctx->result) { vsir_program_cleanup(&program); @@ -9282,18 +12682,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry case VKD3D_SHADER_TARGET_DXBC_TPF: { uint32_t config_flags = vkd3d_shader_init_config_flags(); + struct vkd3d_shader_code rdef = {0}; struct vsir_program program; int result;
+ sm4_generate_rdef(ctx, &rdef); + if (ctx->result) + return ctx->result; + sm4_generate_vsir(ctx, entry_func, config_flags, &program); if (ctx->result) { vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&rdef); return ctx->result; }
- result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); + result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context); vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&rdef); return result; }
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 716adb15f08..538f0f46854 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -30,7 +30,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -121,7 +121,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -143,20 +143,12 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { unsigned int k; - uint32_t u; - int32_t i; - double d; - float f; + uint32_t u = 0; + double d = 0.0; + float f = 0.0f; + int32_t i = 0;
- if (dst_type->dimx != src->node.data_type->dimx - || dst_type->dimy != src->node.data_type->dimy) - { - FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), - debug_hlsl_type(ctx, dst_type)); - return false; - } - - for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < src->node.data_type->e.numeric.dimx; ++k) { switch (src->node.data_type->e.numeric.type) { @@ -195,9 +187,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, f = !!src->value.u[k].u; d = !!src->value.u[k].u; break; - - default: - vkd3d_unreachable(); }
switch (dst_type->e.numeric.type) @@ -220,11 +209,17 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, break;
case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ - default: - vkd3d_unreachable(); + dst->u[k].u = u ? ~0u : 0u; + break; } } + + if (src->node.data_type->e.numeric.dimx == 1) + { + for (k = 1; k < dst_type->e.numeric.dimx; ++k) + dst->u[k] = dst->u[0]; + } + return true; }
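Two behavioural notes on the fold_cast() hunk above: casts to bool now fold directly (instead of asserting), using the all-ones convention the compiler uses for true, and a one-component source is broadcast across all destination components. A small host-side illustration of the bool convention in plain C (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        float f = 2.5f;
        /* HLSL constants fold true to ~0u rather than 1, so bitwise ops and
         * component masks behave uniformly on bools; false stays 0. */
        uint32_t b = f != 0.0f ? ~0u : 0u;

        printf("%#x\n", b); /* 0xffffffff */
        return 0;
    }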
@@ -236,7 +231,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -262,7 +257,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -288,7 +283,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -315,7 +310,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -341,7 +336,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -386,7 +381,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -420,7 +415,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -444,7 +439,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -489,7 +484,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -529,7 +524,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -555,7 +550,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -601,7 +596,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -638,7 +633,7 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -665,7 +660,7 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -692,7 +687,7 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -717,10 +712,10 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); + VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx);
dst->u[0].f = 0.0f; - for (k = 0; k < src1->node.data_type->dimx; ++k) + for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k) { switch (type) { @@ -746,11 +741,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); - VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); - VKD3D_ASSERT(src3->node.data_type->dimx == 1); + VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx); + VKD3D_ASSERT(src3->node.data_type->e.numeric.dimx == 1);
dst->u[0].f = src3->value.u[0].f; - for (k = 0; k < src1->node.data_type->dimx; ++k) + for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k) { switch (type) { @@ -777,7 +772,7 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -844,7 +839,7 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (src1->node.data_type->e.numeric.type) { @@ -862,9 +857,6 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co case HLSL_TYPE_BOOL: dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; break; - - default: - vkd3d_unreachable(); }
dst->u[k].u *= ~0u; @@ -880,7 +872,7 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (src1->node.data_type->e.numeric.type) { @@ -901,9 +893,6 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c case HLSL_TYPE_BOOL: dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; break; - - default: - vkd3d_unreachable(); }
dst->u[k].u *= ~0u; @@ -919,7 +908,7 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (src1->node.data_type->e.numeric.type) { @@ -940,9 +929,6 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con case HLSL_TYPE_BOOL: dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; break; - - default: - vkd3d_unreachable(); }
dst->u[k].u *= ~0u; @@ -958,16 +944,13 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { unsigned int shift = src2->value.u[k].u % 32;
switch (src1->node.data_type->e.numeric.type) { case HLSL_TYPE_INT: - dst->u[k].i = src1->value.u[k].i << shift; - break; - case HLSL_TYPE_UINT: dst->u[k].u = src1->value.u[k].u << shift; break; @@ -989,7 +972,7 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -1027,7 +1010,7 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -1066,7 +1049,7 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -1108,7 +1091,7 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (type) { @@ -1142,7 +1125,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { switch (src1->node.data_type->e.numeric.type) { @@ -1160,9 +1143,6 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c case HLSL_TYPE_BOOL: dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; break; - - default: - vkd3d_unreachable(); }
dst->u[k].u *= ~0u; @@ -1179,7 +1159,7 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k];
return true; @@ -1193,7 +1173,7 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT);
- for (k = 0; k < dst_type->dimx; ++k) + for (k = 0; k < dst_type->e.numeric.dimx; ++k) { unsigned int shift = src2->value.u[k].u % 32;
@@ -1401,7 +1381,7 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg) struct hlsl_type *data_type = const_arg->node.data_type; unsigned int k;
- for (k = 0; k < data_type->dimx; ++k) + for (k = 0; k < data_type->e.numeric.dimx; ++k) { switch (data_type->e.numeric.type) { @@ -1422,9 +1402,6 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg) if (const_arg->value.u[k].u != 0) return false; break; - - default: - return false; } } return true; @@ -1435,7 +1412,7 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) struct hlsl_type *data_type = const_arg->node.data_type; unsigned int k;
- for (k = 0; k < data_type->dimx; ++k) + for (k = 0; k < data_type->e.numeric.dimx; ++k) { switch (data_type->e.numeric.type) { @@ -1460,9 +1437,6 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) if (const_arg->value.u[k].u != ~0) return false; break; - - default: - return false; } } return true; @@ -1544,6 +1518,260 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in return false; }
+static bool is_op_associative(enum hlsl_ir_expr_op op, enum hlsl_base_type type) +{ + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_MUL: + return type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT; + + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + case HLSL_OP2_MAX: + case HLSL_OP2_MIN: + return true; + + default: + return false; + } +} + +static bool is_op_commutative(enum hlsl_ir_expr_op op) +{ + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: + case HLSL_OP2_DOT: + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + case HLSL_OP2_MAX: + case HLSL_OP2_MIN: + case HLSL_OP2_MUL: + return true; + + default: + return false; + } +} + +/* Returns true iff x OPL (y OPR z) = (x OPL y) OPR (x OPL z). */ +static bool is_op_left_distributive(enum hlsl_ir_expr_op opl, enum hlsl_ir_expr_op opr, enum hlsl_base_type type) +{ + switch (opl) + { + case HLSL_OP2_BIT_AND: + return opr == HLSL_OP2_BIT_OR || opr == HLSL_OP2_BIT_XOR; + + case HLSL_OP2_BIT_OR: + return opr == HLSL_OP2_BIT_AND; + + case HLSL_OP2_DOT: + case HLSL_OP2_MUL: + return opr == HLSL_OP2_ADD && (type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT); + + case HLSL_OP2_MAX: + return opr == HLSL_OP2_MIN; + + case HLSL_OP2_MIN: + return opr == HLSL_OP2_MAX; + + default: + return false; + } +} + +/* Attempt to collect together the expression (x OPL a) OPR (x OPL b) -> x OPL (a OPR b). */ +static struct hlsl_ir_node *collect_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *instr, + enum hlsl_ir_expr_op opr, struct hlsl_ir_node *node1, struct hlsl_ir_node *node2) +{ + enum hlsl_base_type type = instr->data_type->e.numeric.type; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *ab, *res; + struct hlsl_ir_expr *e1, *e2; + enum hlsl_ir_expr_op opl; + + if (!node1 || !node2 || node1->type != HLSL_IR_EXPR || node2->type != HLSL_IR_EXPR) + return NULL; + e1 = hlsl_ir_expr(node1); + e2 = hlsl_ir_expr(node2); + opl = e1->op; + + if (e2->op != opl || !is_op_left_distributive(opl, opr, type)) + return NULL; + if (e1->operands[0].node != e2->operands[0].node) + return NULL; + if (e1->operands[1].node->type != HLSL_IR_CONSTANT || e2->operands[1].node->type != HLSL_IR_CONSTANT) + return NULL; + + if (!(ab = hlsl_new_binary_expr(ctx, opr, e1->operands[1].node, e2->operands[1].node))) + return NULL; + hlsl_block_add_instr(block, ab); + + operands[0] = e1->operands[0].node; + operands[1] = ab; + + if (!(res = hlsl_new_expr(ctx, opl, operands, instr->data_type, &instr->loc))) + return NULL; + hlsl_block_add_instr(block, res); + return res; +} + +bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *arg1, *arg2, *tmp; + struct hlsl_ir_expr *expr; + enum hlsl_base_type type; + enum hlsl_ir_expr_op op; + struct hlsl_block block; + bool progress = false; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (instr->data_type->class > HLSL_CLASS_VECTOR) + return false; + + hlsl_block_init(&block); + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + type = instr->data_type->e.numeric.type; + op = expr->op; + + if (!arg1 || !arg2) + return false; + + if ((tmp = collect_exprs(ctx, &block, instr, op, arg1, arg2))) + { + /* (x OPL a) OPR (x OPL b) -> x OPL (a OPR b) */ + list_move_before(&instr->entry, &block.instrs); + hlsl_replace_node(instr, tmp); 
+ return true; + } + + if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT) + { + /* a OP x -> x OP a */ + tmp = arg1; + arg1 = arg2; + arg2 = tmp; + progress = true; + } + + if (is_op_associative(op, type)) + { + struct hlsl_ir_expr *e1 = arg1->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg1) : NULL; + struct hlsl_ir_expr *e2 = arg2->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg2) : NULL; + + if (e1 && e1->op == op && e1->operands[0].node->type != HLSL_IR_CONSTANT + && e1->operands[1].node->type == HLSL_IR_CONSTANT) + { + if (arg2->type == HLSL_IR_CONSTANT) + { + /* (x OP a) OP b -> x OP (a OP b) */ + struct hlsl_ir_node *ab; + + if (!(ab = hlsl_new_binary_expr(ctx, op, e1->operands[1].node, arg2))) + goto fail; + hlsl_block_add_instr(&block, ab); + + arg1 = e1->operands[0].node; + arg2 = ab; + progress = true; + } + else if (is_op_commutative(op)) + { + /* (x OP a) OP y -> (x OP y) OP a */ + struct hlsl_ir_node *xy; + + if (!(xy = hlsl_new_binary_expr(ctx, op, e1->operands[0].node, arg2))) + goto fail; + hlsl_block_add_instr(&block, xy); + + arg1 = xy; + arg2 = e1->operands[1].node; + progress = true; + } + } + + if (!progress && arg1->type != HLSL_IR_CONSTANT && e2 && e2->op == op + && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT) + { + /* x OP (y OP a) -> (x OP y) OP a */ + struct hlsl_ir_node *xy; + + if (!(xy = hlsl_new_binary_expr(ctx, op, arg1, e2->operands[0].node))) + goto fail; + hlsl_block_add_instr(&block, xy); + + arg1 = xy; + arg2 = e2->operands[1].node; + progress = true; + } + + if (!progress && e1 && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[1].node, arg2))) + { + /* (y OPR (x OPL a)) OPR (x OPL b) -> y OPR (x OPL (a OPR b)) */ + arg1 = e1->operands[0].node; + arg2 = tmp; + progress = true; + } + + if (!progress && is_op_commutative(op) && e1 + && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[0].node, arg2))) + { + /* ((x OPL a) OPR y) OPR (x OPL b) -> (x OPL (a OPR b)) OPR y */ + arg1 = tmp; + arg2 = e1->operands[1].node; + progress = true; + } + + if (!progress && e2 && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[0].node))) + { + /* (x OPL a) OPR ((x OPL b) OPR y) -> (x OPL (a OPR b)) OPR y */ + arg1 = tmp; + arg2 = e2->operands[1].node; + progress = true; + } + + if (!progress && is_op_commutative(op) && e2 + && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[1].node))) + { + /* (x OPL a) OPR (y OPR (x OPL b)) -> (x OPL (a OPR b)) OPR y */ + arg1 = tmp; + arg2 = e2->operands[0].node; + progress = true; + } + } + + if (progress) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; + struct hlsl_ir_node *res; + + if (!(res = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) + goto fail; + hlsl_block_add_instr(&block, res); + + list_move_before(&instr->entry, &block.instrs); + hlsl_replace_node(instr, res); + } + + return progress; + +fail: + hlsl_block_cleanup(&block); + return false; +} + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_constant_value value; @@ -1559,8 +1787,8 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst return false; src = hlsl_ir_constant(swizzle->val.node);
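Returning to hlsl_normalize_binary_exprs() added above: the point of pushing constants to the right and reassociating is that later constant folding can then combine them. With integer ops, for instance, (x + 1) + 2 becomes x + (1 + 2), which folds to x + 3; note that is_op_associative() deliberately excludes floats, where reassociation would change results. A toy sketch of the effect in plain C (invented helper, not part of the patch):

    #include <stdio.h>

    /* Collapse the constant tail of ((x + c0) + c1) + ... into one addend;
     * the variable leaf is untouched, only the constants are combined. */
    static int fold_constant_tail(const int *constants, unsigned int count)
    {
        int acc = 0;

        for (unsigned int i = 0; i < count; ++i)
            acc += constants[i];
        return acc;
    }

    int main(void)
    {
        const int tail[] = {1, 2};

        printf("(x + 1) + 2 -> x + %d\n", fold_constant_tail(tail, 2));
        return 0;
    }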
- for (i = 0; i < swizzle->node.data_type->dimx; ++i) - value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i) + value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)];
if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) return false; diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index b0e89bededb..b608fae21ac 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -1,5 +1,6 @@ /* * Copyright 2023 Conor McCarthy for CodeWeavers + * Copyright 2023-2024 Elizabeth Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -201,6 +202,14 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 src->reg.u.immconst_u32[0] = value; }
+static void vsir_src_param_init_io(struct vkd3d_shader_src_param *src, + enum vkd3d_shader_register_type reg_type, const struct signature_element *e, unsigned int idx_count) +{ + vsir_src_param_init(src, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = vsir_swizzle_from_writemask(e->mask); +} + void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) { vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); @@ -214,6 +223,14 @@ static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_ src->reg.idx[0].offset = idx; }
+static void src_param_init_parameter_vec4(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) +{ + vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); + src->reg.idx[0].offset = idx; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; +} + static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) { vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2); @@ -243,6 +260,14 @@ static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigne src->reg.idx[0].offset = idx; }
+static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); + src->reg.idx[0].offset = idx; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; +} + static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) { vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); @@ -278,6 +303,14 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader param->shift = 0; }
+static void vsir_dst_param_init_io(struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_register_type reg_type, + const struct signature_element *e, unsigned int idx_count) +{ + vsir_dst_param_init(dst, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = e->mask; +} + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); @@ -290,6 +323,14 @@ static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigne dst->reg.idx[0].offset = idx; }
+static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); + dst->reg.idx[0].offset = idx; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = VKD3DSP_WRITEMASK_ALL; +} + static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); @@ -662,7 +703,56 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog return VKD3D_OK; }
-static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex) +static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, + struct vkd3d_shader_instruction *tex, unsigned int *tmp_idx) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_location *location = &tex->location; + struct vkd3d_shader_instruction *div_ins, *tex_ins; + size_t pos = tex - instructions->elements; + unsigned int w_comp; + + w_comp = vsir_swizzle_get_component(tex->src[0].swizzle, 3); + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; + + div_ins = &instructions->elements[pos + 1]; + tex_ins = &instructions->elements[pos + 2]; + + if (!vsir_instruction_init_with_params(program, div_ins, location, VKD3DSIH_DIV, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_dst_param_init(&div_ins->dst[0], VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + div_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + div_ins->dst[0].reg.idx[0].offset = *tmp_idx; + div_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + + div_ins->src[0] = tex->src[0]; + + div_ins->src[1] = tex->src[0]; + div_ins->src[1].swizzle = vkd3d_shader_create_swizzle(w_comp, w_comp, w_comp, w_comp); + + if (!vsir_instruction_init_with_params(program, tex_ins, location, VKD3DSIH_TEX, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + tex_ins->dst[0] = tex->dst[0]; + + tex_ins->src[0].reg = div_ins->dst[0].reg; + tex_ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + + tex_ins->src[1] = tex->src[1]; + + vkd3d_shader_instruction_make_nop(tex); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, + struct vkd3d_shader_instruction *tex, struct vkd3d_shader_message_context *message_context) { unsigned int idx = tex->src[1].reg.idx[0].offset; struct vkd3d_shader_src_param *srcs; @@ -670,16 +760,34 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, st VKD3D_ASSERT(tex->src[1].reg.idx_count == 1); VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr);
- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 4))) return VKD3D_ERROR_OUT_OF_MEMORY;
srcs[0] = tex->src[0]; vsir_src_param_init_resource(&srcs[1], idx, idx); vsir_src_param_init_sampler(&srcs[2], idx, idx);
- tex->opcode = VKD3DSIH_SAMPLE; - tex->src = srcs; - tex->src_count = 3; + if (!tex->flags) + { + tex->opcode = VKD3DSIH_SAMPLE; + tex->src = srcs; + tex->src_count = 3; + } + else if (tex->flags == VKD3DSI_TEXLD_BIAS) + { + tex->opcode = VKD3DSIH_SAMPLE_B; + tex->src = srcs; + tex->src_count = 4; + + srcs[3] = tex->src[0]; + srcs[3].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); + } + else + { + vkd3d_shader_error(message_context, &tex->location, + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unhandled tex flags %#x.", tex->flags); + return VKD3D_ERROR_NOT_IMPLEMENTED; + }
return VKD3D_OK; } @@ -709,6 +817,76 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, return VKD3D_OK; }
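For context on vsir_program_lower_texldp() above: TEXLDP implements projective texturing, i.e. the coordinate is divided through by the component selected as .w before an ordinary sample, which is why the pass emits a DIV with a (w, w, w, w) swizzle followed by a plain TEX. In C terms (illustrative only, not the actual data types):

    struct vec4 { float x, y, z, w; };

    /* The projective divide emitted before the TEX: every component of the
     * coordinate, including w itself, is divided by the w component. */
    static struct vec4 project(struct vec4 t)
    {
        struct vec4 r = {t.x / t.w, t.y / t.w, t.z / t.w, t.w / t.w};

        return r;
    }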
+static enum vkd3d_result vsir_program_lower_dcl_input(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +{ + switch (ins->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + break; + + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); + break; + + default: + vkd3d_shader_error(ctx->message_context, &ins->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Internal compiler error: invalid register type %#x for DCL_INPUT.", + ins->declaration.dst.reg.type); + return VKD3D_ERROR; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_dcl_output(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +{ + switch (ins->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + break; + + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTSTENCILREF: + bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); + break; + + default: + vkd3d_shader_error(ctx->message_context, &ins->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Internal compiler error: invalid register type %#x for DCL_OUTPUT.", + ins->declaration.dst.reg.type); + return VKD3D_ERROR; + } + + return VKD3D_OK; +} + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, struct vsir_transformation_context *ctx) { @@ -743,19 +921,47 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr case VKD3DSIH_DCL_GLOBAL_FLAGS: case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: case VKD3DSIH_DCL_THREAD_GROUP: case VKD3DSIH_DCL_UAV_TYPED: vkd3d_shader_instruction_make_nop(ins); break;
+ case VKD3DSIH_DCL_INPUT: + vsir_program_lower_dcl_input(program, ins, ctx); + vkd3d_shader_instruction_make_nop(ins); + break; + + case VKD3DSIH_DCL_OUTPUT: + vsir_program_lower_dcl_output(program, ins, ctx); + vkd3d_shader_instruction_make_nop(ins); + break; + + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: + vkd3d_shader_instruction_make_nop(ins); + break; + case VKD3DSIH_SINCOS: if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) return ret; break;
case VKD3DSIH_TEX: - if ((ret = vsir_program_lower_tex(program, ins)) < 0) - return ret; + if (ins->flags == VKD3DSI_TEXLD_PROJECT) + { + if ((ret = vsir_program_lower_texldp(program, ins, &tmp_idx)) < 0) + return ret; + } + else + { + if ((ret = vsir_program_lower_tex(program, ins, message_context)) < 0) + return ret; + } break;
case VKD3DSIH_TEXLDD: @@ -847,11 +1053,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, return VKD3D_OK; }
+static bool add_signature_element(struct shader_signature *signature, const char *semantic_name, + uint32_t semantic_index, uint32_t mask, uint32_t register_index, + enum vkd3d_shader_interpolation_mode interpolation_mode) +{ + struct signature_element *new_elements, *e; + + if (!(new_elements = vkd3d_realloc(signature->elements, + (signature->element_count + 1) * sizeof(*signature->elements)))) + return false; + signature->elements = new_elements; + e = &signature->elements[signature->element_count++]; + memset(e, 0, sizeof(*e)); + e->semantic_name = vkd3d_strdup(semantic_name); + e->semantic_index = semantic_index; + e->sysval_semantic = VKD3D_SHADER_SV_NONE; + e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + e->register_count = 1; + e->mask = mask; + e->used_mask = mask; + e->register_index = register_index; + e->target_location = register_index; + e->interpolation_mode = interpolation_mode; + return true; +} + static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, struct vsir_transformation_context *ctx) { struct shader_signature *signature = &program->output_signature; - struct signature_element *new_elements, *e; + struct signature_element *e;
if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) return VKD3D_OK; @@ -864,22 +1095,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr return VKD3D_OK; }
- if (!(new_elements = vkd3d_realloc(signature->elements, - (signature->element_count + 1) * sizeof(*signature->elements)))) + if (!add_signature_element(signature, "COLOR", 0, VKD3DSP_WRITEMASK_ALL, SM1_COLOR_REGISTER_OFFSET, VKD3DSIM_NONE)) return VKD3D_ERROR_OUT_OF_MEMORY; - signature->elements = new_elements; - e = &signature->elements[signature->element_count++]; - memset(e, 0, sizeof(*e)); - e->semantic_name = vkd3d_strdup("COLOR"); - e->sysval_semantic = VKD3D_SHADER_SV_NONE; - e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; - e->register_count = 1; - e->mask = VKD3DSP_WRITEMASK_ALL; - e->used_mask = VKD3DSP_WRITEMASK_ALL; - e->register_index = SM1_COLOR_REGISTER_OFFSET; - e->target_location = SM1_COLOR_REGISTER_OFFSET; - e->interpolation_mode = VKD3DSIM_NONE; - return VKD3D_OK; }
@@ -975,6 +1192,7 @@ static void remove_unread_output_components(const struct shader_signature *signa switch (dst->reg.type) { case VKD3DSPR_OUTPUT: + case VKD3DSPR_TEXCRDOUT: e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); break;
@@ -1034,6 +1252,9 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
e->target_location = map->input_register_index;
+ TRACE("Mapping signature index %u (mask %#x) to target location %u (mask %#x).\n", + i, e->mask, map->input_register_index, map->input_mask); + if ((input_mask & e->mask) == input_mask) { ++subset_varying_count; @@ -1054,6 +1275,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program } else { + TRACE("Marking signature index %u (mask %#x) as unused.\n", i, e->mask); + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; }
@@ -1213,12 +1436,6 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal vkd3d_shader_instruction_make_nop(ins); return; } - else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( - &ins->declaration.dst.reg)) - { - vkd3d_shader_instruction_make_nop(ins); - return; - }
if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins)) return; @@ -1306,6 +1523,8 @@ static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_pro flattener.phase = VKD3DSIH_INVALID; for (i = 0, locations.count = 0; i < instructions->count; ++i) flattener_eliminate_phase_related_dcls(&flattener, i, &locations); + bitmap_clear(program->io_dcls, VKD3DSPR_FORKINSTID); + bitmap_clear(program->io_dcls, VKD3DSPR_JOININSTID);
if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) return result; @@ -1369,25 +1588,15 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param } }
-static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e,
-        enum vkd3d_shader_register_type reg_type, unsigned int idx_count)
-{
-    param->write_mask = e->mask;
-    param->modifiers = 0;
-    param->shift = 0;
-    vsir_register_init(&param->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count);
-}
-
 static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser,
         const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst,
         const struct vkd3d_shader_location *location)
 {
     struct vkd3d_shader_instruction *ins;
-    struct vkd3d_shader_dst_param *param;
     const struct signature_element *e;
-    unsigned int i, count;
+    unsigned int i, count = 2;
- for (i = 0, count = 1; i < s->element_count; ++i) + for (i = 0; i < s->element_count; ++i) count += !!s->elements[i].used_mask;
if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) @@ -1399,7 +1608,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p
ins = &normaliser->instructions.elements[dst]; vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); - ins->flags = 1; + ++ins;
for (i = 0; i < s->element_count; ++i) @@ -1408,26 +1617,35 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p if (!e->used_mask) continue;
-        if (e->sysval_semantic != VKD3D_SHADER_SV_NONE)
-        {
-            vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV);
-            param = &ins->declaration.register_semantic.reg;
-            ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic);
-        }
-        else
+        vsir_instruction_init(ins, location, VKD3DSIH_MOV);
+        ins->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1);
+        ins->dst_count = 1;
+        ins->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1);
+        ins->src_count = 1;
+
+        if (!ins->dst || !ins->src)
         {
-            vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT);
-            param = &ins->declaration.dst;
+            WARN("Failed to allocate dst/src param.\n");
+            return VKD3D_ERROR_OUT_OF_MEMORY;
         }

- shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); - param->reg.idx[0].offset = input_control_point_count; - param->reg.idx[1].offset = e->register_index; - param->write_mask = e->mask; + vsir_dst_param_init_io(&ins->dst[0], VKD3DSPR_OUTPUT, e, 2); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = 0; + ins->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + ins->dst[0].reg.idx[1].offset = e->register_index; + + vsir_src_param_init_io(&ins->src[0], VKD3DSPR_INPUT, e, 2); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = 0; + ins->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + ins->src[0].reg.idx[1].offset = e->register_index;
++ins; }
+ vsir_instruction_init(ins, location, VKD3DSIH_RET); + return VKD3D_OK; }
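The rewritten control_point_normaliser_emit_hs_input() thus emits a real pass-through control point phase, one MOV per used signature element, relatively addressed by the output control point id, followed by RET, instead of the old declaration-only stub. Modelled loosely in plain C (data layout invented for the sketch):

    #include <string.h>

    #define MAX_ELEMENTS 32 /* arbitrary bound for the sketch */

    /* Each invocation copies one control point's used elements, selected by
     * the output control point id, from the input patch to the output. */
    static void control_point_phase(float in[][MAX_ELEMENTS][4], float out[][MAX_ELEMENTS][4],
            unsigned int out_point_id, const unsigned int *used, unsigned int used_count)
    {
        for (unsigned int i = 0; i < used_count; ++i)
            memcpy(out[out_point_id][used[i]], in[out_point_id][used[i]], 4 * sizeof(float));
    }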
@@ -1442,7 +1660,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i enum vkd3d_result ret; unsigned int i, j;
- VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM4);
if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) { @@ -1545,11 +1763,6 @@ static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *n return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; }
-static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) -{ - return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; -} - static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) { @@ -1820,7 +2033,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map element_count = s->element_count; if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) return false; - memcpy(elements, s->elements, element_count * sizeof(*elements)); + if (element_count) + memcpy(elements, s->elements, element_count * sizeof(*elements));
for (i = 0; i < element_count; ++i) elements[i].sort_index = i; @@ -1920,41 +2134,26 @@ static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh { VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1);
-    /* For a relative-addressed register index, move the id up a slot to separate it from the address,
-     * because rel_addr can be replaced with a constant offset in some cases. */
-    if (reg->idx[id_idx].rel_addr)
-    {
-        reg->idx[id_idx + 1].rel_addr = NULL;
-        reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset;
-        reg->idx[id_idx].offset -= register_index;
-        if (id_idx)
-        {
-            /* idx[id_idx] now contains the array index, which must be moved below the control point id. */
-            struct vkd3d_shader_register_index tmp = reg->idx[id_idx];
-            reg->idx[id_idx] = reg->idx[id_idx - 1];
-            reg->idx[id_idx - 1] = tmp;
-        }
-        ++id_idx;
-    }
-    /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where
-     * tessellation level registers are merged into an array because they're an array in SPIR-V. */
-    else
-    {
-        ++id_idx;
-        memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0]));
-        reg->idx[0].rel_addr = NULL;
-        reg->idx[0].offset = reg->idx[id_idx].offset - register_index;
-    }
+    /* Make room for the array index at the front of the array. */
+    ++id_idx;
+    memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0]));
+
+    /* The array index inherits the register relative address, but is offset
+     * by the signature element register index. */
+    reg->idx[0].rel_addr = reg->idx[id_idx].rel_addr;
+    reg->idx[0].offset = reg->idx[id_idx].offset - register_index;
+    reg->idx[id_idx].rel_addr = NULL;
+
+    /* The signature index offset will be fixed in the caller. */

return id_idx; }
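A worked example for the simplified shader_register_normalise_arrayed_addressing() above: for an access like o[r0.x + 5] whose signature element starts at register 4, idx[] is shifted up one slot, idx[0] becomes the array index r0.x + 1 (the relative address plus the offset within the element's register range), and idx[id_idx] is left for the caller to overwrite with the signature element index. In isolation, with simplified types rather than the actual structures:

    #include <stdio.h>
    #include <string.h>

    struct reg_index
    {
        const char *rel_addr; /* stands in for the relative-address src param */
        unsigned int offset;
    };

    int main(void)
    {
        /* o[r0.x + 5], signature element based at register 4. */
        struct reg_index idx[3] = {{"r0.x", 5}};
        unsigned int id_idx = 0, register_index = 4;

        ++id_idx;
        memmove(&idx[1], &idx[0], id_idx * sizeof(idx[0]));
        idx[0].rel_addr = idx[id_idx].rel_addr;              /* "r0.x" */
        idx[0].offset = idx[id_idx].offset - register_index; /* 5 - 4 = 1 */
        idx[id_idx].rel_addr = NULL;

        printf("idx[0] = %s + %u; idx[%u] awaits the element index\n",
                idx[0].rel_addr, idx[0].offset, id_idx);
        return 0;
    }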
-static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, +static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, struct io_normaliser *normaliser) { unsigned int id_idx, reg_idx, write_mask, element_idx; struct vkd3d_shader_register *reg = &dst_param->reg; - struct vkd3d_shader_dst_param **dcl_params; const struct shader_signature *signature; const struct signature_element *e;
@@ -1970,26 +2169,23 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par /* Convert patch constant outputs to the patch constant register type to avoid the need * to convert compiler symbols when accessed as inputs in a later stage. */ reg->type = VKD3DSPR_PATCHCONST; - dcl_params = normaliser->pc_dcl_params; } else { signature = normaliser->output_signature; - dcl_params = normaliser->output_dcl_params; } break;
case VKD3DSPR_PATCHCONST: reg_idx = reg->idx[reg->idx_count - 1].offset; signature = normaliser->patch_constant_signature; - dcl_params = normaliser->pc_dcl_params; break;
+ case VKD3DSPR_TEXCRDOUT: case VKD3DSPR_COLOROUT: reg_idx = reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; break;
case VKD3DSPR_INCONTROLPOINT: @@ -1997,14 +2193,12 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par reg_idx = reg->idx[reg->idx_count - 1].offset; signature = normaliser->input_signature; reg->type = VKD3DSPR_INPUT; - dcl_params = normaliser->input_dcl_params; break;
case VKD3DSPR_ATTROUT: reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; break;
case VKD3DSPR_RASTOUT: @@ -2014,7 +2208,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; /* Fog and point size are scalar, but fxc/d3dcompiler emits a full * write mask when writing to them. */ if (reg->idx[0].offset > 0) @@ -2030,54 +2223,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par vkd3d_unreachable(); e = &signature->elements[element_idx];
- if (is_io_dcl) - { - /* Validated in the TPF reader. */ - VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); - - if (dcl_params[element_idx]) - { - /* Merge split declarations into a single one. */ - dcl_params[element_idx]->write_mask |= dst_param->write_mask; - /* Turn this into a nop. */ - return false; - } - else - { - dcl_params[element_idx] = dst_param; - } - } - - if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) - { - if (is_io_dcl) - { - /* Emit an array size for the control points for consistency with inputs. */ - reg->idx[0].offset = normaliser->output_control_point_count; - } - else - { - /* The control point id param. */ - VKD3D_ASSERT(reg->idx[0].rel_addr); - } - id_idx = 1; - } - if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) - { - if (is_io_dcl) - { - /* For control point I/O, idx 0 contains the control point count. - * Ensure it is moved up to the next slot. */ - reg->idx[id_idx].offset = reg->idx[0].offset; - reg->idx[0].offset = e->register_count; - ++id_idx; - } - else - { - id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); - } - } + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index);
/* Replace the register index with the signature element index */ reg->idx[id_idx].offset = element_idx; @@ -2129,6 +2276,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par
case VKD3DSPR_OUTCONTROLPOINT: reg->type = VKD3DSPR_OUTPUT; + if (io_normaliser_is_in_fork_or_join_phase(normaliser)) + normaliser->use_vocp = true; /* fall through */ case VKD3DSPR_OUTPUT: reg_idx = reg->idx[reg->idx_count - 1].offset; @@ -2136,8 +2285,6 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par break;
case VKD3DSPR_TEXTURE: - if (normaliser->shader_type != VKD3D_SHADER_TYPE_PIXEL) - return; reg->type = VKD3DSPR_INPUT; reg_idx = reg->idx[0].offset; signature = normaliser->input_signature; @@ -2169,40 +2316,10 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, struct io_normaliser *normaliser) { - struct vkd3d_shader_register *reg; unsigned int i;
switch (ins->opcode) { - case VKD3DSIH_DCL_INPUT: - if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) - { - reg = &ins->declaration.dst.reg; - - if (reg->type == VKD3DSPR_OUTCONTROLPOINT) - normaliser->use_vocp = true; - - /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their - * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. */ - if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) - vkd3d_shader_instruction_make_nop(ins); - else if (reg->type == VKD3DSPR_INCONTROLPOINT) - reg->type = VKD3DSPR_INPUT; - } - /* fall through */ - case VKD3DSIH_DCL_INPUT_PS: - case VKD3DSIH_DCL_OUTPUT: - if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser)) - vkd3d_shader_instruction_make_nop(ins); - break; - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_INPUT_SIV: - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_OUTPUT_SIV: - if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser)) - vkd3d_shader_instruction_make_nop(ins); - break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -2215,7 +2332,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi if (vsir_instruction_is_dcl(ins)) break; for (i = 0; i < ins->dst_count; ++i) - shader_dst_param_io_normalise(&ins->dst[i], false, normaliser); + shader_dst_param_io_normalise(&ins->dst[i], normaliser); for (i = 0; i < ins->src_count; ++i) shader_src_param_io_normalise(&ins->src[i], normaliser); break; @@ -2275,7 +2392,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program
program->instructions = normaliser.instructions; program->use_vocp = normaliser.use_vocp; - program->normalisation_level = VSIR_FULLY_NORMALISED_IO; + program->normalisation_level = VSIR_NORMALISED_SM6; return VKD3D_OK; }
@@ -2299,16 +2416,12 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * { enum vkd3d_shader_register_type type; enum vkd3d_shader_d3dbc_constant_register set; - uint32_t offset; } regs[] = { - {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, - {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, - {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, - {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, - {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, - {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, + {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER}, + {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER}, + {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER}, };
unsigned int i; @@ -2324,7 +2437,7 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * }
*set = regs[i].set; - *index = regs[i].offset + reg->idx[0].offset; + *index = reg->idx[0].offset; return true; } } @@ -3726,7 +3839,8 @@ static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_ sizeof(*list->structures))) return VKD3D_ERROR_OUT_OF_MEMORY;
- memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); + if (size) + memcpy(&list->structures[list->count], begin, size * sizeof(*begin));
list->count += size;
@@ -4663,7 +4777,8 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ } }
- qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); + if (cfg->loop_intervals) + qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals);
if (TRACE_ON()) for (i = 0; i < cfg->loop_interval_count; ++i) @@ -6634,52 +6749,486 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr return VKD3D_OK; }
-struct validation_context +static enum vkd3d_result vsir_program_add_fog_input(struct vsir_program *program, + struct vsir_transformation_context *ctx) { - struct vkd3d_shader_message_context *message_context; - const struct vsir_program *program; - size_t instruction_idx; - struct vkd3d_shader_location null_location; - bool invalid_instruction_idx; - enum vkd3d_result status; - bool dcl_temps_found; - enum vkd3d_shader_opcode phase; - bool inside_block; - - struct validation_context_temp_data - { - enum vsir_dimension dimension; - size_t first_seen; - } *temps; + struct shader_signature *signature = &program->input_signature; + uint32_t register_idx = 0;
- struct validation_context_ssa_data - { - enum vsir_dimension dimension; - enum vkd3d_data_type data_type; - size_t first_seen; - uint32_t write_mask; - uint32_t read_mask; - size_t first_assigned; - } *ssas; + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK;
- enum vkd3d_shader_opcode *blocks; - size_t depth; - size_t blocks_capacity; -}; + if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)) + return VKD3D_OK;
-static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, - enum vkd3d_shader_error error, const char *format, ...) -{ - struct vkd3d_string_buffer buf; - va_list args; + /* We could check the value and skip this if NONE, but chances are if a + * user specifies the fog fragment mode as a parameter, they'll want to + * enable it dynamically. Always specifying it (and hence always outputting + * it from the VS) avoids an extra VS variant. */
- vkd3d_string_buffer_init(&buf); + if (vsir_signature_find_element_by_name(signature, "FOG", 0)) + return VKD3D_OK;
- va_start(args, format); - vkd3d_string_buffer_vprintf(&buf, format, args); - va_end(args); + for (unsigned int i = 0; i < signature->element_count; ++i) + register_idx = max(register_idx, signature->elements[i].register_index + 1);
- if (ctx->invalid_instruction_idx) + if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) + return VKD3D_ERROR_OUT_OF_MEMORY; + return VKD3D_OK; +} + +static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode, + uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp, + size_t *ret_pos, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_location loc = ret->location; + uint32_t ssa_factor = program->ssa_count++; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + uint32_t ssa_temp, ssa_temp2; + + switch (mode) + { + case VKD3D_SHADER_FOG_FRAGMENT_LINEAR: + /* We generate the following code: + * + * add sr0, FOG_END, -vFOG.x + * mul_sat srFACTOR, sr0, FOG_SCALE + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + return VKD3D_ERROR_OUT_OF_MEMORY; + *ret_pos = pos + 4; + + ssa_temp = program->ssa_count++; + + ins = &program->instructions.elements[pos]; + + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_ADD, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_END, VKD3D_DATA_FLOAT); + vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); + ins->src[1].reg.idx[0].offset = fog_signature_idx; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + ins->src[1].modifiers = VKD3DSPSM_NEG; + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); + break; + + case VKD3D_SHADER_FOG_FRAGMENT_EXP: + /* We generate the following code: + * + * mul sr0, FOG_SCALE, vFOG.x + * exp_sat srFACTOR, -sr0 + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + return VKD3D_ERROR_OUT_OF_MEMORY; + *ret_pos = pos + 4; + + ssa_temp = program->ssa_count++; + + ins = &program->instructions.elements[pos]; + + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); + vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); + ins->src[1].reg.idx[0].offset = fog_signature_idx; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp); + ins->src[0].modifiers = VKD3DSPSM_NEG; + break; + + case VKD3D_SHADER_FOG_FRAGMENT_EXP2: + /* We generate the following code: + * + * mul sr0, FOG_SCALE, vFOG.x + * mul sr1, sr0, sr0 + * exp_sat srFACTOR, -sr1 + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) + return VKD3D_ERROR_OUT_OF_MEMORY; + *ret_pos = pos + 5; + + ssa_temp = program->ssa_count++; + ssa_temp2 = program->ssa_count++; + 
+ ins = &program->instructions.elements[pos]; + + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); + vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); + ins->src[1].reg.idx[0].offset = fog_signature_idx; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); + src_param_init_ssa_float(&ins->src[0], ssa_temp); + src_param_init_ssa_float(&ins->src[1], ssa_temp); + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp2); + ins->src[0].modifiers = VKD3DSPSM_NEG; + break; + + default: + vkd3d_unreachable(); + } + + /* We generate the following code: + * + * add sr0, FRAG_COLOUR, -FOG_COLOUR + * mad oC0, sr0, srFACTOR, FOG_COLOUR + */ + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_ADD, 1, 2); + dst_param_init_ssa_float4(&ins->dst[0], program->ssa_count++); + src_param_init_temp_float4(&ins->src[0], colour_temp); + src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); + ins->src[1].modifiers = VKD3DSPSM_NEG; + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MAD, 1, 3); + dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, colour_signature_idx, + program->output_signature.elements[colour_signature_idx].mask); + src_param_init_ssa_float4(&ins->src[0], program->ssa_count - 1); + src_param_init_ssa_float(&ins->src[1], ssa_factor); + src_param_init_parameter_vec4(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_insert_fragment_fog(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vkd3d_shader_message_context *message_context = ctx->message_context; + uint32_t colour_signature_idx, fog_signature_idx, colour_temp; + const struct vkd3d_shader_parameter1 *mode_parameter = NULL; + static const struct vkd3d_shader_location no_loc; + const struct signature_element *fog_element; + enum vkd3d_shader_fog_fragment_mode mode; + struct vkd3d_shader_instruction *ins; + size_t new_pos; + int ret; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK; + + if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx)) + return VKD3D_OK; + + if (!(mode_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE))) + return VKD3D_OK; + + if (mode_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported fog fragment mode parameter type %#x.", mode_parameter->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + if (mode_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid fog fragment mode parameter data type %#x.", mode_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + mode = 
mode_parameter->u.immediate_constant.u.u32; + + if (mode == VKD3D_SHADER_FOG_FRAGMENT_NONE) + return VKD3D_OK; + + /* Should have been added by vsir_program_add_fog_input(). */ + if (!(fog_element = vsir_signature_find_element_by_name(&program->input_signature, "FOG", 0))) + { + ERR("Fog input not found.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + fog_signature_idx = fog_element - program->input_signature.elements; + + /* We're going to be reading from the output, so we need to go + * through the whole shader and convert it to a temp. */ + colour_temp = program->temp_count++; + + for (size_t i = 0; i < program->instructions.count; ++i) + { + ins = &program->instructions.elements[i]; + + if (vsir_instruction_is_dcl(ins)) + continue; + + if (ins->opcode == VKD3DSIH_RET) + { + if ((ret = insert_fragment_fog_before_ret(program, ins, mode, fog_signature_idx, + colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) + return ret; + i = new_pos; + continue; + } + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + /* Note we run after I/O normalization. */ + if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) + { + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = colour_temp; + } + } + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_add_fog_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct shader_signature *signature = &program->output_signature; + const struct vkd3d_shader_parameter1 *source_parameter; + uint32_t register_idx = 0; + + if (!is_pre_rasterization_shader(program->shader_version.type)) + return VKD3D_OK; + + if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) + return VKD3D_OK; + + if (source_parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + enum vkd3d_shader_fog_source source = source_parameter->u.immediate_constant.u.u32; + + if (source == VKD3D_SHADER_FOG_SOURCE_FOG) + return VKD3D_OK; + + if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W + && !vsir_signature_find_element_by_name(signature, "COLOR", 1)) + return VKD3D_OK; + } + + if (vsir_signature_find_element_by_name(signature, "FOG", 0)) + return VKD3D_OK; + + for (unsigned int i = 0; i < signature->element_count; ++i) + register_idx = max(register_idx, signature->elements[i].register_index + 1); + + if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) + return VKD3D_ERROR_OUT_OF_MEMORY; + return VKD3D_OK; +} + +static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_source source, uint32_t temp, + uint32_t fog_signature_idx, uint32_t source_signature_idx, size_t *ret_pos) +{ + const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + + if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + ins = &program->instructions.elements[pos]; + + /* Write the fog output. 
*/ + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); + src_param_init_temp_float4(&ins->src[0], temp); + if (source == VKD3D_SHADER_FOG_SOURCE_Z) + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); + else /* Position or specular W. */ + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); + ++ins; + + /* Write the position or specular output. */ + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), + source_signature_idx, e->mask); + src_param_init_temp_float4(&ins->src[0], temp); + ++ins; + + *ret_pos = pos + 2; + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vkd3d_shader_message_context *message_context = ctx->message_context; + const struct vkd3d_shader_parameter1 *source_parameter = NULL; + uint32_t fog_signature_idx, source_signature_idx, temp; + static const struct vkd3d_shader_location no_loc; + enum vkd3d_shader_fog_source source; + const struct signature_element *e; + + if (!is_pre_rasterization_shader(program->shader_version.type)) + return VKD3D_OK; + + if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) + return VKD3D_OK; + + if (source_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported fog source parameter type %#x.", source_parameter->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + if (source_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid fog source parameter data type %#x.", source_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + source = source_parameter->u.immediate_constant.u.u32; + + TRACE("Fog source %#x.\n", source); + + if (source == VKD3D_SHADER_FOG_SOURCE_FOG) + return VKD3D_OK; + + if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W) + { + if (program->has_fog || !(e = vsir_signature_find_element_by_name(&program->output_signature, "COLOR", 1))) + return VKD3D_OK; + source_signature_idx = e - program->output_signature.elements; + } + else + { + if (!vsir_signature_find_sysval(&program->output_signature, + VKD3D_SHADER_SV_POSITION, 0, &source_signature_idx)) + { + vkd3d_shader_error(ctx->message_context, &no_loc, + VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); + return VKD3D_ERROR_INVALID_SHADER; + } + } + + if (!(e = vsir_signature_find_element_by_name(&program->output_signature, "FOG", 0))) + { + ERR("Fog output not found.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + fog_signature_idx = e - program->output_signature.elements; + + temp = program->temp_count++; + + /* Insert a fog write before each ret, and convert either specular or + * position output to a temp. 
*/ + for (size_t i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + if (vsir_instruction_is_dcl(ins)) + continue; + + if (ins->opcode == VKD3DSIH_RET) + { + size_t new_pos; + int ret; + + if ((ret = insert_vertex_fog_before_ret(program, ins, source, temp, + fog_signature_idx, source_signature_idx, &new_pos)) < 0) + return ret; + i = new_pos; + continue; + } + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + /* Note we run after I/O normalization. */ + if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == source_signature_idx) + { + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = temp; + } + } + } + + program->has_fog = true; + + return VKD3D_OK; +} + +struct validation_context +{ + struct vkd3d_shader_message_context *message_context; + const struct vsir_program *program; + size_t instruction_idx; + struct vkd3d_shader_location null_location; + bool invalid_instruction_idx; + enum vkd3d_result status; + bool dcl_temps_found; + enum vkd3d_shader_opcode phase; + bool inside_block; + + struct validation_context_temp_data + { + enum vsir_dimension dimension; + size_t first_seen; + } *temps; + + struct validation_context_ssa_data + { + enum vsir_dimension dimension; + enum vkd3d_data_type data_type; + size_t first_seen; + uint32_t write_mask; + uint32_t read_mask; + size_t first_assigned; + } *ssas; + + enum vkd3d_shader_opcode *blocks; + size_t depth; + size_t blocks_capacity; + + unsigned int outer_tess_idxs[4]; + unsigned int inner_tess_idxs[2]; + + struct validation_context_signature_data + { + struct validation_context_signature_stream_data + { + struct validation_context_signature_register_data + { + struct validation_context_signature_component_data + { + const struct signature_element *element; + } components[VKD3D_VEC4_SIZE]; + } registers[MAX_REG_OUTPUT]; + } streams[VKD3D_MAX_STREAM_COUNT]; + } input_signature_data, output_signature_data, patch_constant_signature_data; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, + enum vkd3d_shader_error error, const char *format, ...) +{ + struct vkd3d_string_buffer buf; + va_list args; + + vkd3d_string_buffer_init(&buf); + + va_start(args, format); + vkd3d_string_buffer_vprintf(&buf, format, args); + va_end(args); + + if (ctx->invalid_instruction_idx) { vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); WARN("VSIR validation error: %s\n", buf.buffer); @@ -6707,76 +7256,240 @@ static void vsir_validate_register_without_indices(struct validation_context *ct reg->idx_count, reg->type); }
-static void vsir_validate_io_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +enum vsir_signature_type +{ + SIGNATURE_TYPE_INPUT, + SIGNATURE_TYPE_OUTPUT, + SIGNATURE_TYPE_PATCH_CONSTANT, +}; + +enum vsir_io_reg_type { + REG_V, + REG_O, + REG_VPC, + REG_VICP, + REG_VOCP, + REG_COUNT, +}; + +enum vsir_phase +{ + PHASE_NONE, + PHASE_CONTROL_POINT, + PHASE_FORK, + PHASE_JOIN, + PHASE_COUNT, +}; + +struct vsir_io_register_data +{ + unsigned int flags; + enum vsir_signature_type signature_type; const struct shader_signature *signature; - bool has_control_point = false; + unsigned int control_point_count; +};
-    switch (reg->type)
+enum
+{
+    INPUT_BIT = (1u << 0),
+    OUTPUT_BIT = (1u << 1),
+    CONTROL_POINT_BIT = (1u << 2),
+};
+
+static const struct vsir_io_register_data vsir_sm4_io_register_data
+        [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] =
+{
+    [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] =
+    {
+        [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] =
+    {
+        [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] =
+    {
+        [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] =
+    {
+        [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] =
+    {
+        [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+        /* According to MSDN, vpc is not allowed in fork phases. However,
+         * we don't really distinguish between fork and join phases, so we
+         * allow it. */
+        [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+    },
+    [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] =
+    {
+        [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+        [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+    },
+    [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] =
+    {
+        [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+};
+
+static const struct vsir_io_register_data vsir_sm6_io_register_data
+        [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] =
+{
+    [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] =
+    {
+        [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] =
+    {
+        [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] =
+    {
+        [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] =
+    {
+        [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {OUTPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+    [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] =
+    {
+        [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+        [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+    },
+    [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] =
+    {
+        [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+        [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+    },
+    [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] =
+    {
+        [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+        [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+        [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+    },
+};
+
+static bool vsir_get_io_register_data(struct validation_context *ctx,
+        enum vkd3d_shader_register_type register_type, struct vsir_io_register_data *data)
+{
+    const struct vsir_io_register_data (*signature_register_data)
+            [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT];
+    enum vsir_io_reg_type io_reg_type;
+    enum vsir_phase phase;
+
+    if (ctx->program->shader_version.type >= ARRAY_SIZE(*signature_register_data))
+        return false;
+
+    if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
+        signature_register_data = &vsir_sm6_io_register_data;
+    else
+        signature_register_data = &vsir_sm4_io_register_data;
+
+    switch (register_type)
     {
-        case VKD3DSPR_INPUT:
-            signature = &ctx->program->input_signature;
+        case VKD3DSPR_INPUT: io_reg_type = REG_V; break;
+        case VKD3DSPR_OUTPUT: io_reg_type = REG_O; break;
+        case VKD3DSPR_INCONTROLPOINT: io_reg_type = REG_VICP; break;
+        case VKD3DSPR_OUTCONTROLPOINT: io_reg_type = REG_VOCP; break;
+        case VKD3DSPR_PATCHCONST: io_reg_type = REG_VPC; break;
-            switch (ctx->program->shader_version.type)
-            {
-                case VKD3D_SHADER_TYPE_GEOMETRY:
-                case VKD3D_SHADER_TYPE_HULL:
-                case VKD3D_SHADER_TYPE_DOMAIN:
-                    has_control_point = true;
-                    break;
+        default:
+            return false;
+    }
- default: - break; - } - break; + switch (ctx->phase) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: phase = PHASE_CONTROL_POINT; break; + case VKD3DSIH_HS_FORK_PHASE: phase = PHASE_FORK; break; + case VKD3DSIH_HS_JOIN_PHASE: phase = PHASE_JOIN; break; + case VKD3DSIH_INVALID: phase = PHASE_NONE; break;
- case VKD3DSPR_OUTPUT: - switch (ctx->program->shader_version.type) - { - case VKD3D_SHADER_TYPE_HULL: - if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE - || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) - { - signature = &ctx->program->output_signature; - has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - } - else - { - signature = &ctx->program->patch_constant_signature; - } - break; + default: + vkd3d_unreachable(); + }
- default: - signature = &ctx->program->output_signature; - break; - } - break; + *data = (*signature_register_data)[ctx->program->shader_version.type][phase][io_reg_type];
- case VKD3DSPR_INCONTROLPOINT: - signature = &ctx->program->input_signature; - has_control_point = true; - break; + if (!(data->flags & (INPUT_BIT | OUTPUT_BIT))) + return false;
-        case VKD3DSPR_OUTCONTROLPOINT:
-            signature = &ctx->program->output_signature;
-            has_control_point = true;
-            break;
+    /* VSIR_NORMALISED_HULL_CONTROL_POINT_IO differs from VSIR_NORMALISED_SM4
+     * by just a single flag, so we don't keep a whole copy of the table; we
+     * simply patch the SM4 data when needed. */
+    if (ctx->program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO
+            && ctx->program->shader_version.type == VKD3D_SHADER_TYPE_HULL
+            && phase == PHASE_CONTROL_POINT && io_reg_type == REG_O)
+    {
+        VKD3D_ASSERT(!(data->flags & CONTROL_POINT_BIT));
+        data->flags |= CONTROL_POINT_BIT;
+    }
- case VKD3DSPR_PATCHCONST: - signature = &ctx->program->patch_constant_signature; - break; + switch (data->signature_type) + { + case SIGNATURE_TYPE_INPUT: + data->signature = &ctx->program->input_signature; + data->control_point_count = ctx->program->input_control_point_count; + return true; + + case SIGNATURE_TYPE_OUTPUT: + data->signature = &ctx->program->output_signature; + data->control_point_count = ctx->program->output_control_point_count; + return true; + + case SIGNATURE_TYPE_PATCH_CONSTANT: + data->signature = &ctx->program->patch_constant_signature; + return true;
default: vkd3d_unreachable(); } +}
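As an illustration of the table-driven lookup above (a hypothetical caller sketch, not part of the patch): in an SM4-level hull shader fork phase, VKD3DSPR_PATCHCONST maps to REG_VPC, whose entry is {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, so vpc is readable but not writable there, and is validated against the patch constant signature.

    struct vsir_io_register_data data;

    /* Assumes ctx currently points at a fork phase of an SM4-level hull
     * shader program. */
    if (vsir_get_io_register_data(ctx, VKD3DSPR_PATCHCONST, &data))
    {
        VKD3D_ASSERT(data.flags & INPUT_BIT);
        VKD3D_ASSERT(!(data.flags & OUTPUT_BIT));
        VKD3D_ASSERT(data.signature == &ctx->program->patch_constant_signature);
    }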
- if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) +static void vsir_validate_io_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) +{ + unsigned int control_point_index, control_point_count; + const struct shader_signature *signature; + struct vsir_io_register_data io_reg_data; + bool has_control_point; + + if (!vsir_get_io_register_data(ctx, reg->type, &io_reg_data)) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid usage of register type %#x.", reg->type); + return; + } + + signature = io_reg_data.signature; + has_control_point = io_reg_data.flags & CONTROL_POINT_BIT; + control_point_count = io_reg_data.control_point_count; + + if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6) { /* Indices are [register] or [control point, register]. Both are * allowed to have a relative address. */ unsigned int expected_idx_count = 1 + !!has_control_point;
+ control_point_index = 0; + if (reg->idx_count != expected_idx_count) { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, @@ -6795,7 +7508,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, /* If the signature element is not an array, indices are * [signature] or [control point, signature]. If the signature * element is an array, indices are [array, signature] or - * [control point, array, signature]. In any case `signature' is + * [array, control point, signature]. In any case `signature' is * not allowed to have a relative address, while the others are. */ if (reg->idx_count < 1) @@ -6829,6 +7542,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, is_array = true;
expected_idx_count = 1 + !!has_control_point + !!is_array; + control_point_index = !!is_array;
if (reg->idx_count != expected_idx_count) { @@ -6837,7 +7551,18 @@ static void vsir_validate_io_register(struct validation_context *ctx, reg->idx_count, reg->type); return; } + + if (is_array && !reg->idx[0].rel_addr && reg->idx[0].offset >= element->register_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Array index %u exceeds the signature element register count %u in a register of type %#x.", + reg->idx[0].offset, element->register_count, reg->type); } + + if (has_control_point && !reg->idx[control_point_index].rel_addr + && reg->idx[control_point_index].offset >= control_point_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Control point index %u exceeds the control point count %u in a register of type %#x.", + reg->idx[control_point_index].offset, control_point_count, reg->type); }
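To make the pre-SM6 index layout checked above concrete, here is a hypothetical register description (field names as used throughout this file): a hull shader reading input control point 3, register 7, i.e. vicp[3][7], reaches the validator as:

    struct vkd3d_shader_register reg = {0};

    reg.type = VKD3DSPR_INCONTROLPOINT;
    reg.idx_count = 2;      /* [control point, register] */
    reg.idx[0].offset = 3;  /* checked against input_control_point_count */
    reg.idx[1].offset = 7;  /* register index */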
static void vsir_validate_temp_register(struct validation_context *ctx, @@ -7143,8 +7868,26 @@ static void vsir_validate_register(struct validation_context *ctx, for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) { const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; - if (reg->idx[i].rel_addr) + if (param) + { vsir_validate_src_param(ctx, param); + + switch (param->reg.type) + { + case VKD3DSPR_TEMP: + case VKD3DSPR_SSA: + case VKD3DSPR_ADDR: + case VKD3DSPR_LOOP: + case VKD3DSPR_OUTPOINTID: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x for a relative address parameter.", + param->reg.type); + break; + } + } }
switch (reg->type) @@ -7185,6 +7928,10 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break;
+ case VKD3DSPR_PRIMID: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_NULL: vsir_validate_register_without_indices(ctx, reg); break; @@ -7201,6 +7948,18 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_uav_register(ctx, reg); break;
+ case VKD3DSPR_OUTPOINTID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_FORKINSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_JOININSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_INCONTROLPOINT: vsir_validate_io_register(ctx, reg); break; @@ -7213,6 +7972,38 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_io_register(ctx, reg); break;
+ case VKD3DSPR_TESSCOORD: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_THREADID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_THREADGROUPID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_LOCALTHREADID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_LOCALTHREADINDEX: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_COVERAGE: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_SAMPLEMASK: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_GSINSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_DEPTHOUTGE: vsir_validate_register_without_indices(ctx, reg); break; @@ -7221,15 +8012,37 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break;
+ case VKD3DSPR_OUTSTENCILREF: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_SSA: vsir_validate_ssa_register(ctx, reg); break;
+ case VKD3DSPR_WAVELANECOUNT: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_WAVELANEINDEX: + vsir_validate_register_without_indices(ctx, reg); + break; + default: break; } }
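The relative-addressing rule enforced above amounts to a simple predicate over register types; restated as a standalone sketch (the helper name is illustrative):

    static bool register_type_is_valid_rel_addr(enum vkd3d_shader_register_type type)
    {
        switch (type)
        {
            case VKD3DSPR_TEMP:
            case VKD3DSPR_SSA:
            case VKD3DSPR_ADDR:
            case VKD3DSPR_LOOP:
            case VKD3DSPR_OUTPOINTID:
                return true;
            default:
                return false;
        }
    }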
+static void vsir_validate_io_dst_param(struct validation_context *ctx, + const struct vkd3d_shader_dst_param *dst) +{ + struct vsir_io_register_data io_reg_data; + + if (!vsir_get_io_register_data(ctx, dst->reg.type, &io_reg_data) || !(io_reg_data.flags & OUTPUT_BIT)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x used as destination parameter.", dst->reg.type); +} + static void vsir_validate_dst_param(struct validation_context *ctx, const struct vkd3d_shader_dst_param *dst) { @@ -7304,15 +8117,28 @@ static void vsir_validate_dst_param(struct validation_context *ctx, case VKD3DSPR_IMMCONST64: case VKD3DSPR_SAMPLER: case VKD3DSPR_RESOURCE: - case VKD3DSPR_INPUT: validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid %#x register used as destination parameter.", dst->reg.type); break;
+ case VKD3DSPR_INPUT: + vsir_validate_io_dst_param(ctx, dst); + break; + + case VKD3DSPR_OUTPUT: + vsir_validate_io_dst_param(ctx, dst); + break; + + case VKD3DSPR_INCONTROLPOINT: + vsir_validate_io_dst_param(ctx, dst); + break; + + case VKD3DSPR_OUTCONTROLPOINT: + vsir_validate_io_dst_param(ctx, dst); + break; + case VKD3DSPR_PATCHCONST: - if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "PATCHCONST register used as destination parameters are only allowed in Hull Shaders."); + vsir_validate_io_dst_param(ctx, dst); break;
default: @@ -7320,6 +8146,16 @@ static void vsir_validate_dst_param(struct validation_context *ctx, } }
+static void vsir_validate_io_src_param(struct validation_context *ctx, + const struct vkd3d_shader_src_param *src) +{ + struct vsir_io_register_data io_reg_data; + + if (!vsir_get_io_register_data(ctx, src->reg.type, &io_reg_data) || !(io_reg_data.flags & INPUT_BIT)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x used as source parameter.", src->reg.type); +} + static void vsir_validate_src_param(struct validation_context *ctx, const struct vkd3d_shader_src_param *src) { @@ -7355,18 +8191,24 @@ static void vsir_validate_src_param(struct validation_context *ctx, "Invalid NULL register used as source parameter."); break;
+ case VKD3DSPR_INPUT: + vsir_validate_io_src_param(ctx, src); + break; + case VKD3DSPR_OUTPUT: - if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL - || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid OUTPUT register used as source parameter."); + vsir_validate_io_src_param(ctx, src); + break; + + case VKD3DSPR_INCONTROLPOINT: + vsir_validate_io_src_param(ctx, src); + break; + + case VKD3DSPR_OUTCONTROLPOINT: + vsir_validate_io_src_param(ctx, src); break;
case VKD3DSPR_PATCHCONST: - if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN - && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders."); + vsir_validate_io_src_param(ctx, src); break;
default: @@ -7420,13 +8262,6 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, return true; }
-enum vsir_signature_type -{ - SIGNATURE_TYPE_INPUT, - SIGNATURE_TYPE_OUTPUT, - SIGNATURE_TYPE_PATCH_CONSTANT, -}; - static const char * const signature_type_names[] = { [SIGNATURE_TYPE_INPUT] = "input", @@ -7466,17 +8301,37 @@ sysval_validation_data[] = };
static void vsir_validate_signature_element(struct validation_context *ctx, - const struct shader_signature *signature, enum vsir_signature_type signature_type, - unsigned int idx) + const struct shader_signature *signature, struct validation_context_signature_data *signature_data, + enum vsir_signature_type signature_type, unsigned int idx) { + enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; + bool integer_type = false, is_outer = false, is_gs_output, require_index = true; const char *signature_type_name = signature_type_names[signature_type]; const struct signature_element *element = &signature->elements[idx]; - bool integer_type = false; + unsigned int semantic_index_max = 0, i, j;
if (element->register_count == 0) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid zero register count.", idx, signature_type_name);
+ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6 && element->register_count != 1) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid register count %u.", idx, signature_type_name, + element->register_count); + + if (element->register_index != UINT_MAX && (element->register_index >= MAX_REG_OUTPUT + || MAX_REG_OUTPUT - element->register_index < element->register_count)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid register index %u and count %u.", + idx, signature_type_name, element->register_index, element->register_count); + + is_gs_output = ctx->program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY + && signature_type == SIGNATURE_TYPE_OUTPUT; + if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || (element->stream_index != 0 && !is_gs_output)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid stream index %u.", + idx, signature_type_name, element->stream_index); + if (element->mask == 0 || (element->mask & ~0xf)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); @@ -7486,33 +8341,27 @@ static void vsir_validate_signature_element(struct validation_context *ctx, "element %u of %s signature: Non-contiguous mask %#x.", idx, signature_type_name, element->mask);
- /* Here we'd likely want to validate that the usage mask is a subset of the - * signature mask. Unfortunately the D3DBC parser sometimes violates this. - * For example I've seen a shader like this: - * ps_3_0 - * [...] - * dcl_texcoord0 v0 - * [...] - * texld r2.xyzw, v0.xyzw, s1.xyzw - * [...] - * - * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to - * compute the signature mask, but the texld instruction apparently uses all - * the components. Of course the last two components are ignored, but - * formally they seem to be used. So we end up with a signature element with - * mask .xy and usage mask .xyzw. - * - * The correct fix would probably be to make the D3DBC parser aware of which - * components are really used for each instruction, but that would take some - * time. */ - if (element->used_mask & ~0xf) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid usage mask %#x.", - idx, signature_type_name, element->used_mask); + if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM4) + { + if ((element->used_mask & element->mask) != element->used_mask) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid usage mask %#x with mask %#x.", + idx, signature_type_name, element->used_mask, element->mask); + } + else + { + if (element->used_mask & ~0xf) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid usage mask %#x.", + idx, signature_type_name, element->used_mask); + }
switch (element->sysval_semantic) { case VKD3D_SHADER_SV_NONE: + case VKD3D_SHADER_SV_TARGET: + break; + case VKD3D_SHADER_SV_POSITION: case VKD3D_SHADER_SV_CLIP_DISTANCE: case VKD3D_SHADER_SV_CULL_DISTANCE: @@ -7523,18 +8372,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, case VKD3D_SHADER_SV_INSTANCE_ID: case VKD3D_SHADER_SV_IS_FRONT_FACE: case VKD3D_SHADER_SV_SAMPLE_INDEX: + case VKD3D_SHADER_SV_DEPTH: + case VKD3D_SHADER_SV_COVERAGE: + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: + case VKD3D_SHADER_SV_STENCIL_REF: + require_index = false; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + semantic_index_max = 4; + is_outer = true; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + semantic_index_max = 2; + is_outer = false; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + semantic_index_max = 3; + is_outer = true; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + semantic_index_max = 1; + is_outer = false; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: - case VKD3D_SHADER_SV_TARGET: - case VKD3D_SHADER_SV_DEPTH: - case VKD3D_SHADER_SV_COVERAGE: - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: - case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: - case VKD3D_SHADER_SV_STENCIL_REF: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; + semantic_index_max = 2; + is_outer = true; break;
default: @@ -7544,6 +8418,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, break; }
+ if (require_index && element->register_index == UINT_MAX) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: System value semantic %#x requires a register index.", + idx, signature_type_name, element->sysval_semantic); + + if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) + { + if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: System value semantic %#x is only valid " + "in the patch constant signature.", + idx, signature_type_name, element->sysval_semantic); + + if (ctx->program->tess_domain != expected_tess_domain) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", + idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); + + if (element->semantic_index >= semantic_index_max) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", + idx, signature_type_name, element->semantic_index, element->sysval_semantic); + } + else + { + unsigned int *idx_pos = &(is_outer ? ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; + + if (*idx_pos != ~0u) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", + idx, signature_type_name, element->semantic_index, element->sysval_semantic); + else + *idx_pos = idx; + } + } + if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) { const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; @@ -7622,6 +8533,31 @@ static void vsir_validate_signature_element(struct validation_context *ctx, validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", idx, signature_type_name, element->interpolation_mode); + + if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || !require_index) + return; + + for (i = element->register_index; i < MAX_REG_OUTPUT + && i - element->register_index < element->register_count; ++i) + { + struct validation_context_signature_stream_data *stream_data = &signature_data->streams[element->stream_index]; + struct validation_context_signature_register_data *register_data = &stream_data->registers[i]; + + for (j = 0; j < VKD3D_VEC4_SIZE; ++j) + { + struct validation_context_signature_component_data *component_data = ®ister_data->components[j]; + + if (!(element->mask & (1u << j))) + continue; + + if (!component_data->element) + component_data->element = element; + else + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Conflict with element %zu.", + idx, signature_type_name, component_data->element - signature->elements); + } + } }
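The per-component bookkeeping added at the end of this function is what detects overlapping signature elements; a minimal standalone model of the conflict condition (toy types, not the validator's real interface):

    struct toy_element
    {
        unsigned int stream, register_index, register_count, mask;
    };

    /* Two elements conflict when they share a stream, their register
     * ranges overlap, and they claim at least one common component. */
    static bool toy_elements_conflict(const struct toy_element *a, const struct toy_element *b)
    {
        if (a->stream != b->stream)
            return false;
        if (a->register_index + a->register_count <= b->register_index
                || b->register_index + b->register_count <= a->register_index)
            return false;
        return (a->mask & b->mask) != 0;
    }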
static const unsigned int allowed_signature_phases[] = @@ -7631,8 +8567,8 @@ static const unsigned int allowed_signature_phases[] = [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, };
-static void vsir_validate_signature(struct validation_context *ctx, - const struct shader_signature *signature, enum vsir_signature_type signature_type) +static void vsir_validate_signature(struct validation_context *ctx, const struct shader_signature *signature, + struct validation_context_signature_data *signature_data, enum vsir_signature_type signature_type) { unsigned int i;
@@ -7642,7 +8578,110 @@ static void vsir_validate_signature(struct validation_context *ctx, "Unexpected %s signature.", signature_type_names[signature_type]);
for (i = 0; i < signature->element_count; ++i) - vsir_validate_signature_element(ctx, signature, signature_type, i); + vsir_validate_signature_element(ctx, signature, signature_data, signature_type, i); + + if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT) + { + const struct signature_element *first_element, *element; + unsigned int expected_outer_count = 0; + unsigned int expected_inner_count = 0; + + switch (ctx->program->tess_domain) + { + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + expected_outer_count = 4; + expected_inner_count = 2; + break; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + expected_outer_count = 3; + expected_inner_count = 1; + break; + + case VKD3D_TESSELLATOR_DOMAIN_LINE: + expected_outer_count = 2; + expected_inner_count = 0; + break; + + default: + break; + } + + /* After I/O normalisation tessellation factors are merged in a single array. */ + if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) + { + expected_outer_count = min(1, expected_outer_count); + expected_inner_count = min(1, expected_inner_count); + } + + first_element = NULL; + for (i = 0; i < expected_outer_count; ++i) + { + if (ctx->outer_tess_idxs[i] == ~0u) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Missing outer system value semantic %u.", i); + } + else + { + element = &signature->elements[ctx->outer_tess_idxs[i]]; + + if (!first_element) + { + first_element = element; + continue; + } + + if (element->register_index != first_element->register_index + i) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid register index %u for outer system value semantic %u, expected %u.", + element->register_index, i, first_element->register_index + i); + } + + if (element->mask != first_element->mask) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid mask %#x for outer system value semantic %u, expected %#x.", + element->mask, i, first_element->mask); + } + } + } + + first_element = NULL; + for (i = 0; i < expected_inner_count; ++i) + { + if (ctx->inner_tess_idxs[i] == ~0u) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Missing inner system value semantic %u.", i); + } + else + { + element = &signature->elements[ctx->inner_tess_idxs[i]]; + + if (!first_element) + { + first_element = element; + continue; + } + + if (element->register_index != first_element->register_index + i) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid register index %u for inner system value semantic %u, expected %u.", + element->register_index, i, first_element->register_index + i); + } + + if (element->mask != first_element->mask) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid mask %#x for inner system value semantic %u, expected %#x.", + element->mask, i, first_element->mask); + } + } + } + } }
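The tessellation factor layout enforced by the patch constant checks above can be summarised as a small table; a sketch restating the switch (after SM6-level I/O normalisation the factors are merged into single arrayed elements, hence the min(1, ...) clamp in the code):

    static const struct {unsigned int outer, inner;} tess_factor_counts[] =
    {
        [VKD3D_TESSELLATOR_DOMAIN_LINE]     = {2, 0},
        [VKD3D_TESSELLATOR_DOMAIN_TRIANGLE] = {3, 1},
        [VKD3D_TESSELLATOR_DOMAIN_QUAD]     = {4, 2},
    };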
static const char *name_from_cf_type(enum vsir_control_flow_type type) @@ -7754,6 +8793,206 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, instruction->declaration.max_tessellation_factor); }
+static void vsir_validate_dcl_index_range(struct validation_context *ctx,
+        const struct vkd3d_shader_instruction *instruction)
+{
+    unsigned int i, j, base_register_idx, effective_write_mask = 0, control_point_count, first_component = UINT_MAX;
+    const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range;
+    enum vkd3d_shader_sysval_semantic sysval = ~0u;
+    const struct shader_signature *signature;
+    struct vsir_io_register_data io_reg_data;
+    bool has_control_point;
+
+    if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
+    {
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
+                "DCL_INDEX_RANGE is not allowed with fully normalised input/output.");
+        return;
+    }
+
+    if (range->dst.modifiers != VKD3DSPDM_NONE)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS,
+                "Invalid modifier %#x on a DCL_INDEX_RANGE destination parameter.", range->dst.modifiers);
+
+    if (range->dst.shift != 0)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT,
+                "Invalid shift %u on a DCL_INDEX_RANGE destination parameter.", range->dst.shift);
+
+    if (!vsir_get_io_register_data(ctx, range->dst.reg.type, &io_reg_data))
+    {
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+                "Invalid register type %#x in DCL_INDEX_RANGE instruction.",
+                range->dst.reg.type);
+        return;
+    }
+
+    signature = io_reg_data.signature;
+    has_control_point = io_reg_data.flags & CONTROL_POINT_BIT;
+    control_point_count = io_reg_data.control_point_count;
+
+    if (range->dst.reg.idx_count != 1 + !!has_control_point)
+    {
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
+                "Invalid index count %u in DCL_INDEX_RANGE instruction.",
+                range->dst.reg.idx_count);
+        return;
+    }
+
+    if (range->dst.reg.idx[0].rel_addr || (has_control_point && range->dst.reg.idx[1].rel_addr))
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+                "Invalid relative address in DCL_INDEX_RANGE instruction.");
+
+    if (has_control_point)
+    {
+        if (range->dst.reg.idx[0].offset != control_point_count)
+        {
+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+                    "Invalid control point index %u in DCL_INDEX_RANGE instruction, expected %u.",
+                    range->dst.reg.idx[0].offset, control_point_count);
+        }
+
+        base_register_idx = range->dst.reg.idx[1].offset;
+    }
+    else
+    {
+        base_register_idx = range->dst.reg.idx[0].offset;
+    }
+
+    if (range->register_count < 2)
+    {
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE,
+                "Invalid register count %u in DCL_INDEX_RANGE instruction, expected at least 2.",
+                range->register_count);
+        return;
+    }
+
+    /* Check that for each register in the range the write mask intersects
+     * at most one signature element (possibly none). Keep track of the
+     * union of all signature element masks. */
+    for (i = 0; i < range->register_count; ++i)
+    {
+        bool found = false;
+
+        for (j = 0; j < signature->element_count; ++j)
+        {
+            const struct signature_element *element = &signature->elements[j];
+
+            if (base_register_idx + i != element->register_index || !(range->dst.write_mask & element->mask))
+                continue;
+
+            if (found)
+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+                        "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.",
+                        range->dst.write_mask);
+
+            found = true;
+
+            if (first_component == UINT_MAX)
+                first_component = vsir_write_mask_get_component_idx(element->mask);
+            else if (first_component != vsir_write_mask_get_component_idx(element->mask))
+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+                        "Signature masks are not left-aligned within a DCL_INDEX_RANGE.");
+
+            effective_write_mask |= element->mask;
+        }
+    }
+
+    /* Check again that each register has at most one intersection, but this
+     * time using the effective write mask. Also check that we have
+     * stabilized, i.e. the effective write mask now contains all the
+     * signature element masks. This is important for being able to merge
+     * all the signature elements into a single one without conflicts (there
+     * is no hard reason why we couldn't support an effective write mask
+     * that stabilizes after more iterations, but the code would be more
+     * complicated, and we avoid that if we can). */
+    for (i = 0; i < range->register_count; ++i)
+    {
+        bool found = false;
+
+        for (j = 0; j < signature->element_count; ++j)
+        {
+            const struct signature_element *element = &signature->elements[j];
+
+            if (base_register_idx + i != element->register_index || !(effective_write_mask & element->mask))
+                continue;
+
+            if (element->sysval_semantic != VKD3D_SHADER_SV_NONE
+                    && !vsir_sysval_semantic_is_tess_factor(element->sysval_semantic))
+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+                        "Invalid sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE.",
+                        element->sysval_semantic);
+
+            if (sysval == ~0u)
+            {
+                sysval = element->sysval_semantic;
+                /* Line density and line detail can be arrayed together. */
+                if (sysval == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN)
+                    sysval = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET;
+            }
+            else
+            {
+                if (sysval != element->sysval_semantic)
+                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+                            "Inconsistent sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE, "
+                            "%#x was already seen.",
+                            element->sysval_semantic, sysval);
+            }
+
+            if (found)
+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+                        "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.",
+                        range->dst.write_mask);
+
+            found = true;
+
+            if (~effective_write_mask & element->mask)
+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+                        "Invalid write mask %#x on a signature element touched by a "
+                        "DCL_INDEX_RANGE instruction with effective write mask %#x.",
+                        element->mask, effective_write_mask);
+
+            if (first_component != vsir_write_mask_get_component_idx(element->mask))
+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+                        "Signature element masks are not left-aligned within a DCL_INDEX_RANGE.");
+        }
+    }
+
+    VKD3D_ASSERT(sysval != ~0u);
+}
+
+static void vsir_validate_dcl_input(struct validation_context *ctx,
+        const struct vkd3d_shader_instruction *instruction)
+{
+    switch (instruction->declaration.dst.reg.type)
+    {
+        /* Signature input registers.
*/ + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + case VKD3DSPR_PATCHCONST: + /* Non-signature input registers. */ + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT.", + instruction->declaration.dst.reg.type); + } +} + static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7763,6 +9002,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, instruction->declaration.primitive_type.type); }
+static void vsir_validate_dcl_input_ps(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS.", + instruction->declaration.dst.reg.type); + } +} + +static void vsir_validate_dcl_input_ps_sgv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS_SGV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_ps_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_sgv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_SGV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_PATCHCONST: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_output(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.dst.reg.type) + { + /* Signature output registers. */ + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + /* Non-signature output registers. */ + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTSTENCILREF: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_OUTPUT.", + instruction->declaration.dst.reg.type); + } +} + static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7772,6 +9110,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte instruction->declaration.count); }
+static void vsir_validate_dcl_output_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_OUTPUT_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + static void vsir_validate_dcl_output_topology(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7801,6 +9155,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); + + if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", + instruction->declaration.tessellator_domain, ctx->program->tess_domain); }
static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, @@ -8063,8 +9422,17 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, + [VKD3DSIH_DCL_INDEX_RANGE] = {0, 0, vsir_validate_dcl_index_range}, + [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input}, [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, + [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps}, + [VKD3DSIH_DCL_INPUT_PS_SGV] = {0, 0, vsir_validate_dcl_input_ps_sgv}, + [VKD3DSIH_DCL_INPUT_PS_SIV] = {0, 0, vsir_validate_dcl_input_ps_siv}, + [VKD3DSIH_DCL_INPUT_SGV] = {0, 0, vsir_validate_dcl_input_sgv}, + [VKD3DSIH_DCL_INPUT_SIV] = {0, 0, vsir_validate_dcl_input_siv}, + [VKD3DSIH_DCL_OUTPUT] = {0, 0, vsir_validate_dcl_output}, [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, + [VKD3DSIH_DCL_OUTPUT_SIV] = {0, 0, vsir_validate_dcl_output_siv}, [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, @@ -8177,6 +9545,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, .invalid_instruction_idx = true, + .outer_tess_idxs[0] = ~0u, + .outer_tess_idxs[1] = ~0u, + .outer_tess_idxs[2] = ~0u, + .outer_tess_idxs[3] = ~0u, + .inner_tess_idxs[0] = ~0u, + .inner_tess_idxs[1] = ~0u, }; unsigned int i;
@@ -8187,12 +9561,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c { case VKD3D_SHADER_TYPE_HULL: case VKD3D_SHADER_TYPE_DOMAIN: + if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID + || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Invalid tessellation domain %#x.", program->tess_domain); break;
default: if (program->patch_constant_signature.element_count != 0) validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "Patch constant signature is only valid for hull and domain shaders."); + + if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Invalid tessellation domain %#x.", program->tess_domain); }
switch (program->shader_version.type) @@ -8226,9 +9608,47 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c program->output_control_point_count); }
- vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); - vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); - vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); + vsir_validate_signature(&ctx, &program->input_signature, + &ctx.input_signature_data, SIGNATURE_TYPE_INPUT); + vsir_validate_signature(&ctx, &program->output_signature, + &ctx.output_signature_data, SIGNATURE_TYPE_OUTPUT); + vsir_validate_signature(&ctx, &program->patch_constant_signature, + &ctx.patch_constant_signature_data, SIGNATURE_TYPE_PATCH_CONSTANT); + + for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) + { + if (!bitmap_is_set(program->io_dcls, i)) + continue; + + switch (i) + { + /* Input registers */ + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + /* Output registers */ + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTSTENCILREF: + break; + + default: + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid input/output declaration %u.", i); + } + }
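The io_dcls loop above only accepts declarations of non-signature registers; anything that goes through a signature is validated separately. The bitmap semantics assumed here (vkd3d's bitmap_is_set() helper lives in its common headers; this restatement is illustrative only):

    static inline bool io_dcl_is_set(const uint32_t *map, unsigned int idx)
    {
        return map[idx >> 5] & (1u << (idx & 0x1f));
    }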
if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) goto fail; @@ -8318,6 +9738,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin if (program->shader_version.major <= 2) vsir_transform(&ctx, vsir_program_add_diffuse_output);
+ /* For vsir_program_insert_fragment_fog(). */ + vsir_transform(&ctx, vsir_program_add_fog_input); + + /* For vsir_program_insert_vertex_fog(). */ + vsir_transform(&ctx, vsir_program_add_fog_output); + return ctx.result; }
@@ -8372,6 +9798,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t vsir_transform(&ctx, vsir_program_insert_point_size); vsir_transform(&ctx, vsir_program_insert_point_size_clamp); vsir_transform(&ctx, vsir_program_insert_point_coord); + vsir_transform(&ctx, vsir_program_insert_fragment_fog); + vsir_transform(&ctx, vsir_program_insert_vertex_fog);
if (TRACE_ON()) vsir_program_trace(program); diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index df3edeaa4e6..e783128e236 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -41,6 +41,8 @@ struct msl_generator const char *prefix; bool failed;
+ bool write_depth; + const struct vkd3d_shader_interface_info *interface_info; const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; }; @@ -153,6 +155,71 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, msl_print_register_datatype(buffer, gen, reg->data_type); break;
+ case VKD3DSPR_DEPTHOUT: + if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled depth output in shader type #%x.", + gen->program->shader_version.type); + vkd3d_string_buffer_printf(buffer, "o_depth"); + break; + + case VKD3DSPR_IMMCONST: + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: + switch (reg->data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "as_type<int>(%#xu)", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "as_type<float>(%#xu)", reg->u.immconst_u32[0]); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); + vkd3d_string_buffer_printf(buffer, "<unrecognised immconst datatype %#x>", reg->data_type); + break; + } + break; + + case VSIR_DIMENSION_VEC4: + switch (reg->data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "as_type<int4>(uint4(%#xu, %#xu, %#xu, %#xu))", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "as_type<float4>(uint4(%#xu, %#xu, %#xu, %#xu))", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); + vkd3d_string_buffer_printf(buffer, "<unrecognised immconst datatype %#x>", reg->data_type); + break; + } + break; + + default: + vkd3d_string_buffer_printf(buffer, "<unhandled_dimension %#x>", reg->dimension); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled dimension %#x.", reg->dimension); + break; + } + break; + case VKD3DSPR_CONSTBUFFER: if (reg->idx_count != 3) { @@ -215,19 +282,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) { const struct vkd3d_shader_register *reg = &vsir_src->reg; + struct vkd3d_string_buffer *str;
msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers);
if (reg->non_uniform) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled 'non-uniform' modifier."); - if (vsir_src->modifiers) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers);
- msl_print_register_name(msl_src->str, gen, reg); + if (!vsir_src->modifiers) + str = msl_src->str; + else + str = vkd3d_string_buffer_get(&gen->string_buffers); + + msl_print_register_name(str, gen, reg); if (reg->dimension == VSIR_DIMENSION_VEC4) - msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); + msl_print_swizzle(str, vsir_src->swizzle, mask); + + switch (vsir_src->modifiers) + { + case VKD3DSPSM_NONE: + break; + case VKD3DSPSM_NEG: + vkd3d_string_buffer_printf(msl_src->str, "-%s", str->buffer); + break; + case VKD3DSPSM_ABS: + vkd3d_string_buffer_printf(msl_src->str, "abs(%s)", str->buffer); + break; + default: + vkd3d_string_buffer_printf(msl_src->str, "<unhandled modifier %#x>(%s)", + vsir_src->modifiers, str->buffer); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + break; + } + + if (str != msl_src->str) + vkd3d_string_buffer_release(&gen->string_buffers, str); }
static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) @@ -253,7 +344,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers);
msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); - msl_print_write_mask(msl_dst->mask, write_mask); + if (vsir_dst->reg.dimension == VSIR_DIMENSION_VEC4) + msl_print_write_mask(msl_dst->mask, write_mask);
return write_mask; } @@ -261,22 +353,29 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) { + uint32_t modifiers = dst->vsir->modifiers; va_list args;
if (dst->vsir->shift) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); - if (dst->vsir->modifiers) + if (modifiers & ~VKD3DSPDM_SATURATE) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); + "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers);
msl_print_indent(gen->buffer, gen->indent); vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer);
+ if (modifiers & VKD3DSPDM_SATURATE) + vkd3d_string_buffer_printf(gen->buffer, "saturate("); + va_start(args, format); vkd3d_string_buffer_vprintf(gen->buffer, format, args); va_end(args);
+ if (modifiers & VKD3DSPDM_SATURATE) + vkd3d_string_buffer_printf(gen->buffer, ")"); + vkd3d_string_buffer_printf(gen->buffer, ";\n"); }
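For illustration, the saturate handling above amounts to bracketing the right-hand side of the emitted assignment. A stand-alone sketch of the wrapping (hypothetical names, not the generator itself):

    #include <stdio.h>

    /* Illustration only: how a saturate destination modifier brackets the
     * emitted expression, mirroring msl_print_assignment() above. */
    static void print_assignment(int saturate, const char *dst, const char *expr)
    {
        printf("%s = %s%s%s;\n", dst, saturate ? "saturate(" : "", expr, saturate ? ")" : "");
    }

    int main(void)
    {
        print_assignment(0, "r0.x", "r1.x + r2.x"); /* r0.x = r1.x + r2.x; */
        print_assignment(1, "o0.x", "r1.x * r2.x"); /* o0.x = saturate(r1.x * r2.x); */
        return 0;
    }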
@@ -288,6 +387,164 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i "Internal compiler error: Unhandled instruction %#x.", ins->opcode); }
+static void msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct msl_src src[2]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + + msl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} +
+static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, uint32_t src_mask) +{ + unsigned int component_count; + struct msl_src src[2]; + struct msl_dst dst; + uint32_t dst_mask; + + dst_mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], src_mask); + msl_src_init(&src[1], gen, &ins->src[1], src_mask); + + if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1) + msl_print_assignment(gen, &dst, "float%u(dot(%s, %s))", + component_count, src[0].str->buffer, src[1].str->buffer); + else + msl_print_assignment(gen, &dst, "dot(%s, %s)", src[0].str->buffer, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} +
+static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct vkd3d_string_buffer *args; + struct msl_src src; + struct msl_dst dst; + unsigned int i; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + args = vkd3d_string_buffer_get(&gen->string_buffers); + + for (i = 0; i < ins->src_count; ++i) + { + msl_src_init(&src, gen, &ins->src[i], mask); + vkd3d_string_buffer_printf(args, "%s%s", i ? ", " : "", src.str->buffer); + msl_src_cleanup(&src, &gen->string_buffers); + } + + msl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, args); + msl_dst_cleanup(&dst, &gen->string_buffers); +} +
+static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + unsigned int mask_size; + struct msl_src src[2]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + + if ((mask_size = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "select(uint%u(0u), uint%u(0xffffffffu), bool%u(%s %s %s))", + mask_size, mask_size, mask_size, src[0].str->buffer, op, src[1].str->buffer); + else + msl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", + src[0].str->buffer, op, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} +
+static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor) +{ + unsigned int component_count; + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer); + else + msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} +
+static void msl_end_block(struct msl_generator *gen) +{ + --gen->indent; + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "}\n"); +} + +static void msl_begin_block(struct msl_generator *gen) +{ + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "{\n"); + ++gen->indent; +} +
+static void msl_if(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const char *condition; + struct msl_src src; + + msl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); + + msl_print_indent(gen->buffer, gen->indent); + condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? "bool" : "!bool"; + vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + + msl_begin_block(gen); +} +
+static void msl_else(struct msl_generator *gen) +{ + msl_end_block(gen); + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "else\n"); + msl_begin_block(gen); +} +
+static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + msl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { struct msl_src src; @@ -303,6 +560,31 @@ static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc msl_dst_cleanup(&dst, &gen->string_buffers); }
+static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + unsigned int component_count; + struct msl_src src[3]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + msl_src_init(&src[2], gen, &ins->src[2], mask); + + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "select(%s, %s, bool%u(%s))", + src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); + else + msl_print_assignment(gen, &dst, "select(%s, %s, bool(%s))", + src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); + + msl_src_cleanup(&src[2], &gen->string_buffers); + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { msl_print_indent(gen->buffer, gen->indent); @@ -315,17 +597,119 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d
switch (ins->opcode) { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_ADD: + msl_binop(gen, ins, "+"); + break; + case VKD3DSIH_AND: + msl_binop(gen, ins, "&"); + break; case VKD3DSIH_NOP: break; + case VKD3DSIH_DIV: + msl_binop(gen, ins, "/"); + break; + case VKD3DSIH_DP2: + msl_dot(gen, ins, vkd3d_write_mask_from_component_count(2)); + break; + case VKD3DSIH_DP3: + msl_dot(gen, ins, vkd3d_write_mask_from_component_count(3)); + break; + case VKD3DSIH_DP4: + msl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); + break; + case VKD3DSIH_ELSE: + msl_else(gen); + break; + case VKD3DSIH_ENDIF: + msl_end_block(gen); + break; + case VKD3DSIH_IEQ: + msl_relop(gen, ins, "=="); + break; + case VKD3DSIH_EXP: + msl_intrinsic(gen, ins, "exp2"); + break; + case VKD3DSIH_FRC: + msl_intrinsic(gen, ins, "fract"); + break; + case VKD3DSIH_FTOI: + msl_cast(gen, ins, "int"); + break; + case VKD3DSIH_FTOU: + msl_cast(gen, ins, "uint"); + break; + case VKD3DSIH_GEO: + msl_relop(gen, ins, ">="); + break; + case VKD3DSIH_IF: + msl_if(gen, ins); + break; + case VKD3DSIH_ISHL: + msl_binop(gen, ins, "<<"); + break; + case VKD3DSIH_ISHR: + case VKD3DSIH_USHR: + msl_binop(gen, ins, ">>"); + break; + case VKD3DSIH_LTO: + msl_relop(gen, ins, "<"); + break; + case VKD3DSIH_MAD: + msl_intrinsic(gen, ins, "fma"); + break; + case VKD3DSIH_MAX: + msl_intrinsic(gen, ins, "max"); + break; + case VKD3DSIH_MIN: + msl_intrinsic(gen, ins, "min"); + break; + case VKD3DSIH_INE: + case VKD3DSIH_NEU: + msl_relop(gen, ins, "!="); + break; + case VKD3DSIH_ITOF: + case VKD3DSIH_UTOF: + msl_cast(gen, ins, "float"); + break; + case VKD3DSIH_LOG: + msl_intrinsic(gen, ins, "log2"); + break; case VKD3DSIH_MOV: msl_mov(gen, ins); break; + case VKD3DSIH_MOVC: + msl_movc(gen, ins); + break; + case VKD3DSIH_MUL: + msl_binop(gen, ins, "*"); + break; + case VKD3DSIH_NOT: + msl_unary_op(gen, ins, "~"); + break; + case VKD3DSIH_OR: + msl_binop(gen, ins, "|"); + break; case VKD3DSIH_RET: msl_ret(gen, ins); break; + case VKD3DSIH_ROUND_NE: + msl_intrinsic(gen, ins, "rint"); + break; + case VKD3DSIH_ROUND_NI: + msl_intrinsic(gen, ins, "floor"); + break; + case VKD3DSIH_ROUND_PI: + msl_intrinsic(gen, ins, "ceil"); + break; + case VKD3DSIH_ROUND_Z: + msl_intrinsic(gen, ins, "trunc"); + break; + case VKD3DSIH_RSQ: + msl_intrinsic(gen, ins, "rsqrt"); + break; + case VKD3DSIH_SQRT: + msl_intrinsic(gen, ins, "sqrt"); + break; default: msl_unhandled(gen, ins); break; @@ -489,6 +873,16 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen)
if (e->sysval_semantic) { + if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) + { + if (type != VKD3D_SHADER_TYPE_PIXEL) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", type); + + msl_print_indent(gen->buffer, 1); + vkd3d_string_buffer_printf(buffer, "bool is_front_face [[front_facing]];\n"); + continue; + } msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); continue; @@ -501,13 +895,6 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) continue; }
- if (e->interpolation_mode != VKD3DSIM_NONE) - { - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); - continue; - } - if(e->register_count > 1) { msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, @@ -551,6 +938,18 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) break; }
+ switch (e->interpolation_mode) + { + /* The default interpolation attribute. */ + case VKD3DSIM_LINEAR: + case VKD3DSIM_NONE: + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + break; + } + vkd3d_string_buffer_printf(buffer, ";\n"); }
@@ -602,6 +1001,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) { e = &signature->elements[i];
+ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) + { + gen->write_depth = true; + msl_print_indent(gen->buffer, 1); + vkd3d_string_buffer_printf(buffer, "float shader_out_depth [[depth(any)]];\n"); + continue; + } + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) continue;
@@ -690,6 +1097,10 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen) vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); msl_print_write_mask(buffer, e->mask); } + else if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) + { + vkd3d_string_buffer_printf(buffer, ".u = uint4(input.is_front_face ? 0xffffffffu : 0u, 0, 0, 0)"); + } else { vkd3d_string_buffer_printf(buffer, " = <unhandled sysval %#x>", e->sysval_semantic); @@ -711,6 +1122,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) { e = &signature->elements[i];
+ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) + { + vkd3d_string_buffer_printf(buffer, " output.shader_out_depth = shader_out_depth;\n"); + continue; + } + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) continue;
@@ -770,9 +1187,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix);
+ if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, " float shader_out_depth;\n"); + msl_generate_entrypoint_prologue(gen);
vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); + if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth"); if (gen->descriptor_info->descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); vkd3d_string_buffer_printf(gen->buffer, ");\n"); @@ -790,6 +1212,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); + vkd3d_string_buffer_printf(gen->buffer, "#include <metal_common>\n\n"); + vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n");
if (gen->program->global_flags) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, @@ -808,6 +1232,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader "void %s_main(thread vkd3d_vec4 *v, " "thread vkd3d_vec4 *o", gen->prefix); + if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth"); if (gen->descriptor_info->descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); @@ -887,7 +1313,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) return ret;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index bdfd632ad12..db7ebab742d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -34,6 +34,32 @@ # include "vulkan/GLSL.std.450.h" #endif /* HAVE_SPIRV_UNIFIED1_GLSL_STD_450_H */
+#define VKD3D_SPIRV_VERSION_1_0 0x00010000 +#define VKD3D_SPIRV_VERSION_1_3 0x00010300 +#define VKD3D_SPIRV_GENERATOR_ID 18 +#define VKD3D_SPIRV_GENERATOR_VERSION 15 +#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) +#ifndef VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER +# define VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER 0 +#endif + +#define VKD3D_SPIRV_HEADER_SIZE 5 + +#define VKD3D_SPIRV_VERSION_MAJOR_SHIFT 16u +#define VKD3D_SPIRV_VERSION_MAJOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MAJOR_SHIFT) +#define VKD3D_SPIRV_VERSION_MINOR_SHIFT 8u +#define VKD3D_SPIRV_VERSION_MINOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MINOR_SHIFT) + +#define VKD3D_SPIRV_GENERATOR_ID_SHIFT 16u +#define VKD3D_SPIRV_GENERATOR_ID_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_ID_SHIFT) +#define VKD3D_SPIRV_GENERATOR_VERSION_SHIFT 0u +#define VKD3D_SPIRV_GENERATOR_VERSION_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_VERSION_SHIFT) + +#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT 16u +#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT) +#define VKD3D_SPIRV_INSTRUCTION_OP_SHIFT 0u +#define VKD3D_SPIRV_INSTRUCTION_OP_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_OP_SHIFT) + #ifdef HAVE_SPIRV_TOOLS # include "spirv-tools/libspirv.h"
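For reference, a SPIR-V version token unpacks with exactly the shifts and masks defined above; a minimal stand-alone sketch (self-contained copies of those constants):

    #include <stdint.h>
    #include <stdio.h>

    /* Self-contained copies of the constants above, decoding a version
     * token the way the parser does. */
    #define VERSION_MAJOR_SHIFT 16u
    #define VERSION_MAJOR_MASK (0xffu << VERSION_MAJOR_SHIFT)
    #define VERSION_MINOR_SHIFT 8u
    #define VERSION_MINOR_MASK (0xffu << VERSION_MINOR_SHIFT)

    int main(void)
    {
        uint32_t version = 0x00010300; /* VKD3D_SPIRV_VERSION_1_3 */
        unsigned int major = (version & VERSION_MAJOR_MASK) >> VERSION_MAJOR_SHIFT;
        unsigned int minor = (version & VERSION_MINOR_MASK) >> VERSION_MINOR_SHIFT;

        printf("SPIR-V %u.%u\n", major, minor); /* prints "SPIR-V 1.3" */
        return 0;
    }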
@@ -82,7 +108,7 @@ static uint32_t get_binary_to_text_options(enum vkd3d_shader_compile_option_form return out; }
-static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, +static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment, enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) { @@ -143,20 +169,6 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co return result; }
-static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment environment) -{ - static const enum vkd3d_shader_compile_option_formatting_flags formatting - = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; - struct vkd3d_shader_code text; - - if (!vkd3d_spirv_binary_to_text(spirv, environment, formatting, &text)) - { - vkd3d_shader_trace_text(text.code, text.size); - vkd3d_shader_free_shader_code(&text); - } -} - static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) { @@ -180,14 +192,13 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc
#else
-static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, +static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment, enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) { return VKD3D_ERROR; } -static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment environment) {} + static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) { @@ -196,6 +207,312 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc
#endif /* HAVE_SPIRV_TOOLS */
+struct spirv_colours +{ + const char *reset; + const char *comment; +}; + +struct spirv_parser +{ + struct vkd3d_string_buffer_cache string_buffers; + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; + enum vkd3d_shader_compile_option_formatting_flags formatting; + struct spirv_colours colours; + bool failed; + + const uint32_t *code; + size_t pos; + size_t size; + + struct vkd3d_string_buffer *text; +}; +
+static void VKD3D_PRINTF_FUNC(3, 4) spirv_parser_error(struct spirv_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); + va_end(args); + parser->failed = true; +} +
+static uint32_t spirv_parser_read_u32(struct spirv_parser *parser) +{ + if (parser->pos >= parser->size) + { + parser->failed = true; + return 0; + } + + return parser->code[parser->pos++]; +} +
+static void VKD3D_PRINTF_FUNC(2, 3) spirv_parser_print_comment(struct spirv_parser *parser, const char *format, ...) +{ + va_list args; + + if (!parser->text) + return; + + va_start(args, format); + vkd3d_string_buffer_printf(parser->text, "%s; ", parser->colours.comment); + vkd3d_string_buffer_vprintf(parser->text, format, args); + vkd3d_string_buffer_printf(parser->text, "%s\n", parser->colours.reset); + va_end(args); +} +
+static void spirv_parser_print_generator(struct spirv_parser *parser, uint32_t magic) +{ + unsigned int id, version; + const char *name; + + id = (magic & VKD3D_SPIRV_GENERATOR_ID_MASK) >> VKD3D_SPIRV_GENERATOR_ID_SHIFT; + version = (magic & VKD3D_SPIRV_GENERATOR_VERSION_MASK) >> VKD3D_SPIRV_GENERATOR_VERSION_SHIFT; + + switch (id) + { + case VKD3D_SPIRV_GENERATOR_ID: + name = "Wine VKD3D Shader Compiler"; + break; + + default: + name = NULL; + break; + } + + if (name) + spirv_parser_print_comment(parser, "Generator: %s; %u", name, version); + else + spirv_parser_print_comment(parser, "Generator: Unknown (%#x); %u", id, version); +} +
+static enum vkd3d_result spirv_parser_read_header(struct spirv_parser *parser) +{ + uint32_t magic, version, generator, bound, schema; + unsigned int major, minor; + + if (parser->pos > parser->size || parser->size - parser->pos < VKD3D_SPIRV_HEADER_SIZE) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Unexpected end while reading the SPIR-V header."); + return VKD3D_ERROR_INVALID_SHADER; + } + + magic = spirv_parser_read_u32(parser); + version = spirv_parser_read_u32(parser); + generator = spirv_parser_read_u32(parser); + bound = spirv_parser_read_u32(parser); + schema = spirv_parser_read_u32(parser); + + if (magic != SpvMagicNumber) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Invalid magic number %#08x.", magic); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (version & ~(VKD3D_SPIRV_VERSION_MAJOR_MASK | VKD3D_SPIRV_VERSION_MINOR_MASK)) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Invalid version token %#08x.", version); + return VKD3D_ERROR_INVALID_SHADER; + } + + major = (version & VKD3D_SPIRV_VERSION_MAJOR_MASK) >> VKD3D_SPIRV_VERSION_MAJOR_SHIFT; + minor = (version & VKD3D_SPIRV_VERSION_MINOR_MASK) >> VKD3D_SPIRV_VERSION_MINOR_SHIFT; + if (major != 1 || minor > 0) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unable to parse SPIR-V version %u.%u.", major, minor); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + if (!bound) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Invalid zero id bound."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (schema) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unable to handle instruction schema %#08x.", schema); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + if (parser->formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER) + { + spirv_parser_print_comment(parser, "SPIR-V"); + spirv_parser_print_comment(parser, "Version: %u.%u", major, minor); + spirv_parser_print_generator(parser, generator); + spirv_parser_print_comment(parser, "Bound: %u", bound); + spirv_parser_print_comment(parser, "Schema: %u", schema); + } + + return VKD3D_OK; +} +
+static enum vkd3d_result spirv_parser_parse_instruction(struct spirv_parser *parser) +{ + struct vkd3d_string_buffer *buffer; + uint16_t op, count; + unsigned int i; + uint32_t word; + + word = spirv_parser_read_u32(parser); + count = (word & VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK) >> VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT; + op = (word & VKD3D_SPIRV_INSTRUCTION_OP_MASK) >> VKD3D_SPIRV_INSTRUCTION_OP_SHIFT; + + if (!count) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Invalid word count %u.", count); + return VKD3D_ERROR_INVALID_SHADER; + } + + --count; + buffer = vkd3d_string_buffer_get(&parser->string_buffers); + for (i = 0; i < count; ++i) + { + word = spirv_parser_read_u32(parser); + vkd3d_string_buffer_printf(buffer, " 0x%08x", word); + } + spirv_parser_print_comment(parser, "<unrecognised instruction %#x>%s", op, buffer->buffer); + vkd3d_string_buffer_release(&parser->string_buffers, buffer); + + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unrecognised instruction %#x.", op); + + return VKD3D_OK; +} +
+static enum vkd3d_result spirv_parser_parse(struct spirv_parser *parser, struct vkd3d_shader_code *text) +{ + enum vkd3d_result ret; + + if (text) + parser->text = vkd3d_string_buffer_get(&parser->string_buffers); + + if ((ret = spirv_parser_read_header(parser)) < 0) + goto fail; + while (parser->pos < parser->size) + { + ++parser->location.line; + if ((ret = spirv_parser_parse_instruction(parser)) < 0) + goto fail; + } + + if (parser->failed) + { + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + if (text) + vkd3d_shader_code_from_string_buffer(text, parser->text); + + return VKD3D_OK; + +fail: + if (parser->text) + { + if (TRACE_ON()) + vkd3d_string_buffer_trace(parser->text); + vkd3d_string_buffer_release(&parser->string_buffers, parser->text); + } + return ret; +} +
+static void spirv_parser_cleanup(struct spirv_parser *parser) +{ + vkd3d_string_buffer_cache_cleanup(&parser->string_buffers); +} +
+static enum vkd3d_result spirv_parser_init(struct spirv_parser *parser, const struct vkd3d_shader_code *source, + const char *source_name, enum vkd3d_shader_compile_option_formatting_flags formatting, + struct vkd3d_shader_message_context *message_context) +{ + static const struct spirv_colours no_colours = + { + .reset = "", + .comment = "", + }; + static const struct spirv_colours colours = + { + .reset = "\x1b[m", + .comment = "\x1b[36m", + }; + + memset(parser, 0, sizeof(*parser)); + parser->location.source_name = source_name; + parser->message_context = message_context; + vkd3d_string_buffer_cache_init(&parser->string_buffers); + + if (source->size % 4) + { + vkd3d_string_buffer_cache_cleanup(&parser->string_buffers); + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Shader size %zu is not a multiple of four.", source->size); + return VKD3D_ERROR_INVALID_SHADER; + } + + parser->formatting = formatting; + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR) + parser->colours = colours; + else + parser->colours = no_colours; + parser->code = source->code; + parser->size = source->size / 4; + + return VKD3D_OK; +} +
+static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, + const char *source_name, enum vkd3d_shader_spirv_environment environment, + enum vkd3d_shader_compile_option_formatting_flags formatting, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct spirv_parser parser; + enum vkd3d_result ret; + + if (!VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) + return spirv_tools_binary_to_text(spirv, environment, formatting, out); + + MESSAGE("Creating a SPIR-V parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + if ((ret = spirv_parser_init(&parser, spirv, source_name, formatting, message_context)) < 0) + return ret; + + ret = spirv_parser_parse(&parser, out); + + spirv_parser_cleanup(&parser); + + return ret; +} +
+static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) +{ + static const enum vkd3d_shader_compile_option_formatting_flags formatting + = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; + struct vkd3d_shader_message_context message_context; + struct vkd3d_shader_code text; + + vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); + + if (!vkd3d_spirv_binary_to_text(spirv, NULL, environment, formatting, &text, &message_context)) + { + vkd3d_shader_trace_text(text.code, text.size); + vkd3d_shader_free_shader_code(&text); + } + + vkd3d_shader_message_context_cleanup(&message_context); +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index) { @@ -247,12 +564,6 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } }
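Each SPIR-V instruction's leading word packs the total word count in the high 16 bits and the opcode in the low 16, which is what spirv_parser_parse_instruction() above decodes. A stand-alone sketch with the masks written out:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustration only: unpacking an instruction's leading word. The word
     * count includes the leading word itself. */
    int main(void)
    {
        uint32_t word = 0x00020011; /* OpCapability Shader starts this way */
        unsigned int count = (word & 0xffff0000u) >> 16u;
        unsigned int op = word & 0xffffu;

        printf("opcode %u, %u operand word(s) follow\n", op, count - 1);
        return 0;
    }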
-#define VKD3D_SPIRV_VERSION_1_0 0x00010000 -#define VKD3D_SPIRV_VERSION_1_3 0x00010300 -#define VKD3D_SPIRV_GENERATOR_ID 18 -#define VKD3D_SPIRV_GENERATOR_VERSION 14 -#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) - struct vkd3d_spirv_stream { uint32_t *words; @@ -2406,6 +2717,7 @@ struct vkd3d_hull_shader_variables struct ssa_register_info { enum vkd3d_data_type data_type; + uint8_t write_mask; uint32_t id; };
@@ -2471,7 +2783,6 @@ struct spirv_compiler bool emit_point_size;
enum vkd3d_shader_opcode phase; - bool emit_default_control_point_phase; struct vkd3d_shader_phase control_point_phase; struct vkd3d_shader_phase patch_constant_phase;
@@ -3316,13 +3627,19 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, static const struct vkd3d_spec_constant_info { enum vkd3d_shader_parameter_name name; - uint32_t default_value; + union + { + uint32_t u; + float f; + } default_value; const char *debug_name; } vkd3d_shader_parameters[] = { - {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, - {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, + {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, {.u = 1}, "sample_count"}, + {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, {.f = 0.0f}, "alpha_test_ref"}, + {VKD3D_SHADER_PARAMETER_NAME_FOG_END, {.f = 1.0f}, "fog_end"}, + {VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, {.f = 1.0f}, "fog_scale"}, };
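The union above lets float-typed parameters (fog_end, fog_scale) carry 1.0f as their default while integral parameters keep plain values; the consumer still reads the raw 32 bits through .u. A small sketch of the idiom (hypothetical struct, not the compiler's):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical struct showing the designated-initializer union idiom;
     * the consumer reads the raw bits through .u either way. */
    struct constant_info
    {
        const char *debug_name;
        union
        {
            uint32_t u;
            float f;
        } default_value;
    };

    int main(void)
    {
        static const struct constant_info infos[] =
        {
            {"sample_count", {.u = 1}},
            {"fog_end", {.f = 1.0f}},
        };

        printf("%s raw default: %#x\n", infos[1].debug_name,
                (unsigned int)infos[1].default_value.u); /* 0x3f800000 */
        return 0;
    }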
static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) @@ -3383,7 +3700,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile const struct vkd3d_spec_constant_info *info;
info = get_spec_constant_info(name); - default_value = info ? info->default_value : 0; + default_value = info ? info->default_value.u : 0;
scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); @@ -3574,6 +3891,24 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, register_info->is_aggregate = false; return true; } + else if (reg->type == VKD3DSPR_SSA) + { + const struct ssa_register_info *ssa = &compiler->ssa_register_info[reg->idx[0].offset]; + + if (!ssa->id) + { + /* Should only be from a missing instruction implementation. */ + VKD3D_ASSERT(compiler->failed); + return 0; + } + + memset(register_info, 0, sizeof(*register_info)); + register_info->id = ssa->id; + register_info->storage_class = SpvStorageClassMax; + register_info->component_type = vkd3d_component_type_from_data_type(ssa->data_type); + register_info->write_mask = ssa->write_mask; + return true; + }
vkd3d_symbol_make_register(®_symbol, reg); if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) @@ -4181,67 +4516,14 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil return const_id; }
-static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg) -{ - VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); - VKD3D_ASSERT(reg->idx_count == 1); - return &compiler->ssa_register_info[reg->idx[0].offset]; -} - static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t val_id) + const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) { unsigned int i = reg->idx[0].offset; VKD3D_ASSERT(i < compiler->ssa_register_count); compiler->ssa_register_info[i].data_type = reg->data_type; compiler->ssa_register_info[i].id = val_id; -} - -static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, enum vkd3d_shader_component_type component_type, - uint32_t swizzle) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type reg_component_type; - const struct ssa_register_info *ssa; - unsigned int component_idx; - uint32_t type_id, val_id; - - ssa = spirv_compiler_get_ssa_register_info(compiler, reg); - val_id = ssa->id; - if (!val_id) - { - /* Should only be from a missing instruction implementation. */ - VKD3D_ASSERT(compiler->failed); - return 0; - } - VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); - - reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); - - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { - if (component_type != reg_component_type) - { - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } - - return val_id; - } - - if (component_type != reg_component_type) - { - /* Required for resource loads with sampled type int, because DXIL has no signedness. - * Only 128-bit vector sizes are used. */ - type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } - - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - component_idx = vsir_swizzle_get_component(swizzle, 0); - return vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx); + compiler->ssa_register_info[i].write_mask = write_mask; }
static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, @@ -4267,9 +4549,6 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, component_count = vsir_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type);
- if (reg->type == VKD3DSPR_SSA) - return spirv_compiler_emit_load_ssa_reg(compiler, reg, component_type, swizzle); - if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) { type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); @@ -4294,9 +4573,9 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, reg_info.component_type, vsir_write_mask_component_count(reg_info.write_mask)); val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone); + swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; }
- swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask);
@@ -4497,7 +4776,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler,
if (reg->type == VKD3DSPR_SSA) { - spirv_compiler_set_ssa_register_info(compiler, reg, val_id); + spirv_compiler_set_ssa_register_info(compiler, reg, write_mask, val_id); return; }
@@ -4883,35 +5162,36 @@ static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = static const struct { enum vkd3d_shader_register_type reg_type; + SpvStorageClass storage_class; struct vkd3d_spirv_builtin builtin; } vkd3d_register_builtins[] = { - {VKD3DSPR_THREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, - {VKD3DSPR_LOCALTHREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, - {VKD3DSPR_LOCALTHREADINDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, - {VKD3DSPR_THREADGROUPID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, + {VKD3DSPR_THREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, + {VKD3DSPR_LOCALTHREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, + {VKD3DSPR_LOCALTHREADINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, + {VKD3DSPR_THREADGROUPID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}},
- {VKD3DSPR_GSINSTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, - {VKD3DSPR_OUTPOINTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + {VKD3DSPR_GSINSTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + {VKD3DSPR_OUTPOINTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}},
- {VKD3DSPR_PRIMID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, + {VKD3DSPR_PRIMID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}},
- {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, + {VKD3DSPR_TESSCOORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}},
- {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, + {VKD3DSPR_POINT_COORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}},
- {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_COVERAGE, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_SAMPLEMASK, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}},
- {VKD3DSPR_DEPTHOUT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - {VKD3DSPR_DEPTHOUTGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUT, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUTGE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUTLE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}},
- {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, + {VKD3DSPR_OUTSTENCILREF, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}},
- {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, - {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, + {VKD3DSPR_WAVELANECOUNT, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, + {VKD3DSPR_WAVELANEINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, };
static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, @@ -4970,14 +5250,18 @@ static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_sysval( }
static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_register( - enum vkd3d_shader_register_type reg_type) + enum vkd3d_shader_register_type reg_type, SpvStorageClass *storage_class) { unsigned int i;
for (i = 0; i < ARRAY_SIZE(vkd3d_register_builtins); ++i) { if (vkd3d_register_builtins[i].reg_type == reg_type) + { + if (storage_class) + *storage_class = vkd3d_register_builtins[i].storage_class; return &vkd3d_register_builtins[i].builtin; + } }
return NULL; @@ -4990,7 +5274,7 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp
if ((builtin = get_spirv_builtin_for_sysval(compiler, sysval))) return builtin; - if ((builtin = get_spirv_builtin_for_register(reg_type))) + if ((builtin = get_spirv_builtin_for_register(reg_type, NULL))) return builtin;
if ((sysval != VKD3D_SHADER_SV_NONE && sysval != VKD3D_SHADER_SV_TARGET) @@ -5322,21 +5606,26 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, return input_id; }
-static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, +static void spirv_compiler_emit_io_register(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_register *reg = &dst->reg; const struct vkd3d_spirv_builtin *builtin; struct vkd3d_symbol reg_symbol; + SpvStorageClass storage_class; + uint32_t write_mask, id; struct rb_entry *entry; - uint32_t write_mask; - uint32_t input_id;
VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); VKD3D_ASSERT(reg->idx_count < 2);
- if (!(builtin = get_spirv_builtin_for_register(reg->type))) + if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) + { + builtin = &vkd3d_output_point_size_builtin; + storage_class = SpvStorageClassOutput; + } + else if (!(builtin = get_spirv_builtin_for_register(reg->type, &storage_class))) { FIXME("Unhandled register %#x.\n", reg->type); return; @@ -5347,14 +5636,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) return;
- input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassInput, 0); + id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, 0);
write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); - vkd3d_symbol_set_register_info(®_symbol, input_id, - SpvStorageClassInput, builtin->component_type, write_mask); + vkd3d_symbol_set_register_info(®_symbol, id, + storage_class, builtin->component_type, write_mask); reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, ®_symbol); - spirv_compiler_emit_register_debug_name(builder, input_id, reg); + spirv_compiler_emit_register_execution_mode(compiler, reg->type); + spirv_compiler_emit_register_debug_name(builder, id, reg); }
static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, @@ -5458,41 +5748,6 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * } }
-static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_register *reg = &dst->reg; - const struct vkd3d_spirv_builtin *builtin; - struct vkd3d_symbol reg_symbol; - uint32_t write_mask; - uint32_t output_id; - - VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); - VKD3D_ASSERT(reg->idx_count < 2); - - if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) - { - builtin = &vkd3d_output_point_size_builtin; - } - else if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { - FIXME("Unhandled register %#x.\n", reg->type); - return; - } - - output_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); - - vkd3d_symbol_make_register(®_symbol, reg); - write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); - vkd3d_symbol_set_register_info(®_symbol, output_id, - SpvStorageClassOutput, builtin->component_type, write_mask); - reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; - spirv_compiler_put_symbol(compiler, ®_symbol); - spirv_compiler_emit_register_execution_mode(compiler, reg->type); - spirv_compiler_emit_register_debug_name(builder, output_id, reg); -} - static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, const struct vkd3d_spirv_builtin *builtin, const unsigned int *array_sizes, unsigned int size_count) { @@ -5857,16 +6112,6 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * compiler->epilogue_function_id = 0; }
-static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *compiler) -{ - struct vkd3d_shader_dst_param dst; - - memset(&dst, 0, sizeof(dst)); - vsir_register_init(&dst.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); - dst.write_mask = VKD3DSP_WRITEMASK_0; - spirv_compiler_emit_input_register(compiler, &dst); -} - static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; @@ -5879,7 +6124,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp break; case VKD3D_SHADER_TYPE_HULL: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); - spirv_compiler_emit_hull_shader_builtins(compiler); break; case VKD3D_SHADER_TYPE_DOMAIN: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); @@ -6699,27 +6943,6 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); }
-static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - - /* INPUT and PATCHCONST are handled in spirv_compiler_emit_io_declarations(). - * OUTPOINTID is handled in spirv_compiler_emit_hull_shader_builtins(). */ - if (dst->reg.type != VKD3DSPR_INPUT && dst->reg.type != VKD3DSPR_PATCHCONST - && dst->reg.type != VKD3DSPR_OUTPOINTID) - spirv_compiler_emit_input_register(compiler, dst); -} - -static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - - if (dst->reg.type != VKD3DSPR_OUTPUT && dst->reg.type != VKD3DSPR_PATCHCONST) - spirv_compiler_emit_output_register(compiler, dst); -} - static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -6822,15 +7045,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler compiler->spirv_builder.invocation_count = instruction->declaration.count; }
-static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, + enum vkd3d_tessellator_domain domain) { - enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; SpvExecutionMode mode;
- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) - return; - switch (domain) { case VKD3D_TESSELLATOR_DOMAIN_LINE: @@ -6916,15 +7135,10 @@ static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compile SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); }
-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); - static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) - spirv_compiler_emit_default_control_point_phase(compiler); - vkd3d_spirv_build_op_function_end(builder);
if (is_in_control_point_phase(compiler)) @@ -6969,9 +7183,6 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, phase->function_id = function_id; /* The insertion location must be set after the label is emitted. */ phase->function_location = 0; - - if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - compiler->emit_default_control_point_phase = instruction->flags; }
static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) @@ -7000,63 +7211,6 @@ static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) } }
-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) -{ - const struct shader_signature *output_signature = &compiler->output_signature; - const struct shader_signature *input_signature = &compiler->input_signature; - uint32_t type_id, output_ptr_type_id, input_id, dst_id, invocation_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type component_type; - struct vkd3d_shader_src_param invocation; - struct vkd3d_shader_register input_reg; - unsigned int component_count; - unsigned int i; - - vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - spirv_compiler_initialise_block(compiler); - invocation_id = spirv_compiler_emit_load_invocation_id(compiler); - - memset(&invocation, 0, sizeof(invocation)); - vsir_register_init(&invocation.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_INT, 0); - invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; - - vsir_register_init(&input_reg, VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 2); - input_reg.idx[0].offset = 0; - input_reg.idx[0].rel_addr = &invocation; - input_reg.idx[1].offset = 0; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - - VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *output = &output_signature->elements[i]; - const struct signature_element *input = &input_signature->elements[i]; - struct vkd3d_shader_register_info output_reg_info; - struct vkd3d_shader_register output_reg; - - VKD3D_ASSERT(input->mask == output->mask); - VKD3D_ASSERT(input->component_type == output->component_type); - - input_reg.idx[1].offset = i; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - - vsir_register_init(&output_reg, VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); - output_reg.idx[0].offset = i; - spirv_compiler_get_register_info(compiler, &output_reg, &output_reg_info); - - component_type = output->component_type; - component_count = vsir_write_mask_component_count(output->mask); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); - - dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_reg_info.id, invocation_id); - - vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); - } - - vkd3d_spirv_build_op_return(builder); -} - static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) { @@ -7105,12 +7259,13 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler
static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) { + size_t table_count = compiler->offset_info.descriptor_table_count; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t void_id;
/* If a patch constant function used descriptor indexing the offsets must be reloaded. */ - memset(compiler->descriptor_offset_ids, 0, compiler->offset_info.descriptor_table_count - * sizeof(*compiler->descriptor_offset_ids)); + if (table_count) + memset(compiler->descriptor_offset_ids, 0, table_count * sizeof(*compiler->descriptor_offset_ids)); vkd3d_spirv_builder_begin_main_function(builder); vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder));
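The new guard matters because descriptor_offset_ids may be a null pointer when no descriptor tables are present, and passing a null pointer to memset() is undefined behaviour in C even with a zero size. A minimal sketch of the pattern (hypothetical names):

    #include <stddef.h>
    #include <string.h>

    /* Illustration only: passing a null pointer to memset() is undefined
     * behaviour even with a zero size, so the empty case is skipped. */
    static void reset_ids(unsigned int *ids, size_t count)
    {
        if (count) /* ids may be NULL when count is 0 */
            memset(ids, 0, count * sizeof(*ids));
    }

    int main(void)
    {
        reset_ids(NULL, 0); /* safe: memset() is never reached */
        return 0;
    }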
@@ -7147,7 +7302,6 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru {VKD3DSIH_DDIV, SpvOpFDiv}, {VKD3DSIH_DIV, SpvOpFDiv}, {VKD3DSIH_DMUL, SpvOpFMul}, - {VKD3DSIH_DTOF, SpvOpFConvert}, {VKD3DSIH_DTOI, SpvOpConvertFToS}, {VKD3DSIH_DTOU, SpvOpConvertFToU}, {VKD3DSIH_FREM, SpvOpFRem}, @@ -7501,7 +7655,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler,
general_implementation: write_mask = dst->write_mask; - if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) + if (data_type_is_64_bit(src->reg.data_type) && !data_type_is_64_bit(dst->reg.data_type)) write_mask = vsir_write_mask_64_from_32(write_mask); else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) write_mask = vsir_write_mask_32_from_64(write_mask); @@ -7785,6 +7939,7 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, uint32_t src_type_id, dst_type_id, condition_type_id; enum vkd3d_shader_component_type component_type; unsigned int component_count; + uint32_t write_mask;
VKD3D_ASSERT(instruction->dst_count == 1); VKD3D_ASSERT(instruction->src_count == 1); @@ -7794,21 +7949,23 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, * and for NaN to yield zero. */
component_count = vsir_write_mask_component_count(dst->write_mask); - src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask); - dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask);
if (src->reg.data_type == VKD3D_DATA_DOUBLE) { + write_mask = vkd3d_write_mask_from_component_count(component_count); int_min_id = spirv_compiler_get_constant_double_vector(compiler, -2147483648.0, component_count); float_max_id = spirv_compiler_get_constant_double_vector(compiler, 2147483648.0, component_count); } else { + write_mask = dst->write_mask; int_min_id = spirv_compiler_get_constant_float_vector(compiler, -2147483648.0f, component_count); float_max_id = spirv_compiler_get_constant_float_vector(compiler, 2147483648.0f, component_count); }
+ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); + dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, int_min_id);
/* VSIR allows the destination of a signed conversion to be unsigned. */ @@ -7838,6 +7995,7 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, const struct vkd3d_shader_src_param *src = instruction->src; uint32_t src_type_id, dst_type_id, condition_type_id; unsigned int component_count; + uint32_t write_mask;
VKD3D_ASSERT(instruction->dst_count == 1); VKD3D_ASSERT(instruction->src_count == 1); @@ -7847,21 +8005,23 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, * and for NaN to yield zero. */
component_count = vsir_write_mask_component_count(dst->write_mask); - src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask); - dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask);
if (src->reg.data_type == VKD3D_DATA_DOUBLE) { + write_mask = vkd3d_write_mask_from_component_count(component_count); zero_id = spirv_compiler_get_constant_double_vector(compiler, 0.0, component_count); float_max_id = spirv_compiler_get_constant_double_vector(compiler, 4294967296.0, component_count); } else { + write_mask = dst->write_mask; zero_id = spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count); float_max_id = spirv_compiler_get_constant_float_vector(compiler, 4294967296.0f, component_count); }
+ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); + dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, zero_id);
uint_max_id = spirv_compiler_get_constant_uint_vector(compiler, UINT_MAX, component_count); @@ -7875,6 +8035,29 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, spirv_compiler_emit_store_dst(compiler, dst, val_id); }
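Both conversion helpers implement the saturating semantics the comments describe: inputs below the representable range clamp to the minimum, inputs at or above the maximum clamp to the maximum, and NaN yields zero. A scalar C model of the signed path (illustrative only, not the vkd3d API; the unsigned path is analogous with a [0, 2^32) range):

    #include <math.h>
    #include <stdint.h>

    static int32_t ftoi_saturate(double src)
    {
        if (isnan(src))
            return 0;                /* NaN yields zero. */
        if (src <= -2147483648.0)
            return INT32_MIN;        /* Clamped below. */
        if (src >= 2147483648.0)
            return INT32_MAX;        /* Clamped above. */
        return (int32_t)src;
    }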
+static void spirv_compiler_emit_dtof(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id, src_id; + unsigned int component_count; + uint32_t write_mask; + + component_count = vsir_write_mask_component_count(dst->write_mask); + write_mask = vkd3d_write_mask_from_component_count(component_count); + + src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count); + val_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpFConvert, type_id, src_id); + if (instruction->flags & VKD3DSI_PRECISE_XYZW) + vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -8010,6 +8193,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t src0_id, src1_id, type_id, result_id; + uint32_t write_mask = dst->write_mask; unsigned int component_count; SpvOp op;
@@ -8040,8 +8224,21 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co
component_count = vsir_write_mask_component_count(dst->write_mask);
- src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); - src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + switch (instruction->opcode) + { + case VKD3DSIH_DEQO: + case VKD3DSIH_DGEO: + case VKD3DSIH_DLT: + case VKD3DSIH_DNE: + write_mask = vkd3d_write_mask_from_component_count(component_count); + break; + + default: + break; + } + + src0_id = spirv_compiler_emit_load_src(compiler, &src[0], write_mask); + src1_id = spirv_compiler_emit_load_src(compiler, &src[1], write_mask);
type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, @@ -10214,13 +10411,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_TGSM_STRUCTURED: spirv_compiler_emit_dcl_tgsm_structured(compiler, instruction); break; - case VKD3DSIH_DCL_INPUT_PS: - case VKD3DSIH_DCL_INPUT: - spirv_compiler_emit_dcl_input(compiler, instruction); - break; - case VKD3DSIH_DCL_OUTPUT: - spirv_compiler_emit_dcl_output(compiler, instruction); - break; case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; @@ -10239,9 +10429,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: spirv_compiler_emit_output_vertex_count(compiler, instruction); break; - case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); - break; case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: spirv_compiler_emit_tessellator_output_primitive(compiler, instruction->declaration.tessellator_output_primitive); @@ -10275,7 +10462,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DDIV: case VKD3DSIH_DIV: case VKD3DSIH_DMUL: - case VKD3DSIH_DTOF: case VKD3DSIH_FREM: case VKD3DSIH_FTOD: case VKD3DSIH_IADD: @@ -10363,6 +10549,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_FTOU: spirv_compiler_emit_ftou(compiler, instruction); break; + case VKD3DSIH_DTOF: + spirv_compiler_emit_dtof(compiler, instruction); + break; case VKD3DSIH_DEQO: case VKD3DSIH_DGEO: case VKD3DSIH_DLT: @@ -10561,11 +10750,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, break; case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_INPUT_SIV: - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_DCL_RESOURCE_RAW: case VKD3DSIH_DCL_RESOURCE_STRUCTURED: case VKD3DSIH_DCL_UAV_RAW: @@ -10586,6 +10770,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) { + struct vkd3d_shader_dst_param dst; + for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i) spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i);
@@ -10609,19 +10795,27 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler)
if (compiler->program->has_point_size) { - struct vkd3d_shader_dst_param dst; - vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; - spirv_compiler_emit_output_register(compiler, &dst); + spirv_compiler_emit_io_register(compiler, &dst); }
if (compiler->program->has_point_coord) { - struct vkd3d_shader_dst_param dst; - vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); - spirv_compiler_emit_input_register(compiler, &dst); + spirv_compiler_emit_io_register(compiler, &dst); + } + + for (unsigned int i = 0; i < sizeof(compiler->program->io_dcls) * CHAR_BIT; ++i) + { + /* For hull shaders we internally generate references to OUTPOINTID, + * so that must always be enabled. */ + if (bitmap_is_set(compiler->program->io_dcls, i) + || (compiler->program->shader_version.type == VKD3D_SHADER_TYPE_HULL && i == VKD3DSPR_OUTPOINTID)) + { + vsir_dst_param_init(&dst, i, VKD3D_DATA_FLOAT, 0); + spirv_compiler_emit_io_register(compiler, &dst); + } } }
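Input and output registers that are not driven by the shader signatures are now declared from the program's io_dcls bitmap, one bit per vkd3d_shader_register_type, instead of from DCL_INPUT/DCL_OUTPUT instructions. The bit test follows vkd3d's usual packed-word bitmap shape; a self-contained sketch of what bitmap_is_set() is assumed to do:

    #include <stdbool.h>
    #include <stdint.h>

    /* One bit per register type, packed into 32-bit words. */
    static bool bitmap_is_set(const uint32_t *map, unsigned int i)
    {
        return map[i >> 5] & (1u << (i & 0x1f));
    }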
@@ -10677,7 +10871,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compile_info, compiler->message_context)) < 0) return result;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) @@ -10743,6 +10937,10 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compiler->input_control_point_count = program->input_control_point_count; compiler->output_control_point_count = program->output_control_point_count;
+ if (program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN + || (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler))) + spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) spirv_compiler_emit_shader_signature_outputs(compiler);
@@ -10823,7 +11021,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) { struct vkd3d_shader_code text; - if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) + if (vkd3d_spirv_binary_to_text(spirv, compile_info->source_name, environment, + compiler->formatting, &text, compiler->message_context) != VKD3D_OK) return VKD3D_ERROR; vkd3d_shader_free_shader_code(spirv); *spirv = text; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 9c41e2c2053..82302aac666 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -21,9 +21,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
-#include "hlsl.h" #include "vkd3d_shader_private.h" -#include "d3dcommon.h"
#define SM4_MAX_SRC_COUNT 6 #define SM4_MAX_DST_COUNT 2 @@ -163,24 +161,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT);
#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu)
-/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ -#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 - -#define VKD3D_SM4_REQUIRES_DOUBLES 0x00000001 -#define VKD3D_SM4_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002 -#define VKD3D_SM4_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004 -#define VKD3D_SM4_REQUIRES_64_UAVS 0x00000008 -#define VKD3D_SM4_REQUIRES_MINIMUM_PRECISION 0x00000010 -#define VKD3D_SM4_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020 -#define VKD3D_SM4_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040 -#define VKD3D_SM4_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080 -#define VKD3D_SM4_REQUIRES_TILED_RESOURCES 0x00000100 -#define VKD3D_SM4_REQUIRES_STENCIL_REF 0x00000200 -#define VKD3D_SM4_REQUIRES_INNER_COVERAGE 0x00000400 -#define VKD3D_SM4_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800 -#define VKD3D_SM4_REQUIRES_ROVS 0x00001000 -#define VKD3D_SM4_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000 - enum vkd3d_sm4_opcode { VKD3D_SM4_OP_ADD = 0x00, @@ -707,6 +687,7 @@ struct vkd3d_sm4_opcode_info char src_info[SM4_MAX_SRC_COUNT]; void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); + bool is_conditional_op; };
static const enum vkd3d_primitive_type output_primitive_type_table[] = @@ -1268,6 +1249,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi { ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + priv->p.program->tess_domain = ins->declaration.tessellator_domain; }
static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1275,6 +1257,7 @@ static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins { ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + priv->p.program->tess_partitioning = ins->declaration.tessellator_partitioning; }
static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1282,6 +1265,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader { ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + priv->p.program->tess_output_primitive = ins->declaration.tessellator_output_primitive; }
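All three handlers now mirror the declaration into vsir_program fields (tess_domain, tess_partitioning, tess_output_primitive), so backends can consume the tessellation state without rescanning the instruction stream; the spirv.c hunk above reads program->tess_domain exactly this way. A compressed view of the producer/consumer pair, reusing names from these hunks:

    /* Producer (TPF reader): record the declaration on the program. */
    priv->p.program->tess_domain = ins->declaration.tessellator_domain;

    /* Consumer (SPIR-V backend): emit it once, independently of where the
     * DCL appeared in the token stream. */
    spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain);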
static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1407,8 +1391,6 @@ struct sm4_stat
struct tpf_compiler { - /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */ - struct hlsl_ctx *ctx; struct vsir_program *program; struct vkd3d_sm4_lookup_tables lookup; struct sm4_stat *stat; @@ -1439,18 +1421,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", shader_sm4_read_case_condition}, {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, @@ -1468,7 +1450,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"}, {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, @@ -1502,7 +1484,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, @@ -1967,16 +1949,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t return lookup->register_type_info_from_vkd3d[vkd3d_type]; }
-static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( - const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) -{ - const struct vkd3d_sm4_register_type_info *register_type_info = - get_info_from_vkd3d_register_type(lookup, vkd3d_type); - - VKD3D_ASSERT(register_type_info); - return register_type_info->default_src_swizzle_type; -} - static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) { @@ -2816,7 +2788,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro
/* Estimate instruction count to avoid reallocation in most shaders. */ if (!vsir_program_init(program, compile_info, - &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) return false; vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; @@ -2925,6 +2897,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con program->input_signature = dxbc_desc.input_signature; program->output_signature = dxbc_desc.output_signature; program->patch_constant_signature = dxbc_desc.patch_constant_signature; + program->features = dxbc_desc.features; memset(&dxbc_desc, 0, sizeof(dxbc_desc));
/* DXBC stores used masks inverted for output signatures, for some reason. @@ -2993,8 +2966,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_OK; }
-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); - bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) { @@ -3099,7 +3070,7 @@ static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semant
bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, - const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func) + const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func, bool is_patch) { unsigned int i;
@@ -3160,7 +3131,21 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, }; - bool needs_compat_mapping = ascii_strncasecmp(semantic_name, "sv_", 3); + bool has_sv_prefix = !ascii_strncasecmp(semantic_name, "sv_", 3); + + if (is_patch) + { + VKD3D_ASSERT(!output); + + if (!ascii_strcasecmp(semantic_name, "sv_position") + || (semantic_compat_mapping && !ascii_strcasecmp(semantic_name, "position"))) + *sysval_semantic = VKD3D_SHADER_SV_POSITION; + else if (has_sv_prefix) + return false; + else + *sysval_semantic = VKD3D_SHADER_SV_NONE; + return true; + }
if (is_patch_constant_func) { @@ -3202,7 +3187,7 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s { if (!ascii_strcasecmp(semantic_name, semantics[i].name) && output == semantics[i].output - && (semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) + && (semantic_compat_mapping || has_sv_prefix) && version->type == semantics[i].shader_type) { *sysval_semantic = semantics[i].semantic; @@ -3210,25 +3195,24 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s } }
- if (!needs_compat_mapping) + if (has_sv_prefix) return false;
*sysval_semantic = VKD3D_SHADER_SV_NONE; return true; }
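The new is_patch path accepts SV_Position (or plain "position" under compat mapping), rejects any other sv_-prefixed name, and passes everything else through as an ordinary varying. A standalone model of that decision, with hypothetical enum names standing in for the vkd3d constants:

    #include <stdbool.h>
    #include <strings.h>

    enum sysval {SV_NONE, SV_POSITION};

    /* Model of the is_patch mapping above; not the vkd3d API. */
    static bool patch_sysval_from_name(const char *name, bool compat, enum sysval *sv)
    {
        if (!strcasecmp(name, "sv_position") || (compat && !strcasecmp(name, "position")))
            *sv = SV_POSITION;
        else if (!strncasecmp(name, "sv_", 3))
            return false;        /* Unknown system-value semantic. */
        else
            *sv = SV_NONE;       /* Ordinary user semantic. */
        return true;
    }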
-static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - uint32_t tag, struct vkd3d_bytecode_buffer *buffer) +static void add_section(struct tpf_compiler *tpf, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) { /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN * sections to be aligned. Without this, the sections themselves will be * aligned, but their reported sizes won't. */ size_t size = bytecode_align(buffer);
- dxbc_writer_add_section(dxbc, tag, buffer->data, size); + dxbc_writer_add_section(&tpf->dxbc, tag, buffer->data, size);
if (buffer->status < 0) - ctx->result = buffer->status; + tpf->result = buffer->status; }
static int signature_element_pointer_compare(const void *x, const void *y) @@ -3289,2327 +3273,648 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); }
- add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); + add_section(tpf, tag, &buffer); vkd3d_free(sorted_elements); }
-static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +static enum vkd3d_sm4_resource_type sm4_resource_dimension(enum vkd3d_shader_resource_type resource_type) { - switch (type->class) + switch (resource_type) { - case HLSL_CLASS_MATRIX: - VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else - return D3D_SVC_MATRIX_ROWS; - case HLSL_CLASS_SCALAR: - return D3D_SVC_SCALAR; - case HLSL_CLASS_VECTOR: - return D3D_SVC_VECTOR; - - case HLSL_CLASS_ARRAY: - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_ERROR: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_COMPUTE_SHADER: - case HLSL_CLASS_DOMAIN_SHADER: - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: - case HLSL_CLASS_NULL: - break; - } - vkd3d_unreachable(); -} - -static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -{ - switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - return D3D_SVT_BOOL; - case HLSL_TYPE_DOUBLE: - return D3D_SVT_DOUBLE; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3D_SVT_FLOAT; - case HLSL_TYPE_INT: - return D3D_SVT_INT; - case HLSL_TYPE_UINT: - return D3D_SVT_UINT; + case VKD3D_SHADER_RESOURCE_TEXTURE_1D: + return VKD3D_SM4_RESOURCE_TEXTURE_1D; + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: + return VKD3D_SM4_RESOURCE_TEXTURE_2D; + case VKD3D_SHADER_RESOURCE_TEXTURE_3D: + return VKD3D_SM4_RESOURCE_TEXTURE_3D; + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; + case VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; + case VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; + case VKD3D_SHADER_RESOURCE_TEXTURE_2DMS: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; + case VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; + case VKD3D_SHADER_RESOURCE_BUFFER: + return VKD3D_SM4_RESOURCE_BUFFER; default: vkd3d_unreachable(); } }
-static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +struct sm4_instruction_modifier { - const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); - const char *name = array_type->name ? array_type->name : "<unnamed>"; - const struct hlsl_profile_info *profile = ctx->profile; - unsigned int array_size = 0; - size_t name_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (profile->major_version >= 5) - name_offset = put_string(buffer, name); - - if (type->class == HLSL_CLASS_ARRAY) - array_size = hlsl_get_multiarray_size(type); + enum vkd3d_sm4_instruction_modifier type;
- if (array_type->class == HLSL_CLASS_STRUCT) + union { - unsigned int field_count = 0; - size_t fields_offset = 0; - - for (i = 0; i < array_type->e.record.field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) - continue; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm4_type(ctx, buffer, field->type); - ++field_count; - } - - fields_offset = bytecode_align(buffer); - - for (i = 0; i < array_type->e.record.field_count; ++i) + struct { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) - continue; - - put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); - } - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); - put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); - } - else - { - VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, 0)); - put_u32(buffer, 1); - } - - if (profile->major_version >= 5) - { - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, name_offset); - } -} + int u, v, w; + } aoffimmi; + } u; +};
-static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) { - switch (type->class) - { - case HLSL_CLASS_SAMPLER: - return D3D_SIT_SAMPLER; - case HLSL_CLASS_TEXTURE: - return D3D_SIT_TEXTURE; - case HLSL_CLASS_UAV: - return D3D_SIT_UAV_RWTYPED; - default: - break; - } - - vkd3d_unreachable(); -} + uint32_t word = 0;
-static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) -{ - const struct hlsl_type *format = type->e.resource.format; + word |= VKD3D_SM4_MODIFIER_MASK & imod->type;
- switch (format->e.numeric.type) + switch (imod->type) { - case HLSL_TYPE_DOUBLE: - return VKD3D_SM4_DATA_DOUBLE; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - if (format->modifiers & HLSL_MODIFIER_UNORM) - return VKD3D_SM4_DATA_UNORM; - if (format->modifiers & HLSL_MODIFIER_SNORM) - return VKD3D_SM4_DATA_SNORM; - return VKD3D_SM4_DATA_FLOAT; - - case HLSL_TYPE_INT: - return VKD3D_SM4_DATA_INT; + case VKD3D_SM4_MODIFIER_AOFFIMMI: + VKD3D_ASSERT(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); + VKD3D_ASSERT(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); + VKD3D_ASSERT(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); + word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; break;
- case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - return VKD3D_SM4_DATA_UINT; - default: vkd3d_unreachable(); } -}
-static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SRV_DIMENSION_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SRV_DIMENSION_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SRV_DIMENSION_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SRV_DIMENSION_TEXTURECUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return D3D_SRV_DIMENSION_TEXTURE1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return D3D_SRV_DIMENSION_TEXTURE2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; - case HLSL_SAMPLER_DIM_CUBEARRAY: - return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; - case HLSL_SAMPLER_DIM_BUFFER: - case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return D3D_SRV_DIMENSION_BUFFER; - default: - vkd3d_unreachable(); - } + return word; }
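The AOFFIMMI modifier packs each texel offset into a signed 4-bit field, which is why the asserts restrict the range to [-8, 7]. A worked example for an offset of (1, -2, 0), relying only on the shift macros named above:

    uint32_t word = VKD3D_SM4_MODIFIER_AOFFIMMI;
    word |= ((uint32_t)1  & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; /* 0x1 */
    word |= ((uint32_t)-2 & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; /* 0xe, two's-complement nibble */
    word |= ((uint32_t)0  & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; /* 0x0 */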
-struct extern_resource +struct sm4_instruction { - /* var is only not NULL if this resource is a whole variable, so it may be responsible for more - * than one component. */ - const struct hlsl_ir_var *var; - const struct hlsl_buffer *buffer; - - char *name; - bool is_user_packed; - - /* The data type of a single component of the resource. - * This might be different from the data type of the resource itself in 4.0 - * profiles, where an array (or multi-dimensional array) is handled as a - * single resource, unlike in 5.0. */ - struct hlsl_type *component_type; + enum vkd3d_sm4_opcode opcode; + uint32_t extra_bits;
- enum hlsl_regset regset; - unsigned int id, space, index, bind_count; + struct sm4_instruction_modifier modifiers[1]; + unsigned int modifier_count;
- struct vkd3d_shader_location loc; -}; + struct vkd3d_shader_dst_param dsts[2]; + unsigned int dst_count;
-static int sm4_compare_extern_resources(const void *a, const void *b) -{ - const struct extern_resource *aa = (const struct extern_resource *)a; - const struct extern_resource *bb = (const struct extern_resource *)b; - int r; + struct vkd3d_shader_src_param srcs[5]; + unsigned int src_count;
- if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) - return r; + unsigned int byte_stride;
- if ((r = vkd3d_u32_compare(aa->space, bb->space))) - return r; + uint32_t idx[3]; + unsigned int idx_count;
- return vkd3d_u32_compare(aa->index, bb->index); -} + struct vkd3d_shader_src_param idx_srcs[7]; + unsigned int idx_src_count; +};
-static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, + unsigned int i) { - unsigned int i; - - for (i = 0; i < count; ++i) - vkd3d_free(extern_resources[i].name); - vkd3d_free(extern_resources); -} + if (reg->idx[i].rel_addr) + { + if (reg->idx[i].offset == 0) + return VKD3D_SM4_ADDRESSING_RELATIVE; + else + return VKD3D_SM4_ADDRESSING_RELATIVE | VKD3D_SM4_ADDRESSING_OFFSET; + }
-static const char *string_skip_tag(const char *string) -{ - if (!strncmp(string, "<resource>", strlen("<resource>"))) - return string + strlen("<resource>"); - return string; + return 0; }
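sm4_get_index_addressing_from_reg() distinguishes three encodings per register index; spelled out with illustrative operands (assembly-ish forms, not exact disassembler output):

    /* reg->idx[i] classification above:
     *   cb0[4]        -> 0 (immediate index only)
     *   cb0[r1.x]     -> VKD3D_SM4_ADDRESSING_RELATIVE
     *   cb0[r1.x + 4] -> VKD3D_SM4_ADDRESSING_RELATIVE | VKD3D_SM4_ADDRESSING_OFFSET
     */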
-static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, + enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { - bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; - struct extern_resource *extern_resources = NULL; - const struct hlsl_ir_var *var; - struct hlsl_buffer *buffer; - enum hlsl_regset regset; - size_t capacity = 0; - char *name; - - *count = 0; + const struct vkd3d_sm4_register_type_info *register_type_info; + uint32_t sm4_reg_type, sm4_reg_dim; + uint32_t token = 0;
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, reg->type); + if (!register_type_info) { - if (separate_components) - { - unsigned int component_count = hlsl_type_component_count(var->data_type); - unsigned int k, regset_offset; - - for (k = 0; k < component_count; ++k) - { - struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); - struct vkd3d_string_buffer *name_buffer; - - if (!hlsl_type_is_resource(component_type)) - continue; - - regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); - - if (regset_offset > var->regs[regset].allocation_size) - continue; - - if (var->objects_usage[regset][regset_offset].used) - { - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; - } - - if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; - } - if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - hlsl_release_string_buffer(ctx, name_buffer); - return NULL; - } - hlsl_release_string_buffer(ctx, name_buffer); - - extern_resources[*count].var = NULL; - extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; - extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + FIXME("Unhandled vkd3d-shader register type %#x.\n", reg->type); + sm4_reg_type = VKD3D_SM4_RT_TEMP; + if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT) + sm4_swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + } + else + { + sm4_reg_type = register_type_info->sm4_type; + if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT) + sm4_swizzle_type = register_type_info->default_src_swizzle_type; + } + sm4_reg_dim = sm4_dimension_from_vsir_dimension(reg->dimension);
- extern_resources[*count].component_type = component_type; + token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; + token |= reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; + token |= sm4_reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + if (reg->idx_count > 0) + token |= sm4_get_index_addressing_from_reg(reg, 0) << VKD3D_SM4_ADDRESSING_SHIFT0; + if (reg->idx_count > 1) + token |= sm4_get_index_addressing_from_reg(reg, 1) << VKD3D_SM4_ADDRESSING_SHIFT1; + if (reg->idx_count > 2) + token |= sm4_get_index_addressing_from_reg(reg, 2) << VKD3D_SM4_ADDRESSING_SHIFT2;
- extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id; - extern_resources[*count].space = var->regs[regset].space; - extern_resources[*count].index = var->regs[regset].index + regset_offset; - extern_resources[*count].bind_count = 1; - extern_resources[*count].loc = var->loc; + if (sm4_reg_dim == VKD3D_SM4_DIMENSION_VEC4) + { + token |= (uint32_t)sm4_swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT;
- ++*count; - } - } - } - else + switch (sm4_swizzle_type) { - unsigned int r; - - if (!hlsl_type_is_resource(var->data_type)) - continue; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { - if (!var->regs[r].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; - } - - if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; - } - - extern_resources[*count].var = var; - extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; - /* For some reason 5.1 resources aren't marked as - * user-packed, but cbuffers still are. */ - extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) - && !!var->reg_reservation.reg_type; + case VKD3D_SM4_SWIZZLE_NONE: + VKD3D_ASSERT(sm4_swizzle || register_is_constant(reg)); + token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; + break;
- extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + case VKD3D_SM4_SWIZZLE_VEC4: + token |= (sm4_swizzle << VKD3D_SM4_SWIZZLE_SHIFT) & VKD3D_SM4_SWIZZLE_MASK; + break;
- extern_resources[*count].regset = r; - extern_resources[*count].id = var->regs[r].id; - extern_resources[*count].space = var->regs[r].space; - extern_resources[*count].index = var->regs[r].index; - extern_resources[*count].bind_count = var->bind_count[r]; - extern_resources[*count].loc = var->loc; + case VKD3D_SM4_SWIZZLE_SCALAR: + token |= (sm4_swizzle << VKD3D_SM4_SCALAR_DIM_SHIFT) & VKD3D_SM4_SCALAR_DIM_MASK; + break;
- ++*count; - } + default: + vkd3d_unreachable(); } }
- LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) + return token; +} + +static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, + unsigned int j) +{ + unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + unsigned int k; + + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) { - if (!buffer->reg.allocated) - continue; + const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; + uint32_t idx_src_token;
- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; - } + VKD3D_ASSERT(idx_src); + VKD3D_ASSERT(!idx_src->modifiers); + VKD3D_ASSERT(idx_src->reg.type != VKD3DSPR_IMMCONST); + idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle);
- if (!(name = hlsl_strdup(ctx, buffer->name))) + put_u32(buffer, idx_src_token); + for (k = 0; k < idx_src->reg.idx_count; ++k) { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; + put_u32(buffer, idx_src->reg.idx[k].offset); + VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr); } - - extern_resources[*count].var = NULL; - extern_resources[*count].buffer = buffer; - - extern_resources[*count].name = name; - extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; - - extern_resources[*count].component_type = NULL; - - extern_resources[*count].regset = HLSL_REGSET_NUMERIC; - extern_resources[*count].id = buffer->reg.id; - extern_resources[*count].space = buffer->reg.space; - extern_resources[*count].index = buffer->reg.index; - extern_resources[*count].bind_count = 1; - extern_resources[*count].loc = buffer->loc; - - ++*count; } - - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - return extern_resources; + else + { + put_u32(tpf->buffer, reg->idx[j].offset); + } }
-/* For some reason, for matrices, values from default value initializers end up in different - * components than from regular initializers. Default value initializers fill the matrix in - * vertical reading order (left-to-right top-to-bottom) instead of regular reading order - * (top-to-bottom left-to-right), so they have to be adjusted. - * An exception is that the order of matrix initializers for function parameters are row-major - * (top-to-bottom left-to-right). */ -static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) +static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) { - unsigned int element_comp_count, element, x, y, i; - unsigned int base = 0; - - switch (type->class) - { - case HLSL_CLASS_MATRIX: - x = index / type->dimy; - y = index % type->dimy; - return y * type->dimx + x; - - case HLSL_CLASS_ARRAY: - element_comp_count = hlsl_type_component_count(type->e.array.type); - element = index / element_comp_count; - base = element * element_comp_count; - return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); - - case HLSL_CLASS_STRUCT: - for (i = 0; i < type->e.record.field_count; ++i) - { - struct hlsl_type *field_type = type->e.record.fields[i].type; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = 0; + unsigned int j;
- element_comp_count = hlsl_type_component_count(field_type); - if (index - base < element_comp_count) - return base + get_component_index_from_default_initializer_index(field_type, index - base); - base += element_comp_count; - } - break; + token = sm4_encode_register(tpf, &dst->reg, VKD3D_SM4_SWIZZLE_NONE, dst->write_mask); + put_u32(buffer, token);
- default: - return index; - } - vkd3d_unreachable(); + for (j = 0; j < dst->reg.idx_count; ++j) + sm4_write_register_index(tpf, &dst->reg, j); }
-static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src) { - uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; - unsigned int cbuffer_count = 0, extern_resources_count, i, j; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; - struct vkd3d_bytecode_buffer buffer = {0}; - struct extern_resource *extern_resources; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - - static const uint16_t target_types[] = - { - 0xffff, /* PIXEL */ - 0xfffe, /* VERTEX */ - 0x4753, /* GEOMETRY */ - 0x4853, /* HULL */ - 0x4453, /* DOMAIN */ - 0x4353, /* COMPUTE */ - }; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - ++cbuffer_count; - } - - put_u32(&buffer, cbuffer_count); - cbuffer_position = put_u32(&buffer, 0); - put_u32(&buffer, extern_resources_count); - resource_position = put_u32(&buffer, 0); - put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), - target_types[profile->type])); - put_u32(&buffer, 0); /* FIXME: compilation flags */ - creator_position = put_u32(&buffer, 0); - - if (profile->major_version >= 5) - { - put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, binding_desc_size); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ - put_u32(&buffer, 0); /* unknown; possibly a null terminator */ - } - - /* Bound resources. */ - - resources_offset = bytecode_align(&buffer); - set_u32(&buffer, resource_position, resources_offset); - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - - if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - if (resource->buffer) - put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - else - put_u32(&buffer, sm4_resource_type(resource->component_type)); - if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) - { - unsigned int dimx = resource->component_type->e.resource.format->dimx; - - put_u32(&buffer, sm4_data_type(resource->component_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } - else - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); - put_u32(&buffer, 0); - } - put_u32(&buffer, resource->index); - put_u32(&buffer, resource->bind_count); - put_u32(&buffer, flags); - - if (hlsl_version_ge(ctx, 5, 1)) - { - put_u32(&buffer, resource->space); - put_u32(&buffer, resource->id); - } - } - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - - string_offset = put_string(&buffer, resource->name); - set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); - } - - /* Buffers. */ - - cbuffers_offset = bytecode_align(&buffer); - set_u32(&buffer, cbuffer_position, cbuffers_offset); - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - unsigned int var_count = 0; - - if (!cbuffer->reg.allocated) - continue; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - ++var_count; - } - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var_count); - put_u32(&buffer, 0); /* variable offset */ - put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); - put_u32(&buffer, 0); /* FIXME: flags */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - size_t vars_start = bytecode_align(&buffer); - - if (!cbuffer->reg.allocated) - continue; - - set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - { - uint32_t flags = 0; - - if (var->is_read) - flags |= D3D_SVF_USED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var->buffer_offset * sizeof(float)); - put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); - put_u32(&buffer, flags); - put_u32(&buffer, 0); /* type */ - put_u32(&buffer, 0); /* default value */ - - if (profile->major_version >= 5) - { - put_u32(&buffer, 0); /* texture start */ - put_u32(&buffer, 0); /* texture count */ - put_u32(&buffer, 0); /* sampler start */ - put_u32(&buffer, 0); /* sampler count */ - } - } - } - - j = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - { - const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); - size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); - - string_offset = put_string(&buffer, var->name); - set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); - set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); - - if (var->default_values) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; - unsigned int comp_count = hlsl_type_component_count(var->data_type); - unsigned int default_value_offset; - unsigned int k; - - default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); - set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); - - for (k = 0; k < comp_count; ++k) - { - struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); - unsigned int comp_offset, comp_index; - enum hlsl_regset regset; - - if (comp_type->class == HLSL_CLASS_STRING) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Cannot write string default value."); - continue; - } - - comp_index = get_component_index_from_default_initializer_index(var->data_type, k); - comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set); - if (regset == HLSL_REGSET_NUMERIC) - { - if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) - hlsl_fixme(ctx, &var->loc, "Write double default values."); - - set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), - var->default_values[k].number.u); - } - } - } - ++j; - } - } - } - - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - - add_section(ctx, dxbc, TAG_RDEF, &buffer); - - sm4_free_extern_resources(extern_resources, extern_resources_count); -} - -static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return VKD3D_SM4_RESOURCE_TEXTURE_1D; - case HLSL_SAMPLER_DIM_2D: - return VKD3D_SM4_RESOURCE_TEXTURE_2D; - case HLSL_SAMPLER_DIM_3D: - return VKD3D_SM4_RESOURCE_TEXTURE_3D; - case HLSL_SAMPLER_DIM_CUBE: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; - case HLSL_SAMPLER_DIM_CUBEARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; - case HLSL_SAMPLER_DIM_BUFFER: - case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return VKD3D_SM4_RESOURCE_BUFFER; - default: - vkd3d_unreachable(); - } -} - -struct sm4_instruction_modifier -{ - enum vkd3d_sm4_instruction_modifier type; - - union - { - struct - { - int u, v, w; - } aoffimmi; - } u; -}; - -static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) -{ - uint32_t word = 0; - - word |= VKD3D_SM4_MODIFIER_MASK & imod->type; - - switch (imod->type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - VKD3D_ASSERT(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); - VKD3D_ASSERT(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); - VKD3D_ASSERT(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); - word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; - word |= 
-                    ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT;
-            break;
-
-        default:
-            vkd3d_unreachable();
-    }
-
-    return word;
-}
-
-struct sm4_instruction
-{
-    enum vkd3d_sm4_opcode opcode;
-    uint32_t extra_bits;
-
-    struct sm4_instruction_modifier modifiers[1];
-    unsigned int modifier_count;
-
-    struct vkd3d_shader_dst_param dsts[2];
-    unsigned int dst_count;
-
-    struct vkd3d_shader_src_param srcs[5];
-    unsigned int src_count;
-
-    unsigned int byte_stride;
-
-    uint32_t idx[3];
-    unsigned int idx_count;
-
-    struct vkd3d_shader_src_param idx_srcs[7];
-    unsigned int idx_src_count;
-};
-
-static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask,
-        const struct hlsl_ir_node *instr)
-{
-    VKD3D_ASSERT(instr->reg.allocated);
-    reg->type = VKD3DSPR_TEMP;
-    reg->dimension = VSIR_DIMENSION_VEC4;
-    reg->idx[0].offset = instr->reg.id;
-    reg->idx_count = 1;
-    *writemask = instr->reg.writemask;
-}
-
-static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg,
-        enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref,
-        struct sm4_instruction *sm4_instr)
-{
-    const struct hlsl_ir_var *var = deref->var;
-    unsigned int offset_const_deref;
-
-    reg->type = type;
-    reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
-    reg->dimension = VSIR_DIMENSION_VEC4;
-
-    VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
-
-    if (!var->indexable)
-    {
-        offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
-        reg->idx[0].offset += offset_const_deref / 4;
-        reg->idx_count = 1;
-    }
-    else
-    {
-        offset_const_deref = deref->const_offset;
-        reg->idx[1].offset = offset_const_deref / 4;
-        reg->idx_count = 2;
-
-        if (deref->rel_offset.node)
-        {
-            struct vkd3d_shader_src_param *idx_src;
-            unsigned int idx_writemask;
-
-            VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs));
-            idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++];
-            memset(idx_src, 0, sizeof(*idx_src));
-
-            reg->idx[1].rel_addr = idx_src;
-            sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node);
-            VKD3D_ASSERT(idx_writemask != 0);
-            idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask));
-        }
-    }
-
-    *writemask = 0xf & (0xf << (offset_const_deref % 4));
-    if (var->regs[HLSL_REGSET_NUMERIC].writemask)
-        *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
-}
-
-static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg,
-        uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr)
-{
-    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-    const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref);
-    const struct hlsl_ir_var *var = deref->var;
-    struct hlsl_ctx *ctx = tpf->ctx;
-
-    if (var->is_uniform)
-    {
-        enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
-
-        if (regset == HLSL_REGSET_TEXTURES)
-        {
-            reg->type = VKD3DSPR_RESOURCE;
-            reg->dimension = VSIR_DIMENSION_VEC4;
-            if (vkd3d_shader_ver_ge(version, 5, 1))
-            {
-                reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id;
-                reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */
-                reg->idx_count = 2;
-            }
-            else
-            {
-                reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index;
-                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
-                reg->idx_count = 1;
-            }
-            VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES);
-            *writemask = VKD3DSP_WRITEMASK_ALL;
-        }
-        else if (regset == HLSL_REGSET_UAVS)
-        {
-            reg->type = VKD3DSPR_UAV;
-            reg->dimension = VSIR_DIMENSION_VEC4;
-            if (vkd3d_shader_ver_ge(version, 5, 1))
-            {
-                reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id;
-                reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */
-                reg->idx_count = 2;
-            }
-            else
-            {
-                reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index;
-                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
-                reg->idx_count = 1;
-            }
-            VKD3D_ASSERT(regset == HLSL_REGSET_UAVS);
-            *writemask = VKD3DSP_WRITEMASK_ALL;
-        }
-        else if (regset == HLSL_REGSET_SAMPLERS)
-        {
-            reg->type = VKD3DSPR_SAMPLER;
-            reg->dimension = VSIR_DIMENSION_NONE;
-            if (vkd3d_shader_ver_ge(version, 5, 1))
-            {
-                reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id;
-                reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */
-                reg->idx_count = 2;
-            }
-            else
-            {
-                reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index;
-                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
-                reg->idx_count = 1;
-            }
-            VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS);
-            *writemask = VKD3DSP_WRITEMASK_ALL;
-        }
-        else
-        {
-            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
-
-            VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR);
-            reg->type = VKD3DSPR_CONSTBUFFER;
-            reg->dimension = VSIR_DIMENSION_VEC4;
-            if (vkd3d_shader_ver_ge(version, 5, 1))
-            {
-                reg->idx[0].offset = var->buffer->reg.id;
-                reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */
-                reg->idx[2].offset = offset / 4;
-                reg->idx_count = 3;
-            }
-            else
-            {
-                reg->idx[0].offset = var->buffer->reg.index;
-                reg->idx[1].offset = offset / 4;
-                reg->idx_count = 2;
-            }
-            *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
-        }
-    }
-    else if (var->is_input_semantic)
-    {
-        bool has_idx;
-
-        if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx))
-        {
-            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-
-            if (has_idx)
-            {
-                reg->idx[0].offset = var->semantic.index + offset / 4;
-                reg->idx_count = 1;
-            }
-
-            if (shader_sm4_is_scalar_register(reg))
-                reg->dimension = VSIR_DIMENSION_SCALAR;
-            else
-                reg->dimension = VSIR_DIMENSION_VEC4;
-            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
-        }
-        else
-        {
-            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-
-            VKD3D_ASSERT(hlsl_reg.allocated);
-
-            if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
-                reg->type = VKD3DSPR_PATCHCONST;
-            else
-                reg->type = VKD3DSPR_INPUT;
-            reg->dimension = VSIR_DIMENSION_VEC4;
-            reg->idx[0].offset = hlsl_reg.id;
-            reg->idx_count = 1;
-            *writemask = hlsl_reg.writemask;
-        }
-    }
-    else if (var->is_output_semantic)
-    {
-        bool has_idx;
-
-        if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx))
-        {
-            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-
-            if (has_idx)
-            {
-                reg->idx[0].offset = var->semantic.index + offset / 4;
-                reg->idx_count = 1;
-            }
-
-            if (shader_sm4_is_scalar_register(reg))
-                reg->dimension = VSIR_DIMENSION_SCALAR;
-            else
-                reg->dimension = VSIR_DIMENSION_VEC4;
-            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
-        }
-        else
-        {
-            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-
-            VKD3D_ASSERT(hlsl_reg.allocated);
-            reg->type = VKD3DSPR_OUTPUT;
-            reg->dimension = VSIR_DIMENSION_VEC4;
-            reg->idx[0].offset = hlsl_reg.id;
-            reg->idx_count = 1;
-            *writemask = hlsl_reg.writemask;
-        }
-    }
-    else
-    {
-        enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
-
-        sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr);
-    }
-}
-
-static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src,
-        const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr)
-{
-    unsigned int hlsl_swizzle;
-    uint32_t writemask;
-
-    sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr);
-    if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4)
-    {
-        hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
-        src->swizzle = swizzle_from_sm4(hlsl_swizzle);
-    }
-}
-
-static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr)
-{
-    sm4_register_from_node(&dst->reg, &dst->write_mask, instr);
-}
-
-static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src,
-        const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask)
-{
-    src->swizzle = 0;
-    src->reg.type = VKD3DSPR_IMMCONST;
-    if (width == 1)
-    {
-        src->reg.dimension = VSIR_DIMENSION_SCALAR;
-        src->reg.u.immconst_u32[0] = value->u[0].u;
-    }
-    else
-    {
-        unsigned int i, j = 0;
-
-        src->reg.dimension = VSIR_DIMENSION_VEC4;
-        for (i = 0; i < 4; ++i)
-        {
-            if ((map_writemask & (1u << i)) && (j < width))
-                src->reg.u.immconst_u32[i] = value->u[j++].u;
-            else
-                src->reg.u.immconst_u32[i] = 0;
-        }
-    }
-}
-
-static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src,
-        const struct hlsl_ir_node *instr, uint32_t map_writemask)
-{
-    unsigned int hlsl_swizzle;
-    uint32_t writemask;
-
-    if (instr->type == HLSL_IR_CONSTANT)
-    {
-        struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
-
-        sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask);
-        return;
-    }
-
-    sm4_register_from_node(&src->reg, &writemask, instr);
-    if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4)
-    {
-        hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
-        src->swizzle = swizzle_from_sm4(hlsl_swizzle);
-    }
-}
-
-static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg,
-        unsigned int i)
-{
-    if (reg->idx[i].rel_addr)
-    {
-        if (reg->idx[i].offset == 0)
-            return VKD3D_SM4_ADDRESSING_RELATIVE;
-        else
-            return VKD3D_SM4_ADDRESSING_RELATIVE | VKD3D_SM4_ADDRESSING_OFFSET;
-    }
-
-    return 0;
-}
-
-static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg,
-        enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle)
-{
-    const struct vkd3d_sm4_register_type_info *register_type_info;
-    uint32_t sm4_reg_type, sm4_reg_dim;
-    uint32_t token = 0;
-
-    register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, reg->type);
-    if (!register_type_info)
-    {
-        FIXME("Unhandled vkd3d-shader register type %#x.\n", reg->type);
-        sm4_reg_type = VKD3D_SM4_RT_TEMP;
-        if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT)
-            sm4_swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
-    }
-    else
-    {
-        sm4_reg_type = register_type_info->sm4_type;
-        if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT)
-            sm4_swizzle_type = register_type_info->default_src_swizzle_type;
-    }
-    sm4_reg_dim = sm4_dimension_from_vsir_dimension(reg->dimension);
-
-    token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT;
-    token |= reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT;
-    token |= sm4_reg_dim << VKD3D_SM4_DIMENSION_SHIFT;
-    if (reg->idx_count > 0)
-        token |= sm4_get_index_addressing_from_reg(reg, 0) << VKD3D_SM4_ADDRESSING_SHIFT0;
-    if (reg->idx_count > 1)
-        token |= sm4_get_index_addressing_from_reg(reg, 1) << VKD3D_SM4_ADDRESSING_SHIFT1;
-    if (reg->idx_count > 2)
-        token |= sm4_get_index_addressing_from_reg(reg, 2) << VKD3D_SM4_ADDRESSING_SHIFT2;
-
-    if (sm4_reg_dim == VKD3D_SM4_DIMENSION_VEC4)
-    {
-        token |= (uint32_t)sm4_swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT;
-
-        switch (sm4_swizzle_type)
-        {
-            case VKD3D_SM4_SWIZZLE_NONE:
-                VKD3D_ASSERT(sm4_swizzle || register_is_constant(reg));
-                token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK;
-                break;
-
-            case VKD3D_SM4_SWIZZLE_VEC4:
-                token |= (sm4_swizzle << VKD3D_SM4_SWIZZLE_SHIFT) & VKD3D_SM4_SWIZZLE_MASK;
-                break;
-
-            case VKD3D_SM4_SWIZZLE_SCALAR:
-                token |= (sm4_swizzle << VKD3D_SM4_SCALAR_DIM_SHIFT) & VKD3D_SM4_SCALAR_DIM_MASK;
-                break;
-
-            default:
-                vkd3d_unreachable();
-        }
-    }
-
-    return token;
-}
-
-static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg,
-        unsigned int j)
-{
-    unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j);
-    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-    unsigned int k;
-
-    if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE)
-    {
-        const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr;
-        uint32_t idx_src_token;
-
-        VKD3D_ASSERT(idx_src);
-        VKD3D_ASSERT(!idx_src->modifiers);
-        VKD3D_ASSERT(idx_src->reg.type != VKD3DSPR_IMMCONST);
-        idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle);
-
-        put_u32(buffer, idx_src_token);
-        for (k = 0; k < idx_src->reg.idx_count; ++k)
-        {
-            put_u32(buffer, idx_src->reg.idx[k].offset);
-            VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr);
-        }
-    }
-    else
-    {
-        put_u32(tpf->buffer, reg->idx[j].offset);
-    }
-}
-
-static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst)
-{
-    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-    uint32_t token = 0;
-    unsigned int j;
-
-    token = sm4_encode_register(tpf, &dst->reg, VKD3D_SM4_SWIZZLE_NONE, dst->write_mask);
-    put_u32(buffer, token);
-
-    for (j = 0; j < dst->reg.idx_count; ++j)
-        sm4_write_register_index(tpf, &dst->reg, j);
-}
-
-static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src)
-{
-    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-    uint32_t token = 0, mod_token = 0;
-    unsigned int j;
-
-    token = sm4_encode_register(tpf, &src->reg, VKD3D_SM4_SWIZZLE_DEFAULT, swizzle_to_sm4(src->swizzle));
-
-    switch (src->modifiers)
-    {
-        case VKD3DSPSM_NONE:
-            mod_token = VKD3D_SM4_REGISTER_MODIFIER_NONE;
-            break;
-
-        case VKD3DSPSM_ABS:
-            mod_token = (VKD3D_SM4_REGISTER_MODIFIER_ABS << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
-                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER;
-            break;
-
-        case VKD3DSPSM_NEG:
-            mod_token = (VKD3D_SM4_REGISTER_MODIFIER_NEGATE << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
-                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER;
-            break;
-
-        case VKD3DSPSM_ABSNEG:
-            mod_token = (VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
-                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER;
-            break;
-
-        default:
-            ERR("Unhandled register modifier %#x.\n", src->modifiers);
-            vkd3d_unreachable();
-            break;
-    }
-
-    if (src->modifiers)
-    {
-        token |= VKD3D_SM4_EXTENDED_OPERAND;
-        put_u32(buffer, token);
-        put_u32(buffer, mod_token);
-    }
-    else
-    {
-        put_u32(buffer, token);
-    }
-
-    for (j = 0; j < src->reg.idx_count; ++j)
-        sm4_write_register_index(tpf, &src->reg, j);
-
-    if (src->reg.type == VKD3DSPR_IMMCONST)
-    {
-        put_u32(buffer, src->reg.u.immconst_u32[0]);
-        if (src->reg.dimension == VSIR_DIMENSION_VEC4)
-        {
-            put_u32(buffer, src->reg.u.immconst_u32[1]);
-            put_u32(buffer, src->reg.u.immconst_u32[2]);
-            put_u32(buffer, src->reg.u.immconst_u32[3]);
-        }
-    }
-}
-
-static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr)
-{
-    enum vkd3d_shader_type shader_type = tpf->program->shader_version.type;
-    enum vkd3d_sm4_stat_field stat_field;
-    uint32_t opcode;
-
-    ++tpf->stat->fields[VKD3D_STAT_INSTR_COUNT];
-
-    opcode = instr->opcode & VKD3D_SM4_OPCODE_MASK;
-    stat_field = get_stat_field_from_sm4_opcode(&tpf->lookup, opcode);
-
-    switch (opcode)
-    {
-        case VKD3D_SM4_OP_DCL_TEMPS:
-            tpf->stat->fields[stat_field] = max(tpf->stat->fields[stat_field], instr->idx[0]);
-            break;
-        case VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY:
-        case VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE:
-            tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM4_PRIMITIVE_TYPE_MASK)
-                    >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT;
-            break;
-        case VKD3D_SM4_OP_DCL_VERTICES_OUT:
-        case VKD3D_SM5_OP_DCL_GS_INSTANCES:
-            tpf->stat->fields[stat_field] = instr->idx[0];
-            break;
-        case VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN:
-        case VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING:
-        case VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE:
-            tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT;
-            break;
-        case VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT:
-        case VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT:
-            if ((shader_type == VKD3D_SHADER_TYPE_HULL && opcode == VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT)
-                    || (shader_type == VKD3D_SHADER_TYPE_DOMAIN
-                    && opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT))
-            {
-                tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_CONTROL_POINT_COUNT_MASK)
-                        >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT;
-            }
-            break;
-        default:
-            ++tpf->stat->fields[stat_field];
-    }
-}
-
-static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr)
-{
-    uint32_t token = instr->opcode | instr->extra_bits;
-    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-    unsigned int size, i, j;
-    size_t token_position;
-
-    if (instr->modifier_count > 0)
-        token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
-
-    token_position = put_u32(buffer, 0);
-
-    for (i = 0; i < instr->modifier_count; ++i)
-    {
-        uint32_t modifier_token = sm4_encode_instruction_modifier(&instr->modifiers[i]);
-
-        if (instr->modifier_count > i + 1)
-            modifier_token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
-        put_u32(buffer, modifier_token);
-    }
-
-    for (i = 0; i < instr->dst_count; ++i)
-        sm4_write_dst_register(tpf, &instr->dsts[i]);
-
-    for (i = 0; i < instr->src_count; ++i)
-        sm4_write_src_register(tpf, &instr->srcs[i]);
-
-    if (instr->byte_stride)
-        put_u32(buffer, instr->byte_stride);
-
-    for (j = 0; j < instr->idx_count; ++j)
-        put_u32(buffer, instr->idx[j]);
-
-    size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t);
-    token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT);
-    set_u32(buffer, token_position, token);
-
-    sm4_update_stat_counters(tpf, instr);
-}
-
-static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr,
-        const struct hlsl_ir_node *texel_offset)
-{
-    struct sm4_instruction_modifier modif;
-    struct hlsl_ir_constant *offset;
-
-    if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT)
-        return false;
-    offset = hlsl_ir_constant(texel_offset);
-
-    modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI;
-    modif.u.aoffimmi.u = offset->value.u[0].i;
-    modif.u.aoffimmi.v = 0;
-    modif.u.aoffimmi.w = 0;
-    if (offset->node.data_type->dimx > 1)
-        modif.u.aoffimmi.v = offset->value.u[1].i;
-    if (offset->node.data_type->dimx > 2)
-        modif.u.aoffimmi.w = offset->value.u[2].i;
-    if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7
-            || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7
-            || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7)
-        return false;
-
-    instr->modifiers[instr->modifier_count++] = modif;
-    return true;
-}
-
-static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer)
-{
-    size_t size = (cbuffer->used_size + 3) / 4;
-
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER,
-
-        .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4,
-        .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER,
-        .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE,
-        .src_count = 1,
-    };
-
-    if (hlsl_version_ge(tpf->ctx, 5, 1))
-    {
-        instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id;
-        instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index;
-        instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */
-        instr.srcs[0].reg.idx_count = 3;
-
-        instr.idx[0] = size;
-        instr.idx[1] = cbuffer->reg.space;
-        instr.idx_count = 2;
-    }
-    else
-    {
-        instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index;
-        instr.srcs[0].reg.idx[1].offset = size;
-        instr.srcs[0].reg.idx_count = 2;
-    }
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource)
-{
-    unsigned int i;
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
-
-        .dsts[0].reg.type = VKD3DSPR_SAMPLER,
-        .dst_count = 1,
-    };
-
-    VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS);
-
-    if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON)
-        instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT;
-
-    for (i = 0; i < resource->bind_count; ++i)
-    {
-        if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
-            continue;
-
-        if (hlsl_version_ge(tpf->ctx, 5, 1))
-        {
-            VKD3D_ASSERT(!i);
-            instr.dsts[0].reg.idx[0].offset = resource->id;
-            instr.dsts[0].reg.idx[1].offset = resource->index;
-            instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */
-            instr.dsts[0].reg.idx_count = 3;
-
-            instr.idx[0] = resource->space;
-            instr.idx_count = 1;
-        }
-        else
-        {
-            instr.dsts[0].reg.idx[0].offset = resource->index + i;
-            instr.dsts[0].reg.idx_count = 1;
-        }
-        write_sm4_instruction(tpf, &instr);
-    }
-}
-
-static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource,
-        bool uav)
-{
-    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-    enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES;
-    struct hlsl_type *component_type;
-    struct sm4_instruction instr;
-    bool multisampled;
-    unsigned int i;
-
-    VKD3D_ASSERT(resource->regset == regset);
-
-    component_type = resource->component_type;
-
-    for (i = 0; i < resource->bind_count; ++i)
-    {
-        if (resource->var && !resource->var->objects_usage[regset][i].used)
-            continue;
-
-        instr = (struct sm4_instruction)
-        {
-            .dsts[0].reg.type = uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE,
-            .dsts[0].reg.idx[0].offset = resource->id + i,
-            .dsts[0].reg.idx_count = 1,
-            .dst_count = 1,
-
-            .idx[0] = sm4_data_type(component_type) * 0x1111,
-            .idx_count = 1,
-        };
-
-        multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
-                || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY;
-
-        if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count)
-        {
-            hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-                    "Multisampled texture object declaration needs sample count for profile %u.%u.",
-                    version->major, version->minor);
-        }
-
-        if (vkd3d_shader_ver_ge(version, 5, 1))
-        {
-            VKD3D_ASSERT(!i);
-            instr.dsts[0].reg.idx[0].offset = resource->id;
-            instr.dsts[0].reg.idx[1].offset = resource->index;
-            instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */
-            instr.dsts[0].reg.idx_count = 3;
-
-            instr.idx[1] = resource->space;
-            instr.idx_count = 2;
-        }
-        else
-        {
-            instr.dsts[0].reg.idx[0].offset = resource->index + i;
-            instr.dsts[0].reg.idx_count = 1;
-        }
-
-        if (uav)
-        {
-            switch (component_type->sampler_dim)
-            {
-                case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
-                    instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED;
-                    instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4;
-                    break;
-                case HLSL_SAMPLER_DIM_RAW_BUFFER:
-                    instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW;
-                    break;
-                default:
-                    instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED;
-                    break;
-            }
-
-            if (component_type->e.resource.rasteriser_ordered)
-                instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT;
-        }
-        else
-        {
-            switch (component_type->sampler_dim)
-            {
-                case HLSL_SAMPLER_DIM_RAW_BUFFER:
-                    instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW;
-                    break;
-                default:
-                    instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE;
-                    break;
-            }
-        }
-        instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT);
-
-        if (multisampled)
-            instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT;
-
-        write_sm4_instruction(tpf, &instr);
-    }
-}
-
-static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_DCL_TEMPS,
-
-        .idx = {count},
-        .idx_count = 1,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP,
-
-        .idx = {temp->register_idx, temp->register_size, temp->component_count},
-        .idx_count = 3,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
-        const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = opcode,
-
-        .dsts[0] = *dst,
-        .dst_count = 1,
-
-        .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
-        const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = opcode,
-
-        .dsts[0] = semantic->reg,
-        .dst_count = 1,
-
-        .idx[0] = semantic->sysval_semantic,
-        .idx_count = 1,
-
-        .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
-
-        .idx = {group_size->x, group_size->y, group_size->z},
-        .idx_count = 3,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,
-        .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_write_hs_decls(const struct tpf_compiler *tpf)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_HS_DECLS,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_HS_FORK_PHASE,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT,
-        .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT,
-        .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN,
-        .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf,
-        enum vkd3d_shader_tessellator_partitioning partitioning)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING,
-        .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT,
-    };
+    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
+    uint32_t token = 0, mod_token = 0;
+    unsigned int j;
-    write_sm4_instruction(tpf, &instr);
-}
+    token = sm4_encode_register(tpf, &src->reg, VKD3D_SM4_SWIZZLE_DEFAULT, swizzle_to_sm4(src->swizzle));
-static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf,
-        enum vkd3d_shader_tessellator_output_primitive output_primitive)
-{
-    struct sm4_instruction instr =
+    switch (src->modifiers)
     {
-        .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE,
-        .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT,
-    };
-
-    write_sm4_instruction(tpf, &instr);
-}
+        case VKD3DSPSM_NONE:
+            mod_token = VKD3D_SM4_REGISTER_MODIFIER_NONE;
+            break;
-static void write_sm4_ret(const struct tpf_compiler *tpf)
-{
-    struct sm4_instruction instr =
-    {
-        .opcode = VKD3D_SM4_OP_RET,
-    };
+        case VKD3DSPSM_ABS:
+            mod_token = (VKD3D_SM4_REGISTER_MODIFIER_ABS << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
+                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER;
+            break;
-    write_sm4_instruction(tpf, &instr);
-}
+        case VKD3DSPSM_NEG:
+            mod_token = (VKD3D_SM4_REGISTER_MODIFIER_NEGATE << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
+                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER;
+            break;
-static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst,
-        const struct hlsl_deref *resource, const struct hlsl_ir_node *coords,
-        const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset,
-        enum hlsl_sampler_dim dim)
-{
-    const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource);
-    bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE
-            && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY);
-    bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS);
-    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-    bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER;
-    unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL;
-    struct sm4_instruction instr;
+        case VKD3DSPSM_ABSNEG:
+            mod_token = (VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
+                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER;
+            break;
-    memset(&instr, 0, sizeof(instr));
-    if (uav)
-        instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED;
-    else if (raw)
-        instr.opcode = VKD3D_SM5_OP_LD_RAW;
-    else
-        instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD;
+        default:
+            ERR("Unhandled register modifier %#x.\n", src->modifiers);
+            vkd3d_unreachable();
+            break;
+    }
-    if (texel_offset)
+    if (src->modifiers)
     {
-        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
-        {
-            hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
-                    "Offset must resolve to integer literal in the range -8 to 7.");
-            return;
-        }
+        token |= VKD3D_SM4_EXTENDED_OPERAND;
+        put_u32(buffer, token);
+        put_u32(buffer, mod_token);
     }
-
-    sm4_dst_from_node(&instr.dsts[0], dst);
-    instr.dst_count = 1;
-
-    if (!uav)
+    else
     {
-        /* Mipmap level is in the last component in the IR, but needs to be in the W
-         * component in the instruction. */
-        unsigned int dim_count = hlsl_sampler_dim_count(dim);
-
-        if (dim_count == 1)
-            coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3;
-        if (dim_count == 2)
-            coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3;
+        put_u32(buffer, token);
     }
-    sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask);
-
-    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
-
-    instr.src_count = 2;
+    for (j = 0; j < src->reg.idx_count; ++j)
+        sm4_write_register_index(tpf, &src->reg, j);
-    if (multisampled)
+    if (src->reg.type == VKD3DSPR_IMMCONST)
     {
-        if (sample_index->type == HLSL_IR_CONSTANT)
-        {
-            struct vkd3d_shader_register *reg = &instr.srcs[2].reg;
-            struct hlsl_ir_constant *index;
-
-            index = hlsl_ir_constant(sample_index);
-
-            memset(&instr.srcs[2], 0, sizeof(instr.srcs[2]));
-            reg->type = VKD3DSPR_IMMCONST;
-            reg->dimension = VSIR_DIMENSION_SCALAR;
-            reg->u.immconst_u32[0] = index->value.u[0].u;
-        }
-        else if (version->major == 4 && version->minor == 0)
-        {
-            hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index.");
-        }
-        else
+        put_u32(buffer, src->reg.u.immconst_u32[0]);
+        if (src->reg.dimension == VSIR_DIMENSION_VEC4)
         {
-            sm4_src_from_node(tpf, &instr.srcs[2], sample_index, 0);
+            put_u32(buffer, src->reg.u.immconst_u32[1]);
+            put_u32(buffer, src->reg.u.immconst_u32[2]);
+            put_u32(buffer, src->reg.u.immconst_u32[3]);
         }
-
-        ++instr.src_count;
     }
-
-    write_sm4_instruction(tpf, &instr);
 }
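An aside on the operand layout emitted above: a source register with an abs/neg modifier is written as two tokens, the register token with its extended-operand bit set followed by a separate modifier token; an unmodified register takes a single token. The following standalone C sketch shows only the shape of that scheme, with illustrative constants rather than the real VKD3D_SM4_* values:

    /* Illustrative sketch, not part of the patch: one or two operand tokens
     * depending on whether the source carries a modifier. */
    #include <stdint.h>

    #define EXT_OPERAND_BIT 0x80000000u  /* assumed position of the extended-operand flag */
    #define MOD_NEG_TOKEN   0x00000041u  /* assumed pre-encoded "negate" modifier token */

    static unsigned int emit_src_tokens(uint32_t *out, uint32_t reg_token, int negate)
    {
        unsigned int n = 0;

        if (negate)
        {
            out[n++] = reg_token | EXT_OPERAND_BIT; /* token 0: register, flagged as extended */
            out[n++] = MOD_NEG_TOKEN;               /* token 1: the modifier itself */
        }
        else
        {
            out[n++] = reg_token;                   /* unmodified source: single token */
        }
        return n;
    }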
-static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
+static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr)
 {
-    const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
-    const struct hlsl_ir_node *coords = load->coords.node;
-    const struct hlsl_deref *resource = &load->resource;
-    const struct hlsl_deref *sampler = &load->sampler;
-    const struct hlsl_ir_node *dst = &load->node;
-    struct sm4_instruction instr;
+    enum vkd3d_shader_type shader_type = tpf->program->shader_version.type;
+    enum vkd3d_sm4_stat_field stat_field;
+    uint32_t opcode;
-    memset(&instr, 0, sizeof(instr));
-    switch (load->load_type)
-    {
-        case HLSL_RESOURCE_SAMPLE:
-            instr.opcode = VKD3D_SM4_OP_SAMPLE;
-            break;
+    ++tpf->stat->fields[VKD3D_STAT_INSTR_COUNT];
-        case HLSL_RESOURCE_SAMPLE_CMP:
-            instr.opcode = VKD3D_SM4_OP_SAMPLE_C;
-            break;
+    opcode = instr->opcode & VKD3D_SM4_OPCODE_MASK;
+    stat_field = get_stat_field_from_sm4_opcode(&tpf->lookup, opcode);
-        case HLSL_RESOURCE_SAMPLE_CMP_LZ:
-            instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ;
+    switch (opcode)
+    {
+        case VKD3D_SM4_OP_DCL_TEMPS:
+            tpf->stat->fields[stat_field] = max(tpf->stat->fields[stat_field], instr->idx[0]);
             break;
-
-        case HLSL_RESOURCE_SAMPLE_LOD:
-            instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD;
+        case VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY:
+        case VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE:
+            tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM4_PRIMITIVE_TYPE_MASK)
+                    >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT;
             break;
-
-        case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
-            instr.opcode = VKD3D_SM4_OP_SAMPLE_B;
+        case VKD3D_SM4_OP_DCL_VERTICES_OUT:
+        case VKD3D_SM5_OP_DCL_GS_INSTANCES:
+            tpf->stat->fields[stat_field] = instr->idx[0];
             break;
-
-        case HLSL_RESOURCE_SAMPLE_GRAD:
-            instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD;
+        case VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN:
+        case VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING:
+        case VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE:
+            tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT;
+            break;
+        case VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT:
+        case VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT:
+            if ((shader_type == VKD3D_SHADER_TYPE_HULL && opcode == VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT)
+                    || (shader_type == VKD3D_SHADER_TYPE_DOMAIN
+                    && opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT))
+            {
+                tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_CONTROL_POINT_COUNT_MASK)
+                        >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT;
+            }
             break;
-
         default:
-            vkd3d_unreachable();
+            ++tpf->stat->fields[stat_field];
     }
+}
-    if (texel_offset)
-    {
-        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
-        {
-            hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
-                    "Offset must resolve to integer literal in the range -8 to 7.");
-            return;
-        }
-    }
+static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr)
+{
+    uint32_t token = instr->opcode | instr->extra_bits;
+    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
+    unsigned int size, i, j;
+    size_t token_position;
-    sm4_dst_from_node(&instr.dsts[0], dst);
-    instr.dst_count = 1;
+    if (instr->modifier_count > 0)
+        token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
-    sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
-    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
-    sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr);
-    instr.src_count = 3;
+    token_position = put_u32(buffer, 0);
-    if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD
-            || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS)
-    {
-        sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL);
-        ++instr.src_count;
-    }
-    else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
-    {
-        sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL);
-        sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL);
-        instr.src_count += 2;
-    }
-    else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP
-            || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ)
+    for (i = 0; i < instr->modifier_count; ++i)
     {
-        sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL);
-        ++instr.src_count;
-    }
+        uint32_t modifier_token = sm4_encode_instruction_modifier(&instr->modifiers[i]);
-    write_sm4_instruction(tpf, &instr);
-}
+        if (instr->modifier_count > i + 1)
+            modifier_token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
+        put_u32(buffer, modifier_token);
+    }
-static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-{
-    const struct hlsl_deref *resource = &load->resource;
-    const struct hlsl_ir_node *dst = &load->node;
-    struct sm4_instruction instr;
+    for (i = 0; i < instr->dst_count; ++i)
+        sm4_write_dst_register(tpf, &instr->dsts[i]);
-    VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
+    for (i = 0; i < instr->src_count; ++i)
+        sm4_write_src_register(tpf, &instr->srcs[i]);
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO;
-    if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
-        instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
+    if (instr->byte_stride)
+        put_u32(buffer, instr->byte_stride);
-    sm4_dst_from_node(&instr.dsts[0], dst);
-    instr.dst_count = 1;
+    for (j = 0; j < instr->idx_count; ++j)
+        put_u32(buffer, instr->idx[j]);
-    sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr);
-    instr.src_count = 1;
+    size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t);
+    token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT);
+    set_u32(buffer, token_position, token);
-    write_sm4_instruction(tpf, &instr);
+    sm4_update_stat_counters(tpf, instr);
 }
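The write_sm4_instruction() body above uses a reserve-then-backpatch pattern: the opcode token is emitted as a zero placeholder first, and only after all operand tokens are in the buffer is the instruction length ORed in via set_u32(). A minimal self-contained sketch of the same pattern (the buffer type and the length shift here are assumptions for illustration, not vkd3d's actual definitions):

    /* Sketch: reserve an opcode slot, emit operands, then patch the length in. */
    #include <stdint.h>
    #include <stddef.h>

    struct buf { uint32_t words[256]; size_t count; };  /* bounds checks omitted */

    static size_t put_word(struct buf *b, uint32_t w)
    {
        b->words[b->count] = w;
        return b->count++;          /* index of the written word, kept for patching */
    }

    static void encode_instruction(struct buf *b, uint32_t opcode_token)
    {
        size_t pos = put_word(b, 0);                 /* placeholder opcode token */
        put_word(b, 0xdeadbeefu);                    /* ... operand tokens ... */
        uint32_t len = (uint32_t)(b->count - pos);   /* length in words, incl. opcode */
        b->words[pos] = opcode_token | (len << 24);  /* assumed length-field shift */
    }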
-static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
+static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
 {
-    const struct hlsl_deref *resource = &load->resource;
-    const struct hlsl_ir_node *dst = &load->node;
-    struct sm4_instruction instr;
+    const struct vkd3d_shader_constant_buffer *cb = &ins->declaration.cb;
+    size_t size = (cb->size + 3) / 4;
-    if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER
-            || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+    struct sm4_instruction instr =
     {
-        hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers.");
-        return;
-    }
+        .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER,
-    VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
+        .srcs[0] = cb->src,
+        .src_count = 1,
+    };
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_RESINFO;
-    if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
-        instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
+    if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1))
+    {
+        instr.srcs[0].reg.idx[0].offset = cb->src.reg.idx[0].offset;
+        instr.srcs[0].reg.idx[1].offset = cb->range.first;
+        instr.srcs[0].reg.idx[2].offset = cb->range.last;
+        instr.srcs[0].reg.idx_count = 3;
-    sm4_dst_from_node(&instr.dsts[0], dst);
-    instr.dst_count = 1;
+        instr.idx[0] = size;
+        instr.idx[1] = cb->range.space;
+        instr.idx_count = 2;
+    }
+    else
+    {
+        instr.srcs[0].reg.idx[0].offset = cb->range.first;
+        instr.srcs[0].reg.idx[1].offset = size;
+        instr.srcs[0].reg.idx_count = 2;
+    }
-    sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL);
-    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
-    instr.src_count = 2;
+    if (ins->flags & VKD3DSI_INDEXED_DYNAMIC)
+        instr.extra_bits |= VKD3D_SM4_INDEX_TYPE_MASK;
     write_sm4_instruction(tpf, &instr);
 }
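tpf_dcl_constant_buffer() above is the vsir-driven replacement for the deleted write_sm4_dcl_constant_buffer(): on 5.1 targets the operand carries {id, range.first, range.last} with size and register space as trailing immediates, while pre-5.1 it is just {index, size}. A hypothetical helper making that branch explicit (field and parameter names here are illustrative, not vkd3d API):

    /* Sketch of the two dcl_constantBuffer index layouts distinguished above. */
    #include <stdint.h>

    struct cb_indices { uint32_t idx[3]; unsigned int idx_count; };

    static struct cb_indices cb_layout(uint32_t id, uint32_t first, uint32_t last,
            uint32_t size_in_vec4s, int sm51)
    {
        struct cb_indices d = {0};

        if (sm51)
        {
            /* SM5.1: operand = id + bind range; size/space follow as immediates. */
            d.idx[0] = id; d.idx[1] = first; d.idx[2] = last; d.idx_count = 3;
        }
        else
        {
            /* SM4/SM5.0: operand = register index + size in vec4 registers. */
            d.idx[0] = first; d.idx[1] = size_in_vec4s; d.idx_count = 2;
        }
        return d;
    }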
-static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff)
+static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count)
 {
     struct sm4_instruction instr =
     {
-        .opcode = VKD3D_SM4_OP_IF,
-        .extra_bits = VKD3D_SM4_CONDITIONAL_NZ,
-        .src_count = 1,
-    };
-
-    VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1);
-
-    sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL);
-    write_sm4_instruction(tpf, &instr);
-
-    write_sm4_block(tpf, &iff->then_block);
-
-    if (!list_empty(&iff->else_block.instrs))
-    {
-        instr.opcode = VKD3D_SM4_OP_ELSE;
-        instr.src_count = 0;
-        write_sm4_instruction(tpf, &instr);
+        .opcode = VKD3D_SM4_OP_DCL_TEMPS,
-        write_sm4_block(tpf, &iff->else_block);
-    }
+        .idx = {count},
+        .idx_count = 1,
+    };
-    instr.opcode = VKD3D_SM4_OP_ENDIF;
-    instr.src_count = 0;
     write_sm4_instruction(tpf, &instr);
 }
-static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump)
+static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp)
 {
-    struct sm4_instruction instr = {0};
-
-    switch (jump->type)
+    struct sm4_instruction instr =
     {
-        case HLSL_IR_JUMP_BREAK:
-            instr.opcode = VKD3D_SM4_OP_BREAK;
-            break;
-
-        case HLSL_IR_JUMP_CONTINUE:
-            instr.opcode = VKD3D_SM4_OP_CONTINUE;
-            break;
-
-        case HLSL_IR_JUMP_DISCARD_NZ:
-        {
-            instr.opcode = VKD3D_SM4_OP_DISCARD;
-            instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
-
-            memset(&instr.srcs[0], 0, sizeof(*instr.srcs));
-            instr.src_count = 1;
-            sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL);
-            break;
-        }
-
-        case HLSL_IR_JUMP_RETURN:
-            vkd3d_unreachable();
+        .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP,
-        default:
-            hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
-            return;
-    }
+        .idx = {temp->register_idx, temp->register_size, temp->component_count},
+        .idx_count = 3,
+    };
     write_sm4_instruction(tpf, &instr);
 }
-/* Does this variable's data come directly from the API user, rather than being
- * temporary or from a previous shader stage?
- * I.e. is it a uniform or VS input? */
-static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var)
+static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
+        const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags)
 {
-    if (var->is_uniform)
-        return true;
-
-    return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX;
-}
+    struct sm4_instruction instr =
+    {
+        .opcode = opcode,
-static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load)
-{
-    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-    const struct hlsl_type *type = load->node.data_type;
-    struct sm4_instruction instr;
+        .dsts[0] = *dst,
+        .dst_count = 1,
-    memset(&instr, 0, sizeof(instr));
+        .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
+    };
-    sm4_dst_from_node(&instr.dsts[0], &load->node);
-    instr.dst_count = 1;
+    write_sm4_instruction(tpf, &instr);
+}
-    VKD3D_ASSERT(hlsl_is_numeric_type(type));
-    if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var))
+static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
+        const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags)
+{
+    struct sm4_instruction instr =
     {
-        struct hlsl_constant_value value;
+        .opcode = opcode,
-        /* Uniform bools can be specified as anything, but internal bools always
-         * have 0 for false and ~0 for true. Normalize that here. */
+        .dsts[0] = semantic->reg,
+        .dst_count = 1,
-        instr.opcode = VKD3D_SM4_OP_MOVC;
+        .idx[0] = semantic->sysval_semantic,
+        .idx_count = 1,
-        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
+        .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
+    };
-        memset(&value, 0xff, sizeof(value));
-        sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask);
-        memset(&value, 0, sizeof(value));
-        sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask);
-        instr.src_count = 3;
-    }
-    else
+    write_sm4_instruction(tpf, &instr);
+}
+
+static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size)
+{
+    struct sm4_instruction instr =
     {
-        instr.opcode = VKD3D_SM4_OP_MOV;
+        .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
-        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
-        instr.src_count = 1;
-    }
+        .idx = {group_size->x, group_size->y, group_size->z},
+        .idx_count = 3,
+    };
     write_sm4_instruction(tpf, &instr);
 }
-static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop)
+static void tpf_dcl_sampler(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
 {
+    const struct vkd3d_shader_sampler *sampler = &ins->declaration.sampler;
     struct sm4_instruction instr =
     {
-        .opcode = VKD3D_SM4_OP_LOOP,
+        .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
+        .extra_bits = ins->flags << VKD3D_SM4_SAMPLER_MODE_SHIFT,
+
+        .dsts[0].reg.type = VKD3DSPR_SAMPLER,
+        .dst_count = 1,
     };
-    write_sm4_instruction(tpf, &instr);
+    if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1))
+    {
+        instr.dsts[0].reg.idx[0].offset = sampler->src.reg.idx[0].offset;
+        instr.dsts[0].reg.idx[1].offset = sampler->range.first;
+        instr.dsts[0].reg.idx[2].offset = sampler->range.last;
+        instr.dsts[0].reg.idx_count = 3;
-    write_sm4_block(tpf, &loop->body);
+        instr.idx[0] = ins->declaration.sampler.range.space;
+        instr.idx_count = 1;
+    }
+    else
+    {
+        instr.dsts[0].reg.idx[0].offset = sampler->range.first;
+        instr.dsts[0].reg.idx_count = 1;
+    }
-    instr.opcode = VKD3D_SM4_OP_ENDLOOP;
     write_sm4_instruction(tpf, &instr);
 }
-static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst,
-        const struct hlsl_deref *resource, const struct hlsl_deref *sampler,
-        const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset)
+static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
 {
+    const struct vkd3d_shader_structured_resource *structured_resource = &ins->declaration.structured_resource;
+    const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic;
     const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-    struct vkd3d_shader_src_param *src;
-    struct sm4_instruction instr;
+    const struct vkd3d_sm4_opcode_info *info;
+    struct sm4_instruction instr = {0};
+    unsigned int i, k;
+    bool uav;
-    memset(&instr, 0, sizeof(instr));
+    info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode);
+    VKD3D_ASSERT(info);
-    instr.opcode = VKD3D_SM4_OP_GATHER4;
+    uav = ins->opcode == VKD3DSIH_DCL_UAV_TYPED
+            || ins->opcode == VKD3DSIH_DCL_UAV_RAW
+            || ins->opcode == VKD3DSIH_DCL_UAV_STRUCTURED;
-    sm4_dst_from_node(&instr.dsts[0], dst);
-    instr.dst_count = 1;
+    instr.opcode = info->opcode;
-    sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL);
+    instr.dsts[0] = semantic->resource.reg;
+    instr.dst_count = 1;
-    if (texel_offset)
+    for (k = 0; k < 4; ++k)
     {
-        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
+        for (i = ARRAY_SIZE(data_type_table) - 1; i < ARRAY_SIZE(data_type_table); --i)
         {
-            if (!vkd3d_shader_ver_ge(version, 5, 0))
+            if (semantic->resource_data_type[k] == data_type_table[i])
             {
-                hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
-                        "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5.");
-                return;
+                instr.idx[0] |= i << (4 * k);
+                break;
             }
-            instr.opcode = VKD3D_SM5_OP_GATHER4_PO;
-            sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL);
         }
     }
+    instr.idx_count = 1;
-    sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr);
-
-    src = &instr.srcs[instr.src_count++];
-    sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr);
-    src->reg.dimension = VSIR_DIMENSION_VEC4;
-    src->swizzle = swizzle;
-
-    write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-{
-    const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
-    const struct hlsl_ir_node *sample_index = load->sample_index.node;
-    const struct hlsl_ir_node *coords = load->coords.node;
-
-    if (load->sampler.var && !load->sampler.var->is_uniform)
+    if (vkd3d_shader_ver_ge(version, 5, 1))
     {
-        hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable.");
-        return;
-    }
+        instr.dsts[0].reg.idx[0].offset = semantic->resource.reg.reg.idx[0].offset;
+        instr.dsts[0].reg.idx[1].offset = semantic->resource.range.first;
+        instr.dsts[0].reg.idx[2].offset = semantic->resource.range.last;
+        instr.dsts[0].reg.idx_count = 3;
-    if (!load->resource.var->is_uniform)
-    {
-        hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable.");
-        return;
+        instr.idx[1] = semantic->resource.range.space;
+        instr.idx_count = 2;
     }
-
-    switch (load->load_type)
+    else
     {
-        case HLSL_RESOURCE_LOAD:
-            write_sm4_ld(tpf, &load->node, &load->resource,
-                    coords, sample_index, texel_offset, load->sampling_dim);
-            break;
-
-        case HLSL_RESOURCE_SAMPLE:
-        case HLSL_RESOURCE_SAMPLE_CMP:
-        case HLSL_RESOURCE_SAMPLE_CMP_LZ:
-        case HLSL_RESOURCE_SAMPLE_LOD:
-        case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
-        case HLSL_RESOURCE_SAMPLE_GRAD:
-            /* Combined sample expressions were lowered. */
-            VKD3D_ASSERT(load->sampler.var);
-            write_sm4_sample(tpf, load);
-            break;
-
-        case HLSL_RESOURCE_GATHER_RED:
-            write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
-                    VKD3D_SHADER_SWIZZLE(X, X, X, X), texel_offset);
-            break;
-
-        case HLSL_RESOURCE_GATHER_GREEN:
-            write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
-                    VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), texel_offset);
-            break;
-
-        case HLSL_RESOURCE_GATHER_BLUE:
-            write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
-                    VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), texel_offset);
-            break;
+        instr.dsts[0].reg.idx[0].offset = semantic->resource.range.first;
+        instr.dsts[0].reg.idx_count = 1;
+    }
-        case HLSL_RESOURCE_GATHER_ALPHA:
-            write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
-                    VKD3D_SHADER_SWIZZLE(W, W, W, W), texel_offset);
-            break;
+    if (uav)
+        instr.extra_bits |= ins->flags << VKD3D_SM5_UAV_FLAGS_SHIFT;
-        case HLSL_RESOURCE_SAMPLE_INFO:
-            write_sm4_sampleinfo(tpf, load);
-            break;
+    instr.extra_bits |= (sm4_resource_dimension(ins->resource_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT);
+    instr.extra_bits |= semantic->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT;
-        case HLSL_RESOURCE_RESINFO:
-            write_sm4_resinfo(tpf, load);
-            break;
+    if (ins->structured)
+        instr.byte_stride = structured_resource->byte_stride;
-        case HLSL_RESOURCE_SAMPLE_PROJ:
-            vkd3d_unreachable();
-    }
+    write_sm4_instruction(tpf, &instr);
 }
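The resource declaration above packs one 4-bit data-type code per result component into a single immediate (instr.idx[0] |= i << (4 * k)), which is also why the old HLSL-side path could write sm4_data_type(...) * 0x1111 to replicate a uniform type into all four nibbles. A sketch of that packing, under the assumption that each code fits in a nibble:

    /* Sketch: pack four per-component type codes into one token, nibble k
     * describing component k; a uniform code c yields c * 0x1111. */
    #include <stdint.h>

    static uint32_t pack_resource_data_types(const unsigned int codes[4])
    {
        uint32_t token = 0;
        for (unsigned int k = 0; k < 4; ++k)
            token |= (uint32_t)(codes[k] & 0xf) << (4 * k);
        return token;
    }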
-static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store)
+static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags)
 {
-    struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource);
-    struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node;
-    struct sm4_instruction instr;
-
-    if (!store->resource.var->is_uniform)
-    {
-        hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable.");
-        return;
-    }
-
-    if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+    struct sm4_instruction instr =
     {
-        hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented.");
-        return;
-    }
+        .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,
+        .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT,
+    };
-    memset(&instr, 0, sizeof(instr));
+    write_sm4_instruction(tpf, &instr);
+}
-    sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, &store->resource, &instr);
-    instr.dst_count = 1;
-    if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
-    {
-        instr.opcode = VKD3D_SM5_OP_STORE_RAW;
-        instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx);
-    }
-    else
+static void tpf_write_hs_decls(const struct tpf_compiler *tpf)
+{
+    struct sm4_instruction instr =
     {
-        instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
-    }
-
-    sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
-    sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL);
-    instr.src_count = 2;
+        .opcode = VKD3D_SM5_OP_HS_DECLS,
+    };
     write_sm4_instruction(tpf, &instr);
 }
-static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store)
+static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count)
 {
-    const struct hlsl_ir_node *rhs = store->rhs.node;
-    struct sm4_instruction instr;
-    uint32_t writemask;
-
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_MOV;
-
-    sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr);
-    instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask);
-    instr.dst_count = 1;
-
-    sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask);
-    instr.src_count = 1;
+    struct sm4_instruction instr =
+    {
+        .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT,
+        .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT,
+    };
     write_sm4_instruction(tpf, &instr);
 }
-static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s)
+static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count)
 {
-    const struct hlsl_ir_node *selector = s->selector.node;
-    struct hlsl_ir_switch_case *c;
-    struct sm4_instruction instr;
-
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_SWITCH;
-
-    sm4_src_from_node(tpf, &instr.srcs[0], selector, VKD3DSP_WRITEMASK_ALL);
-    instr.src_count = 1;
+    struct sm4_instruction instr =
+    {
+        .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT,
+        .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT,
+    };
     write_sm4_instruction(tpf, &instr);
+}
-    LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
+static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain)
+{
+    struct sm4_instruction instr =
     {
-        memset(&instr, 0, sizeof(instr));
-        if (c->is_default)
-        {
-            instr.opcode = VKD3D_SM4_OP_DEFAULT;
-        }
-        else
-        {
-            struct hlsl_constant_value value = { .u[0].u = c->value };
-
-            instr.opcode = VKD3D_SM4_OP_CASE;
-            sm4_src_from_constant_value(&instr.srcs[0], &value, 1, VKD3DSP_WRITEMASK_ALL);
-            instr.src_count = 1;
-        }
-
-        write_sm4_instruction(tpf, &instr);
-        write_sm4_block(tpf, &c->body);
-    }
-
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_ENDSWITCH;
+        .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN,
+        .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT,
+    };
     write_sm4_instruction(tpf, &instr);
 }
-static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle)
+static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf,
+        enum vkd3d_shader_tessellator_partitioning partitioning)
 {
-    unsigned int hlsl_swizzle;
-    struct sm4_instruction instr;
-    uint32_t writemask;
-
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_MOV;
+    struct sm4_instruction instr =
+    {
+        .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING,
+        .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT,
+    };
-    sm4_dst_from_node(&instr.dsts[0], &swizzle->node);
-    instr.dst_count = 1;
+    write_sm4_instruction(tpf, &instr);
+}
-    sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node);
-    hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask),
-            swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask);
-    instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle);
-    instr.src_count = 1;
+static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf,
+        enum vkd3d_shader_tessellator_output_primitive output_primitive)
+{
+    struct sm4_instruction instr =
+    {
+        .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE,
+        .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT,
+    };
     write_sm4_instruction(tpf, &instr);
 }
 static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
 {
+    struct sm4_instruction_modifier *modifier;
     const struct vkd3d_sm4_opcode_info *info;
     struct sm4_instruction instr = {0};
     unsigned int dst_count, src_count;
@@ -5655,6 +3960,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_
     for (unsigned int i = 0; i < ins->src_count; ++i)
         instr.srcs[i] = ins->src[i];
+    if (ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w)
+    {
+        VKD3D_ASSERT(instr.modifier_count < ARRAY_SIZE(instr.modifiers));
+        modifier = &instr.modifiers[instr.modifier_count++];
+        modifier->type = VKD3D_SM4_MODIFIER_AOFFIMMI;
+        modifier->u.aoffimmi.u = ins->texel_offset.u;
+        modifier->u.aoffimmi.v = ins->texel_offset.v;
+        modifier->u.aoffimmi.w = ins->texel_offset.w;
+    }
+
+    if (info->is_conditional_op)
+    {
+        if (ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ)
+            instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
+    }
+
     write_sm4_instruction(tpf, &instr);
 }
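tpf_simple_instruction() now folds texel offsets into an aoffimmi instruction modifier; each component is stored as a 4-bit two's-complement immediate, which is where the "-8 to 7" range checks elsewhere in this file come from. A sketch of that encoding (the shift positions below are assumptions for illustration, not the actual VKD3D_SM4_AOFFIMMI_* values):

    /* Sketch: pack a texel offset into signed nibbles; values outside
     * -8..7 cannot be represented and must be rejected. */
    #include <stdbool.h>
    #include <stdint.h>

    static bool encode_aoffimmi(int u, int v, int w, uint32_t *token)
    {
        if (u < -8 || u > 7 || v < -8 || v > 7 || w < -8 || w > 7)
            return false;                    /* does not fit in a signed nibble */
        *token = ((uint32_t)u & 0xf) << 9    /* assumed U shift */
                | ((uint32_t)v & 0xf) << 13  /* assumed V shift */
                | ((uint32_t)w & 0xf) << 17; /* assumed W shift */
        return true;
    }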
@@ -5662,6 +3983,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
 {
     switch (ins->opcode)
     {
+        case VKD3DSIH_DCL_CONSTANT_BUFFER:
+            tpf_dcl_constant_buffer(tpf, ins);
+            break;
+
         case VKD3DSIH_DCL_TEMPS:
             tpf_dcl_temps(tpf, ins->declaration.count);
             break;
@@ -5702,8 +4027,34 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
             tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0);
             break;
+        case VKD3DSIH_DCL_SAMPLER:
+            tpf_dcl_sampler(tpf, ins);
+            break;
+
+        case VKD3DSIH_DCL:
+        case VKD3DSIH_DCL_RESOURCE_RAW:
+        case VKD3DSIH_DCL_UAV_RAW:
+        case VKD3DSIH_DCL_UAV_STRUCTURED:
+        case VKD3DSIH_DCL_UAV_TYPED:
+            tpf_dcl_texture(tpf, ins);
+            break;
+
+        case VKD3DSIH_ADD:
+        case VKD3DSIH_ATOMIC_AND:
+        case VKD3DSIH_ATOMIC_CMP_STORE:
+        case VKD3DSIH_ATOMIC_IADD:
+        case VKD3DSIH_ATOMIC_IMAX:
+        case VKD3DSIH_ATOMIC_IMIN:
+        case VKD3DSIH_ATOMIC_UMAX:
+        case VKD3DSIH_ATOMIC_UMIN:
+        case VKD3DSIH_ATOMIC_OR:
+        case VKD3DSIH_ATOMIC_XOR:
         case VKD3DSIH_AND:
+        case VKD3DSIH_BREAK:
+        case VKD3DSIH_CASE:
+        case VKD3DSIH_CONTINUE:
+        case VKD3DSIH_DEFAULT:
+        case VKD3DSIH_DISCARD:
         case VKD3DSIH_DIV:
         case VKD3DSIH_DP2:
         case VKD3DSIH_DP3:
@@ -5714,6 +4065,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
         case VKD3DSIH_DSY:
         case VKD3DSIH_DSY_COARSE:
        case VKD3DSIH_DSY_FINE:
+        case VKD3DSIH_ELSE:
+        case VKD3DSIH_ENDIF:
+        case VKD3DSIH_ENDLOOP:
+        case VKD3DSIH_ENDSWITCH:
         case VKD3DSIH_EQO:
         case VKD3DSIH_EXP:
         case VKD3DSIH_F16TOF32:
@@ -5721,21 +4076,43 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
         case VKD3DSIH_FRC:
         case VKD3DSIH_FTOI:
         case VKD3DSIH_FTOU:
+        case VKD3DSIH_GATHER4:
+        case VKD3DSIH_GATHER4_PO:
+        case VKD3DSIH_GATHER4_C:
+        case VKD3DSIH_GATHER4_PO_C:
         case VKD3DSIH_GEO:
+        case VKD3DSIH_HS_CONTROL_POINT_PHASE:
+        case VKD3DSIH_HS_FORK_PHASE:
         case VKD3DSIH_IADD:
         case VKD3DSIH_IEQ:
+        case VKD3DSIH_IF:
         case VKD3DSIH_IGE:
         case VKD3DSIH_ILT:
         case VKD3DSIH_IMAD:
         case VKD3DSIH_IMAX:
         case VKD3DSIH_IMIN:
+        case VKD3DSIH_IMM_ATOMIC_AND:
+        case VKD3DSIH_IMM_ATOMIC_CMP_EXCH:
+        case VKD3DSIH_IMM_ATOMIC_EXCH:
+        case VKD3DSIH_IMM_ATOMIC_IADD:
+        case VKD3DSIH_IMM_ATOMIC_IMAX:
+        case VKD3DSIH_IMM_ATOMIC_IMIN:
+        case VKD3DSIH_IMM_ATOMIC_UMAX:
+        case VKD3DSIH_IMM_ATOMIC_UMIN:
+        case VKD3DSIH_IMM_ATOMIC_OR:
+        case VKD3DSIH_IMM_ATOMIC_XOR:
         case VKD3DSIH_IMUL:
         case VKD3DSIH_INE:
         case VKD3DSIH_INEG:
         case VKD3DSIH_ISHL:
         case VKD3DSIH_ISHR:
         case VKD3DSIH_ITOF:
+        case VKD3DSIH_LD:
+        case VKD3DSIH_LD2DMS:
+        case VKD3DSIH_LD_RAW:
+        case VKD3DSIH_LD_UAV_TYPED:
         case VKD3DSIH_LOG:
+        case VKD3DSIH_LOOP:
         case VKD3DSIH_LTO:
         case VKD3DSIH_MAD:
         case VKD3DSIH_MAX:
@@ -5747,14 +4124,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
         case VKD3DSIH_NOT:
         case VKD3DSIH_OR:
         case VKD3DSIH_RCP:
+        case VKD3DSIH_RESINFO:
+        case VKD3DSIH_RET:
         case VKD3DSIH_ROUND_NE:
         case VKD3DSIH_ROUND_NI:
         case VKD3DSIH_ROUND_PI:
         case VKD3DSIH_ROUND_Z:
         case VKD3DSIH_RSQ:
+        case VKD3DSIH_SAMPLE:
+        case VKD3DSIH_SAMPLE_B:
+        case VKD3DSIH_SAMPLE_C:
+        case VKD3DSIH_SAMPLE_C_LZ:
+        case VKD3DSIH_SAMPLE_GRAD:
         case VKD3DSIH_SAMPLE_INFO:
+        case VKD3DSIH_SAMPLE_LOD:
         case VKD3DSIH_SINCOS:
         case VKD3DSIH_SQRT:
+        case VKD3DSIH_STORE_RAW:
+        case VKD3DSIH_STORE_UAV_TYPED:
+        case VKD3DSIH_SWITCH:
         case VKD3DSIH_UDIV:
         case VKD3DSIH_UGE:
         case VKD3DSIH_ULT:
@@ -5772,102 +4160,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
     }
 }
-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block)
+static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_program *program)
 {
-    const struct hlsl_ir_node *instr;
-    unsigned int vsir_instr_idx;
-
-    LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
-    {
-        if (instr->data_type)
-        {
-            if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
-            {
-                hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.",
-                        instr->data_type->class);
-                break;
-            }
-
-            if (!instr->reg.allocated)
-            {
-                VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT);
-                continue;
-            }
-        }
-
-        switch (instr->type)
-        {
-            case HLSL_IR_CALL:
-            case HLSL_IR_CONSTANT:
-                vkd3d_unreachable();
-
-            case HLSL_IR_IF:
-                write_sm4_if(tpf, hlsl_ir_if(instr));
-                break;
-
-            case HLSL_IR_JUMP:
-                write_sm4_jump(tpf, hlsl_ir_jump(instr));
-                break;
-
-            case HLSL_IR_LOAD:
-                write_sm4_load(tpf, hlsl_ir_load(instr));
-                break;
-
-            case HLSL_IR_RESOURCE_LOAD:
-                write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr));
-                break;
-
-            case HLSL_IR_RESOURCE_STORE:
-                write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr));
-                break;
-
-            case HLSL_IR_LOOP:
-                write_sm4_loop(tpf, hlsl_ir_loop(instr));
-                break;
-
-            case HLSL_IR_STORE:
-                write_sm4_store(tpf, hlsl_ir_store(instr));
-                break;
-
-            case HLSL_IR_SWITCH:
-                write_sm4_switch(tpf, hlsl_ir_switch(instr));
-                break;
-
-            case HLSL_IR_SWIZZLE:
-                write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr));
-                break;
-
-            case HLSL_IR_VSIR_INSTRUCTION_REF:
-                vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx;
-                tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]);
-                break;
-
-            default:
-                hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
-        }
-    }
-}
+    unsigned int i;
-static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func)
-{
     if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE)
         tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size);
-    write_sm4_block(tpf, &func->body);
-
-    write_sm4_ret(tpf);
+    for (i = 0; i < program->instructions.count; ++i)
+        tpf_handle_instruction(tpf, &program->instructions.elements[i]);
 }
-static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func)
+static void tpf_write_shdr(struct tpf_compiler *tpf)
 {
-    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
+    const struct vsir_program *program = tpf->program;
+    const struct vkd3d_shader_version *version;
     struct vkd3d_bytecode_buffer buffer = {0};
-    struct extern_resource *extern_resources;
-    unsigned int extern_resources_count, i;
-    const struct hlsl_buffer *cbuffer;
-    struct hlsl_ctx *ctx = tpf->ctx;
     size_t token_count_position;
-    uint32_t global_flags = 0;
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -5884,101 +4193,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec
tpf->buffer = &buffer;
-    extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
-
+    version = &program->shader_version;
     put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type]));
     token_count_position = put_u32(&buffer, 0);
-    if (version->major == 4)
-    {
-        for (i = 0; i < extern_resources_count; ++i)
-        {
-            const struct extern_resource *resource = &extern_resources[i];
-            const struct hlsl_type *type = resource->component_type;
-
-            if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
-            {
-                global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
-                break;
-            }
-        }
-    }
-
-    if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0))
-        global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL;
-
-    if (global_flags)
-        write_sm4_dcl_global_flags(tpf, global_flags);
+    if (program->global_flags)
+        write_sm4_dcl_global_flags(tpf, program->global_flags);
     if (version->type == VKD3D_SHADER_TYPE_HULL)
     {
         tpf_write_hs_decls(tpf);
-        tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */
-        tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count);
-        tpf_write_dcl_tessellator_domain(tpf, ctx->domain);
-        tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning);
-        tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive);
+        tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count);
+        tpf_write_dcl_output_control_point_count(tpf, program->output_control_point_count);
+        tpf_write_dcl_tessellator_domain(tpf, program->tess_domain);
+        tpf_write_dcl_tessellator_partitioning(tpf, program->tess_partitioning);
+        tpf_write_dcl_tessellator_output_primitive(tpf, program->tess_output_primitive);
     }
     else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
     {
-        tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */
-        tpf_write_dcl_tessellator_domain(tpf, ctx->domain);
-    }
-
-    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
-    {
-        if (cbuffer->reg.allocated)
-            write_sm4_dcl_constant_buffer(tpf, cbuffer);
-    }
-
-    for (i = 0; i < extern_resources_count; ++i)
-    {
-        const struct extern_resource *resource = &extern_resources[i];
-
-        if (resource->regset == HLSL_REGSET_SAMPLERS)
-            write_sm4_dcl_samplers(tpf, resource);
-        else if (resource->regset == HLSL_REGSET_TEXTURES)
-            write_sm4_dcl_textures(tpf, resource, false);
-        else if (resource->regset == HLSL_REGSET_UAVS)
-            write_sm4_dcl_textures(tpf, resource, true);
+        tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count);
+        tpf_write_dcl_tessellator_domain(tpf, program->tess_domain);
     }
-    if (version->type == VKD3D_SHADER_TYPE_HULL)
-        tpf_write_hs_control_point_phase(tpf);
-
-    tpf_write_shader_function(tpf, entry_func);
-
-    if (version->type == VKD3D_SHADER_TYPE_HULL)
-    {
-        tpf_write_hs_fork_phase(tpf);
-        tpf_write_shader_function(tpf, ctx->patch_constant_func);
-    }
+    tpf_write_program(tpf, program);
set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));
-    add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer);
+    add_section(tpf, TAG_SHDR, &buffer);
     tpf->buffer = NULL;
-
-    sm4_free_extern_resources(extern_resources, extern_resources_count);
 }
 static void tpf_write_sfi0(struct tpf_compiler *tpf)
 {
-    struct extern_resource *extern_resources;
-    unsigned int extern_resources_count;
-    struct hlsl_ctx *ctx = tpf->ctx;
     uint64_t *flags;
flags = vkd3d_calloc(1, sizeof(*flags));
-    extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
-    for (unsigned int i = 0; i < extern_resources_count; ++i)
-    {
-        if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered)
-            *flags |= VKD3D_SM4_REQUIRES_ROVS;
-    }
-    sm4_free_extern_resources(extern_resources, extern_resources_count);
+    if (tpf->program->features.rovs)
+        *flags |= DXBC_SFI0_REQUIRES_ROVS;
     /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE,
      * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */
@@ -5993,7 +4246,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf)
 {
     struct vkd3d_bytecode_buffer buffer = {0};
     const struct sm4_stat *stat = tpf->stat;
-    struct hlsl_ctx *ctx = tpf->ctx;
     put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]);
     put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]);
@@ -6025,7 +4277,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf)
     put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]);
     put_u32(&buffer, 0); /* Sample frequency */
-    if (hlsl_version_ge(ctx, 5, 0))
+    if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 0))
     {
         put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]);
         put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]);
@@ -6037,15 +4289,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf)
         put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]);
     }
-    add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer);
+    add_section(tpf, TAG_STAT, &buffer);
+}
+
+static void tpf_write_section(struct tpf_compiler *tpf, uint32_t tag, const struct vkd3d_shader_code *code)
+{
+    struct vkd3d_bytecode_buffer buffer = {0};
+
+    bytecode_put_bytes(&buffer, code->code, code->size);
+    add_section(tpf, tag, &buffer);
 }
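tpf_write_section() above reduces RDEF emission to copying a pre-serialised blob into a DXBC chunk: a chunk is just a four-character tag, a little-endian payload size, and the payload bytes. A rough standalone sketch of appending one to a growable byte buffer; the buffer type and helper names here are hypothetical, not the vkd3d ones, and error handling is simplified:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    struct byte_buffer
    {
        uint8_t *data;
        size_t size, capacity;
    };

    /* Append raw bytes, growing the buffer geometrically on demand. */
    static int buffer_put(struct byte_buffer *b, const void *bytes, size_t size)
    {
        if (b->size + size > b->capacity)
        {
            size_t new_capacity = b->capacity ? b->capacity * 2 : 256;
            uint8_t *new_data;

            while (new_capacity < b->size + size)
                new_capacity *= 2;
            if (!(new_data = realloc(b->data, new_capacity)))
                return -1;
            b->data = new_data;
            b->capacity = new_capacity;
        }
        if (size)
            memcpy(&b->data[b->size], bytes, size);
        b->size += size;
        return 0;
    }

    /* A DXBC chunk: 4-byte tag, 32-bit payload size, then the payload. */
    static int buffer_put_chunk(struct byte_buffer *b, const char tag[4],
            const void *payload, uint32_t size)
    {
        if (buffer_put(b, tag, 4) < 0 || buffer_put(b, &size, sizeof(size)) < 0)
            return -1;
        return buffer_put(b, payload, size);
    }

    int main(void)
    {
        struct byte_buffer b = {0};
        static const uint32_t payload[] = {1, 2, 3};

        buffer_put_chunk(&b, "STAT", payload, sizeof(payload));
        free(b.data);
        return 0;
    }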
-/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving
- * data from the other parameters instead, so they can be removed from the
- * arguments and this function can be independent of HLSL structs. */
-int tpf_compile(struct vsir_program *program, uint64_t config_flags,
-        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context,
-        struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
+int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef,
+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context)
 {
     enum vkd3d_shader_type shader_type = program->shader_version.type;
     struct tpf_compiler tpf = {0};
@@ -6053,7 +4309,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags,
     size_t i;
     int ret;
-    tpf.ctx = ctx;
     tpf.program = program;
     tpf.buffer = NULL;
     tpf.stat = &stat;
@@ -6064,14 +4319,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags,
     tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN);
     if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN)
         tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG);
-    write_sm4_rdef(ctx, &tpf.dxbc);
-    tpf_write_shdr(&tpf, entry_func);
+    tpf_write_section(&tpf, TAG_RDEF, rdef);
+    tpf_write_shdr(&tpf);
     tpf_write_sfi0(&tpf);
     tpf_write_stat(&tpf);
     ret = VKD3D_OK;
-    if (ctx->result)
-        ret = ctx->result;
     if (tpf.result)
         ret = tpf.result;
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
index d751f2dc6bf..021691bb3a1 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
@@ -377,7 +377,8 @@ size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer)
         return aligned_size;
     }
-    memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size);
+    if (aligned_size > buffer->size)
+        memset(&buffer->data[buffer->size], 0xab, aligned_size - buffer->size);
     buffer->size = aligned_size;
     return aligned_size;
 }
@@ -394,7 +395,8 @@ size_t bytecode_put_bytes_unaligned(struct vkd3d_bytecode_buffer *buffer, const
         buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
         return offset;
     }
-    memcpy(buffer->data + offset, bytes, size);
+    if (size)
+        memcpy(&buffer->data[offset], bytes, size);
     buffer->size = offset + size;
     return offset;
 }
@@ -805,6 +807,9 @@ struct vkd3d_shader_scan_context
     struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info;
     size_t combined_samplers_size;
+
+    enum vkd3d_shader_tessellator_output_primitive output_primitive;
+    enum vkd3d_shader_tessellator_partitioning partitioning;
 };
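A note on the bytecode-buffer hardening a few hunks up: the guards are not cosmetic. memcpy() and memset() have undefined behaviour when handed a null pointer, even with a zero length, and a buffer that has never grown still has data == NULL. A reduced illustration of the defensive pattern; the struct and helper are hypothetical stand-ins:

    #include <stdint.h>
    #include <string.h>

    struct buffer
    {
        uint8_t *data;   /* May still be NULL before the first allocation. */
        size_t size;
    };

    /* Safe append, assuming capacity was already reserved: skip the memcpy()
     * entirely when there is nothing to copy, so &data[size] is never
     * evaluated on a NULL data pointer. */
    static void buffer_append(struct buffer *b, const void *bytes, size_t size)
    {
        if (size)
            memcpy(&b->data[b->size], bytes, size);
        b->size += size;
    }

    int main(void)
    {
        uint8_t storage[16];
        struct buffer b = {storage, 0};

        buffer_append(&b, NULL, 0);   /* No-op, and no UB. */
        buffer_append(&b, "abc", 3);
        return b.size == 3 ? 0 : 1;
    }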
static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, @@ -1262,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, instruction->declaration.structured_resource.byte_stride, false, instruction->flags); break; + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + context->output_primitive = instruction->declaration.tessellator_output_primitive; + break; + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: + context->partitioning = instruction->declaration.tessellator_partitioning; + break; case VKD3DSIH_IF: case VKD3DSIH_IFC: cf_info = vkd3d_shader_scan_push_cf_info(context); @@ -1502,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) { struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; struct vkd3d_shader_scan_descriptor_info *descriptor_info; struct vkd3d_shader_scan_signature_info *signature_info; @@ -1530,6 +1542,8 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh descriptor_info1 = &local_descriptor_info1; }
+    tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO);
+
     vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info,
             descriptor_info1, combined_sampler_info, message_context);
@@ -1573,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
     if (!ret && descriptor_info)
         ret = convert_descriptor_info(descriptor_info, descriptor_info1);
+ if (!ret && tessellation_info) + { + tessellation_info->output_primitive = context.output_primitive; + tessellation_info->partitioning = context.partitioning; + } + if (ret < 0) { if (combined_sampler_info) @@ -1959,7 +1979,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type dxbc_tpf_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -#ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, @@ -1974,7 +1994,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type hlsl_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -#ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, @@ -1986,7 +2006,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type d3dbc_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -#ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, @@ -1996,7 +2016,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type dxbc_dxil_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -# ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, # endif VKD3D_SHADER_TARGET_D3D_ASM, diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index be7c0b73a22..03643acff3c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -62,6 +62,8 @@ #define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) #define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1)
+#define VKD3D_MAX_STREAM_COUNT 4
+
 enum vkd3d_shader_error
 {
     VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1,
@@ -163,6 +165,8 @@ enum vkd3d_shader_error
     VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037,
     VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038,
     VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039,
+    VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL = 5040,
+    VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH = 5041,
     VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
     VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
@@ -247,6 +251,7 @@ enum vkd3d_shader_error
     VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020,
     VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021,
     VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022,
+    VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE = 9023,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300,
@@ -591,28 +596,25 @@ enum vkd3d_shader_opcode
enum vkd3d_shader_register_type { - VKD3DSPR_TEMP = 0, - VKD3DSPR_INPUT = 1, - VKD3DSPR_CONST = 2, - VKD3DSPR_ADDR = 3, - VKD3DSPR_TEXTURE = 3, - VKD3DSPR_RASTOUT = 4, - VKD3DSPR_ATTROUT = 5, - VKD3DSPR_TEXCRDOUT = 6, - VKD3DSPR_OUTPUT = 6, - VKD3DSPR_CONSTINT = 7, - VKD3DSPR_COLOROUT = 8, - VKD3DSPR_DEPTHOUT = 9, - VKD3DSPR_COMBINED_SAMPLER = 10, - VKD3DSPR_CONST2 = 11, - VKD3DSPR_CONST3 = 12, - VKD3DSPR_CONST4 = 13, - VKD3DSPR_CONSTBOOL = 14, - VKD3DSPR_LOOP = 15, - VKD3DSPR_TEMPFLOAT16 = 16, - VKD3DSPR_MISCTYPE = 17, - VKD3DSPR_LABEL = 18, - VKD3DSPR_PREDICATE = 19, + VKD3DSPR_TEMP, + VKD3DSPR_INPUT, + VKD3DSPR_CONST, + VKD3DSPR_ADDR, + VKD3DSPR_TEXTURE, + VKD3DSPR_RASTOUT, + VKD3DSPR_ATTROUT, + VKD3DSPR_TEXCRDOUT, + VKD3DSPR_OUTPUT, + VKD3DSPR_CONSTINT, + VKD3DSPR_COLOROUT, + VKD3DSPR_DEPTHOUT, + VKD3DSPR_COMBINED_SAMPLER, + VKD3DSPR_CONSTBOOL, + VKD3DSPR_LOOP, + VKD3DSPR_TEMPFLOAT16, + VKD3DSPR_MISCTYPE, + VKD3DSPR_LABEL, + VKD3DSPR_PREDICATE, VKD3DSPR_IMMCONST, VKD3DSPR_IMMCONST64, VKD3DSPR_CONSTBUFFER, @@ -1123,6 +1125,12 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); void shader_signature_cleanup(struct shader_signature *signature);
+struct vsir_features
+{
+    /* The shader requires rasteriser-ordered views. */
+    bool rovs;
+};
+
 struct dxbc_shader_desc
 {
     const uint32_t *byte_code;
@@ -1131,6 +1139,7 @@ struct dxbc_shader_desc
     struct shader_signature input_signature;
     struct shader_signature output_signature;
     struct shader_signature patch_constant_signature;
+    struct vsir_features features;
 };
struct vkd3d_shader_register_semantic @@ -1400,9 +1409,10 @@ enum vsir_control_flow_type
 enum vsir_normalisation_level
 {
-    VSIR_NOT_NORMALISED,
+    VSIR_NORMALISED_SM1,
+    VSIR_NORMALISED_SM4,
     VSIR_NORMALISED_HULL_CONTROL_POINT_IO,
-    VSIR_FULLY_NORMALISED_IO,
+    VSIR_NORMALISED_SM6,
 };
 struct vsir_program
@@ -1428,9 +1438,16 @@ struct vsir_program
     bool use_vocp;
     bool has_point_size;
     bool has_point_coord;
+    bool has_fog;
     uint8_t diffuse_written_mask;
     enum vsir_control_flow_type cf_type;
     enum vsir_normalisation_level normalisation_level;
+    enum vkd3d_tessellator_domain tess_domain;
+    enum vkd3d_shader_tessellator_partitioning tess_partitioning;
+    enum vkd3d_shader_tessellator_output_primitive tess_output_primitive;
+    uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)];
+
+    struct vsir_features features;
     const char **block_names;
     size_t block_name_count;
@@ -1625,7 +1642,7 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version,
 bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg);
 bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic,
         const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain,
-        const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func);
+        const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func, bool is_patch);
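vsir_program now tracks which I/O register types have been declared in the io_dcls bitmap, sized with VKD3D_BITMAP_SIZE. The underlying idiom is one bit per enumerator packed into uint32_t words; a self-contained version of the set/test helpers, using generic names rather than the vkd3d ones:

    #include <stdbool.h>
    #include <stdint.h>

    #define BITMAP_SIZE(x) (((x) + 31) >> 5)

    static void bitmap_set(uint32_t *map, unsigned int bit)
    {
        map[bit >> 5] |= 1u << (bit & 31);
    }

    static bool bitmap_is_set(const uint32_t *map, unsigned int bit)
    {
        return map[bit >> 5] & (1u << (bit & 31));
    }

    /* Usage: one bit per register type. */
    enum reg_type { REG_TEMP, REG_INPUT, REG_OUTPUT, REG_COUNT };

    int main(void)
    {
        uint32_t dcls[BITMAP_SIZE(REG_COUNT)] = {0};

        bitmap_set(dcls, REG_INPUT);
        return bitmap_is_set(dcls, REG_INPUT) ? 0 : 1;
    }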
int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, struct vkd3d_shader_message_context *message_context, struct vsir_program *program); @@ -1643,6 +1660,10 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature);
+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
+        const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab,
+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
+
 int glsl_compile(struct vsir_program *program, uint64_t config_flags,
         const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
         const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info,
@@ -1661,6 +1682,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags,
         const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
         struct vkd3d_shader_message_context *message_context);
+int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef,
+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
+
 enum vkd3d_md5_variant
 {
     VKD3D_MD5_STANDARD,
@@ -1942,6 +1966,21 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain,
#define DXBC_MAX_SECTION_COUNT 7
+#define DXBC_SFI0_REQUIRES_DOUBLES 0x00000001u
+#define DXBC_SFI0_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002u
+#define DXBC_SFI0_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004u
+#define DXBC_SFI0_REQUIRES_64_UAVS 0x00000008u
+#define DXBC_SFI0_REQUIRES_MINIMUM_PRECISION 0x00000010u
+#define DXBC_SFI0_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020u
+#define DXBC_SFI0_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040u
+#define DXBC_SFI0_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080u
+#define DXBC_SFI0_REQUIRES_TILED_RESOURCES 0x00000100u
+#define DXBC_SFI0_REQUIRES_STENCIL_REF 0x00000200u
+#define DXBC_SFI0_REQUIRES_INNER_COVERAGE 0x00000400u
+#define DXBC_SFI0_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800u
+#define DXBC_SFI0_REQUIRES_ROVS 0x00001000u
+#define DXBC_SFI0_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000u
+
 struct dxbc_writer
 {
     unsigned int section_count;
diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c
index a55a97f6f2f..ce0c3b9128f 100644
--- a/libs/vkd3d/libs/vkd3d/command.c
+++ b/libs/vkd3d/libs/vkd3d/command.c
@@ -327,9 +327,12 @@ static void *vkd3d_fence_worker_main(void *arg)
     struct vkd3d_waiting_fence *old_fences, *cur_fences = NULL;
     struct vkd3d_fence_worker *worker = arg;
     unsigned int i;
+    bool timeline;
vkd3d_set_thread_name("vkd3d_fence");
+    timeline = worker->device->vk_info.KHR_timeline_semaphore;
+
     for (;;)
     {
         vkd3d_mutex_lock(&worker->mutex);
@@ -357,7 +360,12 @@ static void *vkd3d_fence_worker_main(void *arg)
         vkd3d_mutex_unlock(&worker->mutex);
         for (i = 0; i < cur_fence_count; ++i)
-            worker->wait_for_gpu_fence(worker, &cur_fences[i]);
+        {
+            if (timeline)
+                vkd3d_wait_for_gpu_timeline_semaphore(worker, &cur_fences[i]);
+            else
+                vkd3d_wait_for_gpu_fence(worker, &cur_fences[i]);
+        }
     }
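The fence worker previously dispatched through a per-worker function pointer; the rewrite reads the KHR_timeline_semaphore capability once before the loop and branches per fence, which keeps both wait paths visible at the call site. A schematic of the pattern, with hypothetical wait functions standing in for the vkd3d ones:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct fence { int id; };

    static void wait_timeline(const struct fence *f) { printf("timeline %d\n", f->id); }
    static void wait_binary(const struct fence *f)   { printf("binary %d\n", f->id); }

    /* The capability cannot change while the worker runs, so it is read once
     * up front instead of being re-dispatched through a function pointer. */
    static void drain(const struct fence *fences, size_t count, bool timeline)
    {
        size_t i;

        for (i = 0; i < count; ++i)
        {
            if (timeline)
                wait_timeline(&fences[i]);
            else
                wait_binary(&fences[i]);
        }
    }

    int main(void)
    {
        struct fence f[] = {{1}, {2}};

        drain(f, 2, true);
        return 0;
    }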
vkd3d_free(cur_fences); @@ -379,9 +387,6 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, worker->fences = NULL; worker->fences_size = 0;
-    worker->wait_for_gpu_fence = device->vk_info.KHR_timeline_semaphore
-            ? vkd3d_wait_for_gpu_timeline_semaphore : vkd3d_wait_for_gpu_fence;
-
     vkd3d_mutex_init(&worker->mutex);
vkd3d_cond_init(&worker->cond); @@ -399,6 +404,7 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, struct d3d12_device *device) { + unsigned int i; HRESULT hr;
TRACE("worker %p.\n", worker); @@ -416,6 +422,9 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, vkd3d_mutex_destroy(&worker->mutex); vkd3d_cond_destroy(&worker->cond);
+    for (i = 0; i < worker->fence_count; ++i)
+        d3d12_fence_decref(worker->fences[i].fence);
+
     vkd3d_free(worker->fences);
return S_OK; @@ -556,7 +565,8 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) fence->old_vk_fences[i] = VK_NULL_HANDLE; }
-    d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
+    if (!device->vk_info.KHR_timeline_semaphore)
+        d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
     VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
vkd3d_mutex_unlock(&fence->mutex); @@ -1255,6 +1265,74 @@ VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore)); }
+static void vkd3d_vk_descriptor_pool_array_cleanup(struct vkd3d_vk_descriptor_pool_array *array) +{ + vkd3d_free(array->pools); +} + +static void vkd3d_vk_descriptor_pool_array_init(struct vkd3d_vk_descriptor_pool_array *array) +{ + memset(array, 0, sizeof(*array)); +} + +static bool vkd3d_vk_descriptor_pool_array_push_array(struct vkd3d_vk_descriptor_pool_array *array, + const struct vkd3d_vk_descriptor_pool *pools, size_t count) +{ + if (!vkd3d_array_reserve((void **)&array->pools, &array->capacity, array->count + count, sizeof(*array->pools))) + return false; + + memcpy(&array->pools[array->count], pools, count * sizeof(*pools)); + array->count += count; + + return true; +} + +static bool vkd3d_vk_descriptor_pool_array_push(struct vkd3d_vk_descriptor_pool_array *array, + unsigned int descriptor_count, VkDescriptorPool vk_pool) +{ + struct vkd3d_vk_descriptor_pool pool = + { + .descriptor_count = descriptor_count, + .vk_pool = vk_pool, + }; + + return vkd3d_vk_descriptor_pool_array_push_array(array, &pool, 1); +} + +static VkDescriptorPool vkd3d_vk_descriptor_pool_array_find(struct vkd3d_vk_descriptor_pool_array *array, + unsigned int *descriptor_count) +{ + VkDescriptorPool vk_pool; + size_t i; + + for (i = 0; i < array->count; ++i) + { + if (array->pools[i].descriptor_count >= *descriptor_count) + { + *descriptor_count = array->pools[i].descriptor_count; + vk_pool = array->pools[i].vk_pool; + array->pools[i] = array->pools[--array->count]; + + return vk_pool; + } + } + + return VK_NULL_HANDLE; +} + +static void vkd3d_vk_descriptor_pool_array_destroy_pools(struct vkd3d_vk_descriptor_pool_array *array, + const struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + size_t i; + + for (i = 0; i < array->count; ++i) + { + VK_CALL(vkDestroyDescriptorPool(device->vk_device, array->pools[i].vk_pool, NULL)); + } + array->count = 0; +} + /* Command buffers */ static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list, const char *message, ...) @@ -1376,18 +1454,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat return true; }
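The new vkd3d_vk_descriptor_pool_array above replaces plain VkDescriptorPool arrays with entries that also remember each pool's capacity, so a recycled pool is only reused when it is large enough. Its _find() helper uses an unordered swap-remove, keeping removal O(1) at the cost of element order. A reduced, standalone version of that removal idiom, with simplified stand-in types:

    #include <stddef.h>
    #include <stdio.h>

    struct pool { unsigned int capacity; int handle; };

    /* Take the first pool with at least the requested capacity; the array is
     * unordered, so the last element is swapped into the vacated slot. */
    static int find_pool(struct pool *pools, size_t *count, unsigned int *capacity)
    {
        size_t i;

        for (i = 0; i < *count; ++i)
        {
            if (pools[i].capacity >= *capacity)
            {
                int handle = pools[i].handle;

                *capacity = pools[i].capacity;
                pools[i] = pools[--*count];
                return handle;
            }
        }
        return -1;
    }

    int main(void)
    {
        struct pool pools[] = {{64, 1}, {256, 2}, {128, 3}};
        size_t count = 3;
        unsigned int want = 100;

        printf("got pool %d (capacity %u)\n", find_pool(pools, &count, &want), want);
        return 0;
    }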
-static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator, - VkDescriptorPool pool) -{ - if (!vkd3d_array_reserve((void **)&allocator->descriptor_pools, &allocator->descriptor_pools_size, - allocator->descriptor_pool_count + 1, sizeof(*allocator->descriptor_pools))) - return false; - - allocator->descriptor_pools[allocator->descriptor_pool_count++] = pool; - - return true; -} - static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator, struct vkd3d_view *view) { @@ -1426,37 +1492,71 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all }
static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( - struct d3d12_command_allocator *allocator) + struct d3d12_command_allocator *allocator, enum vkd3d_shader_descriptor_type descriptor_type, + unsigned int descriptor_count, bool unbounded) { struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct VkDescriptorPoolCreateInfo pool_desc; VkDevice vk_device = device->vk_device; + VkDescriptorPoolSize vk_pool_sizes[2]; + unsigned int pool_size, pool_limit; VkDescriptorPool vk_pool; VkResult vr;
- if (allocator->free_descriptor_pool_count > 0) - { - vk_pool = allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1]; - allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1] = VK_NULL_HANDLE; - --allocator->free_descriptor_pool_count; - } - else + if (!(vk_pool = vkd3d_vk_descriptor_pool_array_find(&allocator->free_descriptor_pools[descriptor_type], + &descriptor_count))) { + pool_limit = device->vk_pool_limits[descriptor_type]; + + if (descriptor_count > pool_limit) + { + if (!unbounded) + { + ERR("Descriptor count %u exceeds maximum pool size %u.\n", descriptor_count, pool_limit); + return VK_NULL_HANDLE; + } + + WARN("Clamping descriptor count %u to maximum pool size %u for unbounded allocation.\n", + descriptor_count, pool_limit); + descriptor_count = pool_limit; + } + + pool_size = allocator->vk_pool_sizes[descriptor_type]; + if (descriptor_count > pool_size) + { + pool_size = 1u << (vkd3d_log2i(descriptor_count - 1) + 1); + pool_size = min(pool_limit, pool_size); + } + descriptor_count = pool_size; + + vk_pool_sizes[0].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, true); + vk_pool_sizes[0].descriptorCount = descriptor_count; + + vk_pool_sizes[1].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, false); + vk_pool_sizes[1].descriptorCount = descriptor_count; + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; pool_desc.pNext = NULL; pool_desc.flags = 0; pool_desc.maxSets = 512; - pool_desc.poolSizeCount = device->vk_pool_count; - pool_desc.pPoolSizes = device->vk_pool_sizes; + pool_desc.poolSizeCount = 1; + if (vk_pool_sizes[1].type != vk_pool_sizes[0].type) + ++pool_desc.poolSizeCount; + pool_desc.pPoolSizes = vk_pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) { ERR("Failed to create descriptor pool, vr %d.\n", vr); return VK_NULL_HANDLE; } + + if (!unbounded || descriptor_count < pool_limit) + allocator->vk_pool_sizes[descriptor_type] = min(pool_limit, descriptor_count * 2); }
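The pool-size computation above rounds the request up to the next power of two via vkd3d_log2i() and then doubles the target for the next allocation, clamped to the device limit, so repeated allocations converge on a few large pools instead of many small ones. The rounding step in isolation, using a portable log2 rather than vkd3d's helper:

    #include <stdio.h>

    static unsigned int log2i(unsigned int x)
    {
        unsigned int r = 0;

        while (x >>= 1)
            ++r;
        return r;
    }

    /* Round count up to a power of two, as in the pool allocator: for any
     * count > 1, 1u << (log2i(count - 1) + 1) is the least power of two
     * greater than or equal to count. */
    static unsigned int round_up_pow2(unsigned int count)
    {
        if (count <= 1)
            return 1;
        return 1u << (log2i(count - 1) + 1);
    }

    int main(void)
    {
        /* Prints "4 64 128". */
        printf("%u %u %u\n", round_up_pow2(3), round_up_pow2(64), round_up_pow2(65));
        return 0;
    }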
- if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool))) + if (!(vkd3d_vk_descriptor_pool_array_push(&allocator->descriptor_pools[descriptor_type], + descriptor_count, vk_pool))) { ERR("Failed to add descriptor pool.\n"); VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL)); @@ -1466,9 +1566,9 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( return vk_pool; }
-static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( - struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout, - unsigned int variable_binding_size, bool unbounded) +static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(struct d3d12_command_allocator *allocator, + enum vkd3d_shader_descriptor_type descriptor_type, unsigned int descriptor_count, + VkDescriptorSetLayout vk_set_layout, unsigned int variable_binding_size, bool unbounded) { struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -1478,14 +1578,15 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( VkDescriptorSet vk_descriptor_set; VkResult vr;
- if (!allocator->vk_descriptor_pool) - allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); - if (!allocator->vk_descriptor_pool) + if (!allocator->vk_descriptor_pools[descriptor_type]) + allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator, + descriptor_type, descriptor_count, unbounded); + if (!allocator->vk_descriptor_pools[descriptor_type]) return VK_NULL_HANDLE;
set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; set_desc.pNext = NULL; - set_desc.descriptorPool = allocator->vk_descriptor_pool; + set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; set_desc.descriptorSetCount = 1; set_desc.pSetLayouts = &vk_set_layout; if (unbounded) @@ -1499,16 +1600,17 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0) return vk_descriptor_set;
- allocator->vk_descriptor_pool = VK_NULL_HANDLE; + allocator->vk_descriptor_pools[descriptor_type] = VK_NULL_HANDLE; if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR) - allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); - if (!allocator->vk_descriptor_pool) + allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator, + descriptor_type, descriptor_count, unbounded); + if (!allocator->vk_descriptor_pools[descriptor_type]) { ERR("Failed to allocate descriptor set, vr %d.\n", vr); return VK_NULL_HANDLE; }
- set_desc.descriptorPool = allocator->vk_descriptor_pool; + set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0) { FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr); @@ -1534,38 +1636,50 @@ static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct d3d12_devic VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL)); }
+static void d3d12_command_allocator_reset_descriptor_pool_array(struct d3d12_command_allocator *allocator, + enum vkd3d_shader_descriptor_type type) +{ + struct vkd3d_vk_descriptor_pool_array *array = &allocator->descriptor_pools[type]; + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs; + const struct vkd3d_vk_descriptor_pool *pool; + size_t i; + + vk_procs = &device->vk_procs; + for (i = 0; i < array->count; ++i) + { + pool = &array->pools[i]; + if (pool->descriptor_count < allocator->vk_pool_sizes[type] + || !vkd3d_vk_descriptor_pool_array_push_array(&allocator->free_descriptor_pools[type], pool, 1)) + VK_CALL(vkDestroyDescriptorPool(device->vk_device, pool->vk_pool, NULL)); + else + VK_CALL(vkResetDescriptorPool(device->vk_device, pool->vk_pool, 0)); + } + array->count = 0; +} + static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator, bool keep_reusable_resources) { struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, j; + unsigned int i;
- allocator->vk_descriptor_pool = VK_NULL_HANDLE; + memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools));
if (keep_reusable_resources) { - if (vkd3d_array_reserve((void **)&allocator->free_descriptor_pools, - &allocator->free_descriptor_pools_size, - allocator->free_descriptor_pool_count + allocator->descriptor_pool_count, - sizeof(*allocator->free_descriptor_pools))) + for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) { - for (i = 0, j = allocator->free_descriptor_pool_count; i < allocator->descriptor_pool_count; ++i, ++j) - { - VK_CALL(vkResetDescriptorPool(device->vk_device, allocator->descriptor_pools[i], 0)); - allocator->free_descriptor_pools[j] = allocator->descriptor_pools[i]; - } - allocator->free_descriptor_pool_count += allocator->descriptor_pool_count; - allocator->descriptor_pool_count = 0; + d3d12_command_allocator_reset_descriptor_pool_array(allocator, i); } } else { - for (i = 0; i < allocator->free_descriptor_pool_count; ++i) + for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) { - VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->free_descriptor_pools[i], NULL)); + vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->free_descriptor_pools[i], device); } - allocator->free_descriptor_pool_count = 0; }
for (i = 0; i < allocator->transfer_buffer_count; ++i) @@ -1586,11 +1700,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato } allocator->view_count = 0;
- for (i = 0; i < allocator->descriptor_pool_count; ++i) + for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) { - VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->descriptor_pools[i], NULL)); + vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->descriptor_pools[i], device); } - allocator->descriptor_pool_count = 0;
for (i = 0; i < allocator->framebuffer_count; ++i) { @@ -1647,6 +1760,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo { struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); unsigned int refcount = vkd3d_atomic_decrement_u32(&allocator->refcount); + size_t i;
TRACE("%p decreasing refcount to %u.\n", allocator, refcount);
@@ -1664,8 +1778,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo vkd3d_free(allocator->transfer_buffers); vkd3d_free(allocator->buffer_views); vkd3d_free(allocator->views); - vkd3d_free(allocator->descriptor_pools); - vkd3d_free(allocator->free_descriptor_pools); + for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) + { + vkd3d_vk_descriptor_pool_array_cleanup(&allocator->descriptor_pools[i]); + vkd3d_vk_descriptor_pool_array_cleanup(&allocator->free_descriptor_pools[i]); + } vkd3d_free(allocator->framebuffers); vkd3d_free(allocator->passes);
@@ -1822,6 +1939,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo struct vkd3d_queue *queue; VkResult vr; HRESULT hr; + size_t i;
if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store))) return hr; @@ -1851,11 +1969,12 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo return hresult_from_vk_result(vr); }
- allocator->vk_descriptor_pool = VK_NULL_HANDLE; + memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools));
- allocator->free_descriptor_pools = NULL; - allocator->free_descriptor_pools_size = 0; - allocator->free_descriptor_pool_count = 0; + for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) + { + vkd3d_vk_descriptor_pool_array_init(&allocator->free_descriptor_pools[i]); + }
allocator->passes = NULL; allocator->passes_size = 0; @@ -1865,9 +1984,11 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo allocator->framebuffers_size = 0; allocator->framebuffer_count = 0;
- allocator->descriptor_pools = NULL; - allocator->descriptor_pools_size = 0; - allocator->descriptor_pool_count = 0; + for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) + { + vkd3d_vk_descriptor_pool_array_init(&allocator->descriptor_pools[i]); + allocator->vk_pool_sizes[i] = min(VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE, device->vk_pool_limits[i]); + }
allocator->views = NULL; allocator->views_size = 0; @@ -2749,7 +2870,8 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li }
vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, - layout->vk_layout, variable_binding_size, unbounded_offset != UINT_MAX); + layout->descriptor_type, layout->descriptor_count + variable_binding_size, layout->vk_layout, + variable_binding_size, unbounded_offset != UINT_MAX); bindings->descriptor_sets[bindings->descriptor_set_count++] = vk_descriptor_set; }
@@ -2805,15 +2927,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; }
- if (range->descriptor_count == UINT_MAX) - { - vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; - vk_descriptor_write->dstBinding = 0; - } - else - { - vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count; - } + vk_descriptor_write->dstSet = vk_descriptor_sets[range->image_set]; + vk_descriptor_write->dstBinding = use_array ? range->image_binding : range->image_binding + index;
vk_image_info->sampler = VK_NULL_HANDLE; vk_image_info->imageView = u.view->v.u.vk_image_view; @@ -2934,10 +3049,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list }
static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write, - const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set, + const struct d3d12_root_parameter *root_parameter, const VkDescriptorSet *vk_descriptor_sets, VkBufferView *vk_buffer_view, const VkDescriptorBufferInfo *vk_buffer_info) { const struct d3d12_root_descriptor *root_descriptor; + VkDescriptorSet vk_descriptor_set;
switch (root_parameter->parameter_type) { @@ -2956,6 +3072,7 @@ static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *v }
root_descriptor = &root_parameter->u.descriptor; + vk_descriptor_set = vk_descriptor_sets ? vk_descriptor_sets[root_descriptor->set] : VK_NULL_HANDLE;
vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; @@ -3011,7 +3128,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list }
if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], - root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) + root_parameter, bindings->descriptor_sets, vk_buffer_view, vk_buffer_info)) continue;
++descriptor_count; @@ -3039,8 +3156,8 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma uav_counter_count = state->uav_counters.binding_count; if (!(vk_descriptor_writes = vkd3d_calloc(uav_counter_count, sizeof(*vk_descriptor_writes)))) return; - if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set( - list->allocator, state->uav_counters.vk_set_layout, 0, false))) + if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_counter_count, state->uav_counters.vk_set_layout, 0, false))) goto done;
for (i = 0; i < uav_counter_count; ++i) @@ -4612,8 +4729,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list,
if (vk_info->KHR_push_descriptor) { - vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, VK_NULL_HANDLE, NULL, &buffer_info); + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, NULL, &buffer_info); VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); } @@ -4621,7 +4737,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, { d3d12_command_list_prepare_descriptors(list, bind_point); vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); + root_parameter, bindings->descriptor_sets, NULL, &buffer_info); VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL));
VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); @@ -4685,8 +4801,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li
if (vk_info->KHR_push_descriptor) { - vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, VK_NULL_HANDLE, &vk_buffer_view, NULL); + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, &vk_buffer_view, NULL); VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); } @@ -4694,7 +4809,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li { d3d12_command_list_prepare_descriptors(list, bind_point); vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); + root_parameter, bindings->descriptor_sets, &vk_buffer_view, NULL); VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL));
VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); @@ -5371,8 +5486,8 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, view->info.texture.vk_view_type, view->format->type, &pipeline); }
- if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( - list->allocator, pipeline.vk_set_layout, 0, false))) + if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set(list->allocator, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, 1, pipeline.vk_set_layout, 0, false))) { ERR("Failed to allocate descriptor set.\n"); return; @@ -6345,6 +6460,7 @@ static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) break;
         case VKD3D_CS_OP_SIGNAL:
+        case VKD3D_CS_OP_SIGNAL_ON_CPU:
             d3d12_fence_decref(op->u.signal.fence);
             break;
@@ -7335,6 +7451,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * struct vkd3d_cs_op_data *op; struct d3d12_fence *fence; unsigned int i; + HRESULT hr;
queue->is_flushing = true;
@@ -7368,6 +7485,11 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); break;
+            case VKD3D_CS_OP_SIGNAL_ON_CPU:
+                if (FAILED(hr = d3d12_fence_Signal(&op->u.signal.fence->ID3D12Fence1_iface, op->u.signal.value)))
+                    ERR("Failed to signal fence %p, hr %s.\n", op->u.signal.fence, debugstr_hresult(hr));
+                break;
+
             case VKD3D_CS_OP_EXECUTE:
                 d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count);
                 break;
@@ -7510,6 +7632,36 @@ void vkd3d_release_vk_queue(ID3D12CommandQueue *queue)
     return vkd3d_queue_release(d3d12_queue->vkd3d_queue);
 }
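VKD3D_CS_OP_SIGNAL_ON_CPU defers the actual ID3D12Fence signal to the queue's flush path, so a CPU-side signal stays ordered with previously enqueued GPU work. The enqueue side follows the usual locked-producer pattern; a schematic with a plain growable array and pthreads, all names hypothetical:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdlib.h>

    enum op_code { OP_SIGNAL_ON_CPU };

    struct op
    {
        enum op_code code;
        uint64_t value;
    };

    struct op_queue
    {
        pthread_mutex_t mutex;
        struct op *ops;
        size_t count, capacity;
    };

    /* Append an op while holding the queue lock; the flush thread consumes
     * ops in FIFO order, so the signal happens after earlier submissions. */
    static int queue_signal_on_cpu(struct op_queue *q, uint64_t value)
    {
        int ret = 0;
        struct op *op;

        pthread_mutex_lock(&q->mutex);
        if (q->count == q->capacity)
        {
            size_t new_capacity = q->capacity ? q->capacity * 2 : 16;
            struct op *new_ops = realloc(q->ops, new_capacity * sizeof(*new_ops));

            if (!new_ops)
            {
                ret = -1;
                goto done;
            }
            q->ops = new_ops;
            q->capacity = new_capacity;
        }
        op = &q->ops[q->count++];
        op->code = OP_SIGNAL_ON_CPU;
        op->value = value;
    done:
        pthread_mutex_unlock(&q->mutex);
        return ret;
    }

    int main(void)
    {
        struct op_queue q = {PTHREAD_MUTEX_INITIALIZER, NULL, 0, 0};

        return queue_signal_on_cpu(&q, 1) ? 1 : 0;
    }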
+HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, uint64_t value) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); + struct vkd3d_cs_op_data *op; + HRESULT hr = S_OK; + + TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); + + vkd3d_mutex_lock(&command_queue->op_mutex); + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to add op.\n"); + hr = E_OUTOFMEMORY; + goto done; + } + op->opcode = VKD3D_CS_OP_SIGNAL_ON_CPU; + op->u.signal.fence = fence; + op->u.signal.value = value; + + d3d12_fence_incref(fence); + + d3d12_command_queue_submit_locked(command_queue); + +done: + vkd3d_mutex_unlock(&command_queue->op_mutex); + return hr; +} + /* ID3D12CommandSignature */ static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface) { diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index e92373a36fa..b51e2963efa 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -1473,16 +1473,21 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2; }
- limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, - properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); - limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, - properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); - limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, - properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); - limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, - properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); - limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, - properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); + limits->uniform_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, + properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->sampled_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindSampledImages, + properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->storage_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, + properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->storage_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageImages, + properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->sampler_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindSamplers, + properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); }
@@ -2677,39 +2682,16 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) static void device_init_descriptor_pool_sizes(struct d3d12_device *device) { const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; - VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; + unsigned int *pool_sizes = device->vk_pool_limits;
- if (device->use_vk_heaps) - { - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, - VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; - pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); - device->vk_pool_count = 3; - return; - } - - VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6); - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_CBV] = min(limits->uniform_buffer_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - pool_sizes[1].descriptorCount = min(limits->sampled_image_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SRV] = min(limits->sampled_image_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - pool_sizes[2].descriptorCount = pool_sizes[1].descriptorCount; - pool_sizes[3].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - pool_sizes[3].descriptorCount = min(limits->storage_image_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_UAV] = min(limits->storage_image_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - pool_sizes[4].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - pool_sizes[4].descriptorCount = pool_sizes[3].descriptorCount; - pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER] = min(limits->sampler_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - device->vk_pool_count = 6; };
static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) @@ -3461,6 +3443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 for (i = 0; i < data->NumFeatureLevels; ++i) { D3D_FEATURE_LEVEL fl = data->pFeatureLevelsRequested[i]; + TRACE("Requested feature level %#x.\n", fl); if (data->MaxSupportedFeatureLevel < fl && fl <= vulkan_info->max_feature_level) data->MaxSupportedFeatureLevel = fl; } @@ -3574,12 +3557,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 return E_INVALIDARG; }
- if (data->Format == DXGI_FORMAT_UNKNOWN) - { - data->PlaneCount = 1; - return S_OK; - } - if (!(format = vkd3d_get_format(device, data->Format, false))) format = vkd3d_get_format(device, data->Format, true); if (!format) @@ -4385,7 +4362,7 @@ static void d3d12_device_get_resource1_allocation_info(struct d3d12_device *devi { desc = &resource_descs[i];
- if (FAILED(d3d12_resource_validate_desc(desc, device))) + if (FAILED(d3d12_resource_validate_desc(desc, device, 0))) { WARN("Invalid resource desc.\n"); goto invalid; @@ -4716,10 +4693,11 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, uint64_t base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) { - unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; + unsigned int i, sub_resource_idx, plane_idx, miplevel_idx, row_count, row_size, row_pitch; unsigned int width, height, depth, plane_count, sub_resources_per_plane; const struct vkd3d_format *format; uint64_t offset, size, total; + DXGI_FORMAT plane_format;
if (layouts) memset(layouts, 0xff, sizeof(*layouts) * sub_resource_count); @@ -4730,20 +4708,19 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, if (total_bytes) *total_bytes = ~(uint64_t)0;
- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) + if (!(format = vkd3d_get_format(device, desc->Format, true))) { WARN("Invalid format %#x.\n", desc->Format); return; }
- if (FAILED(d3d12_resource_validate_desc(desc, device))) + if (FAILED(d3d12_resource_validate_desc(desc, device, VKD3D_VALIDATE_FORCE_ALLOW_DS))) { WARN("Invalid resource desc.\n"); return; }
- plane_count = ((format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) - && (format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) ? 2 : 1; + plane_count = format->plane_count; sub_resources_per_plane = d3d12_resource_desc_get_sub_resource_count(desc);
if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, sub_resources_per_plane * plane_count)) @@ -4754,21 +4731,31 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device,
offset = 0; total = 0; + plane_format = desc->Format; for (i = 0; i < sub_resource_count; ++i) { sub_resource_idx = (first_sub_resource + i) % sub_resources_per_plane; + plane_idx = (first_sub_resource + i) / sub_resources_per_plane; miplevel_idx = sub_resource_idx % desc->MipLevels; + + if (plane_count > 1) + { + plane_format = !plane_idx ? DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R8_TYPELESS; + format = vkd3d_get_format(device, plane_format, true); + } + width = align(d3d12_resource_desc_get_width(desc, miplevel_idx), format->block_width); height = align(d3d12_resource_desc_get_height(desc, miplevel_idx), format->block_height); depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); row_count = height / format->block_height; row_size = (width / format->block_width) * format->byte_count * format->block_byte_count; - row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + /* Direct3D 12 requires double the alignment for dual planes. */ + row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count);
if (layouts) { layouts[i].Offset = base_offset + offset; - layouts[i].Footprint.Format = desc->Format; + layouts[i].Footprint.Format = plane_format; layouts[i].Footprint.Width = width; layouts[i].Footprint.Height = height; layouts[i].Footprint.Depth = depth; @@ -4780,7 +4767,7 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, row_sizes[i] = row_size;
size = max(0, row_count - 1) * row_pitch + row_size; - size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + size; + size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count) + size;
total = offset + size; offset = align(total, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index 1f7d90eb95f..eab97715944 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -951,7 +951,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, HRESULT hr;
VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); - VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK); + VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device, 0) == S_OK);
if (!desc->MipLevels) { @@ -1847,7 +1847,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 return true; }
-HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) +HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags) { const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; const struct vkd3d_format *format; @@ -1893,7 +1893,8 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 return E_INVALIDARG; }
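Back in d3d12_device_get_copyable_footprints() above: dual-plane depth/stencil resources now report each plane with its own typeless format (R32 for the depth plane, R8 for stencil), and per the comment in that hunk the row pitch alignment is doubled for dual planes. A simplified footprint row-pitch computation under those assumptions, ignoring block-compressed formats; purely illustrative, not the vkd3d code:

    #include <stdint.h>
    #include <stdio.h>

    #define PITCH_ALIGNMENT 256u  /* D3D12_TEXTURE_DATA_PITCH_ALIGNMENT */

    static uint64_t align_up(uint64_t value, uint64_t alignment)
    {
        return (value + alignment - 1) & ~(alignment - 1);
    }

    /* Row pitch for one mip of one plane: bytes per texel row, rounded up to
     * the pitch alignment, which is doubled for dual-plane resources. */
    static uint64_t row_pitch(unsigned int width, unsigned int byte_count,
            unsigned int plane_count)
    {
        return align_up((uint64_t)width * byte_count, PITCH_ALIGNMENT * plane_count);
    }

    int main(void)
    {
        /* A 100-texel-wide D24S8-style texture: the depth plane is read as
         * 4-byte texels, the stencil plane as 1-byte texels. */
        printf("depth pitch %llu, stencil pitch %llu\n",
                (unsigned long long)row_pitch(100, 4, 2),
                (unsigned long long)row_pitch(100, 1, 2));
        return 0;
    }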
- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) + if (!(format = vkd3d_get_format(device, desc->Format, + desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL || flags & VKD3D_VALIDATE_FORCE_ALLOW_DS))) { WARN("Invalid format %#x.\n", desc->Format); return E_INVALIDARG; @@ -2013,7 +2014,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 resource->gpu_address = 0; resource->flags = 0;
- if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device))) + if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device, 0))) return hr;
resource->format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0); diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 2b0f81d3812..aa08dc985bd 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -265,25 +265,6 @@ static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHA } }
-static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, - bool is_buffer) -{ - switch (type) - { - case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: - return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: - return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: - return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: - return VK_DESCRIPTOR_TYPE_SAMPLER; - default: - FIXME("Unhandled descriptor range type type %#x.\n", type); - return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - } -} - static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( D3D12_DESCRIPTOR_RANGE_TYPE type) { @@ -597,8 +578,9 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i goto done; }
- qsort(info->ranges, info->range_count, sizeof(*info->ranges), - d3d12_root_signature_info_range_compare); + if (info->ranges) + qsort(info->ranges, info->range_count, sizeof(*info->ranges), + d3d12_root_signature_info_range_compare);
for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i) { @@ -717,6 +699,8 @@ struct vk_binding_array VkDescriptorSetLayoutBinding *bindings; size_t capacity, count;
+ enum vkd3d_shader_descriptor_type descriptor_type; + unsigned int descriptor_set; unsigned int table_index; unsigned int unbounded_offset; VkDescriptorSetLayoutCreateFlags flags; @@ -754,14 +738,24 @@ static bool vk_binding_array_add_binding(struct vk_binding_array *array, return true; }
+static void vk_binding_array_make_unbound(struct vk_binding_array *array, + unsigned int offset, unsigned int table_index) +{ + array->unbounded_offset = offset; + array->table_index = table_index; +} + struct vkd3d_descriptor_set_context { struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; + struct vk_binding_array *current_binding_array[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; unsigned int table_index; - unsigned int unbounded_offset; unsigned int descriptor_index; unsigned int uav_counter_index; unsigned int push_constant_index; + + struct vk_binding_array *push_descriptor_set; + bool push_descriptor; };
static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) @@ -786,46 +780,66 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns return true; }
-static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( - struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) +static struct vk_binding_array *d3d12_root_signature_append_vk_binding_array( + struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, + VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) { + struct vk_binding_array *array; + unsigned int set; + if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) return NULL;
- return &context->vk_bindings[root_signature->vk_set_count]; + set = root_signature->vk_set_count++; + array = &context->vk_bindings[set]; + array->descriptor_type = descriptor_type; + array->descriptor_set = set; + array->unbounded_offset = UINT_MAX; + array->flags = flags; + + return array; }
-static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, - VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) +static struct vk_binding_array *d3d12_root_signature_vk_binding_array_for_type( + struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, + struct vkd3d_descriptor_set_context *context) { - struct vk_binding_array *array; + struct vk_binding_array *array, **current;
- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) - return; + if (context->push_descriptor) + { + if (!context->push_descriptor_set) + context->push_descriptor_set = d3d12_root_signature_append_vk_binding_array(root_signature, + descriptor_type, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, context);
- array->table_index = context->table_index; - array->unbounded_offset = context->unbounded_offset; - array->flags = flags; + return context->push_descriptor_set; + }
- ++root_signature->vk_set_count; + current = context->current_binding_array; + if (!(array = current[descriptor_type])) + { + array = d3d12_root_signature_append_vk_binding_array(root_signature, descriptor_type, 0, context); + current[descriptor_type] = array; + } + + return array; }
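The helper above replaces the old "current set" scheme: bounded ranges are now grouped into one Vulkan descriptor set per vkd3d descriptor type, while root (push) descriptors all share a single set. A condensed sketch of that allocation policy, with simplified types standing in for the vkd3d structures:

    #include <stdbool.h>

    enum desc_type { DESC_CBV, DESC_SRV, DESC_UAV, DESC_SAMPLER, DESC_TYPE_COUNT };

    struct set_cache
    {
        int current[DESC_TYPE_COUNT]; /* open set per descriptor type, or -1 */
        int push_set;                 /* shared push-descriptor set, or -1 */
        int set_count;
        bool push_descriptor;         /* currently adding root descriptors? */
    };

    static int set_for_type(struct set_cache *c, enum desc_type type)
    {
        if (c->push_descriptor)
        {
            if (c->push_set < 0)
                c->push_set = c->set_count++;
            return c->push_set;
        }
        if (c->current[type] < 0)
            c->current[type] = c->set_count++;
        return c->current[type];
    }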
static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, - unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, - unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, - const VkSampler *immutable_sampler, unsigned int *binding_idx) + struct vk_binding_array *array, enum vkd3d_shader_descriptor_type descriptor_type, + unsigned int register_space, unsigned int register_idx, bool buffer_descriptor, + enum vkd3d_shader_visibility shader_visibility, unsigned int descriptor_count, + struct vkd3d_descriptor_set_context *context, const VkSampler *immutable_sampler) { struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL; struct vkd3d_shader_resource_binding *mapping; - struct vk_binding_array *array; + VkDescriptorType vk_descriptor_type; unsigned int idx;
- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) - || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], - vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, - stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) + vk_descriptor_type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor); + if (!vk_binding_array_add_binding(array, vk_descriptor_type, descriptor_count, + stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx)) return E_OUTOFMEMORY;
mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; @@ -834,7 +848,7 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur mapping->register_index = register_idx; mapping->shader_visibility = shader_visibility; mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; - mapping->binding.set = root_signature->vk_set_count; + mapping->binding.set = array->descriptor_set; mapping->binding.binding = idx; mapping->binding.count = descriptor_count; if (offset) @@ -843,12 +857,6 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur offset->dynamic_offset_index = ~0u; }
- if (context->unbounded_offset != UINT_MAX) - d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - - if (binding_idx) - *binding_idx = idx; - return S_OK; }
@@ -911,7 +919,7 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro }
static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, - const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, + struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, unsigned int vk_binding_array_count, unsigned int bindings_per_range, struct vkd3d_descriptor_set_context *context) { @@ -919,34 +927,49 @@ static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_r bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; enum vkd3d_shader_descriptor_type descriptor_type = range->type; unsigned int i, register_space = range->register_space; + struct vk_binding_array *array; HRESULT hr;
- if (range->descriptor_count == UINT_MAX) - context->unbounded_offset = range->offset; + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) + return E_OUTOFMEMORY;
+ range->set = array->descriptor_set - root_signature->main_set; + range->binding = array->count; for (i = 0; i < bindings_per_range; ++i) { - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, register_space, range->base_register_idx + i, is_buffer, shader_visibility, - vk_binding_array_count, context, NULL, NULL))) + vk_binding_array_count, context, NULL))) return hr; }
- if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + if (range->descriptor_count == UINT_MAX) { - context->unbounded_offset = UINT_MAX; - return S_OK; + vk_binding_array_make_unbound(array, range->offset, context->table_index); + context->current_binding_array[descriptor_type] = NULL; }
+ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + return S_OK; + + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) + return E_OUTOFMEMORY; + + range->image_set = array->descriptor_set - root_signature->main_set; + range->image_binding = array->count; for (i = 0; i < bindings_per_range; ++i) { - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, register_space, range->base_register_idx + i, false, shader_visibility, - vk_binding_array_count, context, NULL, NULL))) + vk_binding_array_count, context, NULL))) return hr; }
- context->unbounded_offset = UINT_MAX; + if (range->descriptor_count == UINT_MAX) + { + vk_binding_array_make_unbound(array, range->offset, context->table_index); + context->current_binding_array[descriptor_type] = NULL; + }
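Clearing current_binding_array[descriptor_type] after an unbounded range is what forces the next range of that type into a fresh set. That matches a Vulkan constraint: a binding created with VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT must have the highest binding number in its set layout, so nothing can be appended behind it. A sketch of the layout-side flags, assuming the Vulkan 1.2 descriptor-indexing names and with the vkd3d wiring simplified:

    /* Two bindings; only the last one may be variable-sized. */
    VkDescriptorBindingFlags binding_flags[] =
    {
        0,
        VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT,
    };
    VkDescriptorSetLayoutBindingFlagsCreateInfo flags_info =
    {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO,
        .bindingCount = 2,
        .pBindingFlags = binding_flags,
    };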
return S_OK; } @@ -1199,16 +1222,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
if (use_vk_heaps) { - /* set, binding and vk_binding_count are not used. */ + /* set, binding, image_set, image_binding, and vk_binding_count are not used. */ range->set = 0; range->binding = 0; + range->image_set = 0; + range->image_binding = 0; range->vk_binding_count = 0; d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); continue; }
- range->set = root_signature->vk_set_count - root_signature->main_set; - if (root_signature->use_descriptor_arrays) { if (j && range->type != table->ranges[j - 1].type) @@ -1229,6 +1252,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
range->set = base_range->set; range->binding = base_range->binding; + range->image_set = base_range->image_set; + range->image_binding = base_range->image_binding; range->vk_binding_count = base_range->vk_binding_count - rel_offset; d3d12_root_signature_map_descriptor_unbounded_binding(root_signature, range, rel_offset, shader_visibility, context); @@ -1251,8 +1276,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo bindings_per_range = range->descriptor_count; }
- range->binding = context->vk_bindings[root_signature->vk_set_count].count; - if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) return hr; @@ -1266,7 +1289,9 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { - unsigned int binding, i; + enum vkd3d_shader_descriptor_type descriptor_type; + struct vk_binding_array *array; + unsigned int i; HRESULT hr;
root_signature->push_descriptor_mask = 0; @@ -1281,14 +1306,19 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
root_signature->push_descriptor_mask |= 1u << i;
- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, - vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), + descriptor_type = vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType); + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) + return E_OUTOFMEMORY; + + root_signature->parameters[i].parameter_type = p->ParameterType; + root_signature->parameters[i].u.descriptor.set = array->descriptor_set; + root_signature->parameters[i].u.descriptor.binding = array->count; + + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, - vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) + vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL))) return hr;
- root_signature->parameters[i].parameter_type = p->ParameterType; - root_signature->parameters[i].u.descriptor.binding = binding; }
return S_OK; @@ -1298,10 +1328,19 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { + struct vk_binding_array *array; unsigned int i; HRESULT hr;
VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers); + + if (!desc->NumStaticSamplers) + return S_OK; + + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, context))) + return E_OUTOFMEMORY; + for (i = 0; i < desc->NumStaticSamplers; ++i) { const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; @@ -1309,16 +1348,13 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) return hr;
- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false, vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, - &root_signature->static_samplers[i], NULL))) + &root_signature->static_samplers[i]))) return hr; }
- if (device->use_vk_heaps) - d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - return S_OK; }
@@ -1450,29 +1486,52 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, return S_OK; }
+static HRESULT d3d12_descriptor_set_layout_init(struct d3d12_descriptor_set_layout *layout, + struct d3d12_device *device, const struct vk_binding_array *array) +{ + unsigned int descriptor_count; + bool unbounded; + HRESULT hr; + size_t i; + + descriptor_count = array->unbounded_offset; + if (!(unbounded = descriptor_count != UINT_MAX)) + { + for (i = 0, descriptor_count = 0; i < array->count; ++i) + { + descriptor_count += array->bindings[i].descriptorCount; + } + } + + if (FAILED(hr = vkd3d_create_descriptor_set_layout(device, array->flags, + array->count, unbounded, array->bindings, &layout->vk_layout))) + return hr; + layout->descriptor_type = array->descriptor_type; + layout->descriptor_count = descriptor_count; + layout->unbounded_offset = array->unbounded_offset; + layout->table_index = array->table_index; + + return S_OK; +} + static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) { unsigned int i; HRESULT hr;
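d3d12_descriptor_set_layout_init() derives the set's descriptor budget in one of two ways: an unbounded set simply reports its range offset, while a bounded set sums the per-binding counts. For example (hypothetical numbers), a bounded array with binding counts {1, 4, 2} yields descriptor_count 7. A standalone version of the computation:

    #include <limits.h>
    #include <vulkan/vulkan.h>

    static unsigned int count_descriptors(const VkDescriptorSetLayoutBinding *bindings,
            size_t binding_count, unsigned int unbounded_offset)
    {
        unsigned int count = 0;
        size_t i;

        /* Unbounded sets are sized when the set is allocated. */
        if (unbounded_offset != UINT_MAX)
            return unbounded_offset;
        for (i = 0; i < binding_count; ++i)
            count += bindings[i].descriptorCount;
        return count;
    }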
- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count)) return E_INVALIDARG;
for (i = 0; i < root_signature->vk_set_count; ++i) { - struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; - struct vk_binding_array *array = &context->vk_bindings[i]; + const struct vk_binding_array *array = &context->vk_bindings[i];
VKD3D_ASSERT(array->count);
- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, - array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) + if (FAILED(hr = d3d12_descriptor_set_layout_init(&root_signature->descriptor_set_layouts[i], + root_signature->device, array))) return hr; - layout->unbounded_offset = array->unbounded_offset; - layout->table_index = array->table_index; }
return S_OK; @@ -1518,7 +1577,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa HRESULT hr;
memset(&context, 0, sizeof(context)); - context.unbounded_offset = UINT_MAX;
root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; root_signature->refcount = 1; @@ -1580,17 +1638,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa sizeof(*root_signature->static_samplers)))) goto fail;
+ context.push_descriptor = vk_info->KHR_push_descriptor; if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) goto fail; - - /* We use KHR_push_descriptor for root descriptor parameters. */ - if (vk_info->KHR_push_descriptor) - { - d3d12_root_signature_append_vk_binding_array(root_signature, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); - } - - root_signature->main_set = root_signature->vk_set_count; + root_signature->main_set = !!context.push_descriptor_set; + context.push_descriptor = false;
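With this change the push-descriptor set, when present, is always set 0, so main_set becomes 1 exactly when a push-descriptor set was created (i.e. the root signature has root descriptors and KHR_push_descriptor is available); clearing context.push_descriptor afterwards keeps descriptor tables and static samplers out of that set. On the command-buffer side, root descriptor writes can then go through vkCmdPushDescriptorSetKHR() rather than a pool allocation; a simplified sketch, with variable names illustrative:

    VkDescriptorBufferInfo buffer_info = {vk_buffer, 0, VK_WHOLE_SIZE};
    VkWriteDescriptorSet write =
    {
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .dstBinding = root_parameter_binding,
        .descriptorCount = 1,
        .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .pBufferInfo = &buffer_info,
    };

    vkCmdPushDescriptorSetKHR(vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
            vk_pipeline_layout, 0 /* the push-descriptor set */, 1, &write);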
if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc, root_signature->push_constant_ranges, &root_signature->push_constant_range_count))) @@ -2281,7 +2333,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device,
const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, {VKD3D_SHADER_COMPILE_OPTION_FEATURE, feature_flags_compile_option(device)}, @@ -2336,7 +2388,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER
const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, };
@@ -3543,7 +3595,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s mask |= 1u << e->InputSlot; } graphics->attribute_count = j; - vkd3d_shader_free_shader_signature(&input_signature);
switch (desc->strip_cut_value) { @@ -3609,6 +3660,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) goto fail;
+ vkd3d_shader_free_shader_signature(&input_signature); state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; state->implicit_root_signature = NULL; d3d12_device_add_ref(state->device = device); @@ -4015,7 +4067,7 @@ static int compile_hlsl_cs(const struct vkd3d_shader_code *hlsl, struct vkd3d_sh
static const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, };
info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c index 839bb173854..c2832a61f67 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -29,7 +29,7 @@ #define UINT VKD3D_FORMAT_TYPE_UINT static const struct vkd3d_format vkd3d_formats[] = { - {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1}, + {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1, 0, 1}, {DXGI_FORMAT_R32G32B32A32_TYPELESS, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1, TYPELESS}, {DXGI_FORMAT_R32G32B32A32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1}, {DXGI_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32B32A32_UINT, 16, 1, 1, 1, COLOR, 1, UINT}, diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index 97a99782d6a..fd1fbb1679a 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -58,12 +58,19 @@ #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u #define VKD3D_MAX_DESCRIPTOR_SETS 64u +/* Direct3D 12 binding tier 3 has a limit of "1,000,000+" CBVs, SRVs and UAVs. + * The "+" presumably means that implementations may support an even higher + * limit; the table only specifies guaranteed minimums. */ +#define VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS 1000000u /* D3D12 binding tier 3 has a limit of 2048 samplers. */ #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u -/* The main limitation here is the simple descriptor pool recycling scheme - * requiring each pool to contain all descriptor types used by vkd3d. Limit - * this number to prevent excessive pool memory use. */ #define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u) +#define VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE 1024u + +#define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1) + +#define VKD3D_VALIDATE_FORCE_ALLOW_DS 0x1u
extern uint64_t object_global_serial_id;
@@ -235,8 +242,6 @@ struct vkd3d_fence_worker struct vkd3d_waiting_fence *fences; size_t fences_size;
- void (*wait_for_gpu_fence)(struct vkd3d_fence_worker *worker, const struct vkd3d_waiting_fence *enqueued_fence); - struct vkd3d_queue *queue; struct d3d12_device *device; }; @@ -529,7 +534,7 @@ struct vkd3d_resource_allocation_info };
bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); -HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device); +HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags); void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, @@ -770,6 +775,25 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc);
+static inline VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, + bool is_buffer) +{ + switch (type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + return VK_DESCRIPTOR_TYPE_SAMPLER; + default: + FIXME("Unhandled descriptor range type %#x.\n", type); + return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + } +} + enum vkd3d_vk_descriptor_set_index { VKD3D_SET_INDEX_SAMPLER, @@ -899,6 +923,8 @@ struct d3d12_root_descriptor_table_range unsigned int vk_binding_count; uint32_t set; uint32_t binding; + uint32_t image_set; + uint32_t image_binding;
enum vkd3d_shader_descriptor_type type; uint32_t descriptor_magic; @@ -920,6 +946,7 @@ struct d3d12_root_constant
struct d3d12_root_descriptor { + uint32_t set; uint32_t binding; };
@@ -936,7 +963,9 @@ struct d3d12_root_parameter
struct d3d12_descriptor_set_layout { + enum vkd3d_shader_descriptor_type descriptor_type; VkDescriptorSetLayout vk_layout; + unsigned int descriptor_count; unsigned int unbounded_offset; unsigned int table_index; }; @@ -1135,6 +1164,18 @@ struct vkd3d_buffer VkDeviceMemory vk_memory; };
+struct vkd3d_vk_descriptor_pool +{ + unsigned int descriptor_count; + VkDescriptorPool vk_pool; +}; + +struct vkd3d_vk_descriptor_pool_array +{ + struct vkd3d_vk_descriptor_pool *pools; + size_t capacity, count; +}; + /* ID3D12CommandAllocator */ struct d3d12_command_allocator { @@ -1146,11 +1187,9 @@ struct d3d12_command_allocator
VkCommandPool vk_command_pool;
- VkDescriptorPool vk_descriptor_pool; + VkDescriptorPool vk_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
- VkDescriptorPool *free_descriptor_pools; - size_t free_descriptor_pools_size; - size_t free_descriptor_pool_count; + struct vkd3d_vk_descriptor_pool_array free_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
VkRenderPass *passes; size_t passes_size; @@ -1160,9 +1199,8 @@ struct d3d12_command_allocator size_t framebuffers_size; size_t framebuffer_count;
- VkDescriptorPool *descriptor_pools; - size_t descriptor_pools_size; - size_t descriptor_pool_count; + struct vkd3d_vk_descriptor_pool_array descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; + unsigned int vk_pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
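The allocator now keeps one pool array per vkd3d descriptor type instead of a single pool covering all descriptor types at once, which lets each type grow (vk_pool_sizes) and recycle independently. A minimal sketch of how a pool might be appended to one of these arrays; the helper below is hypothetical (vkd3d has its own array-growth routine) but shows the intended bookkeeping:

    #include <stdbool.h>
    #include <stdlib.h>
    #include <vulkan/vulkan.h>

    struct vkd3d_vk_descriptor_pool
    {
        unsigned int descriptor_count;
        VkDescriptorPool vk_pool;
    };

    struct vkd3d_vk_descriptor_pool_array
    {
        struct vkd3d_vk_descriptor_pool *pools;
        size_t capacity, count;
    };

    static bool pool_array_push(struct vkd3d_vk_descriptor_pool_array *array,
            VkDescriptorPool vk_pool, unsigned int descriptor_count)
    {
        if (array->count == array->capacity)
        {
            size_t capacity = array->capacity ? 2 * array->capacity : 4;
            struct vkd3d_vk_descriptor_pool *pools;

            if (!(pools = realloc(array->pools, capacity * sizeof(*pools))))
                return false;
            array->pools = pools;
            array->capacity = capacity;
        }
        array->pools[array->count].descriptor_count = descriptor_count;
        array->pools[array->count++].vk_pool = vk_pool;
        return true;
    }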
struct vkd3d_view **views; size_t views_size; @@ -1324,6 +1362,7 @@ enum vkd3d_cs_op { VKD3D_CS_OP_WAIT, VKD3D_CS_OP_SIGNAL, + VKD3D_CS_OP_SIGNAL_ON_CPU, VKD3D_CS_OP_EXECUTE, VKD3D_CS_OP_UPDATE_MAPPINGS, VKD3D_CS_OP_COPY_MAPPINGS, @@ -1516,8 +1555,6 @@ struct vkd3d_desc_object_cache size_t size; };
-#define VKD3D_DESCRIPTOR_POOL_COUNT 6 - /* ID3D12Device */ struct d3d12_device { @@ -1536,8 +1573,7 @@ struct d3d12_device struct vkd3d_desc_object_cache view_desc_cache; struct vkd3d_desc_object_cache cbuffer_desc_cache;
- VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; - unsigned int vk_pool_count; + unsigned int vk_pool_limits[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps;
This merge request was approved by Matteo Bruni.
This merge request was approved by Elizabeth Figura.