The float4 UAV buffer load/store test currently fails if the shader loads from the UAV, because the load causes vkd3d-shader to declare the image with the R32f format instead of Unknown.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com
-- v2: vkd3d: Send typed UAV unknown format read support info to vkd3d-shader. vkd3d: Check specific formats for typed UAV load feature support. vkd3d-shader: Introduce a compile option to use Unknown format for typed UAV loads. tests: Test float4 UAV buffer load/store.
From: Conor McCarthy cmccarthy@codeweavers.com
The float4 UAV buffer load/store test currently fails if the shader loads from the UAV, because the load causes vkd3d-shader to declare the image with the R32f format instead of Unknown.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- tests/d3d12.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index ec5cfb20..7a0e33a4 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -20947,6 +20947,57 @@ static void test_typed_buffer_uav(void) 0x00000000, 0x0002100a, 0x00004001, 0x00000020, 0x0002400a, 0x0a0000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00004002, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x0100003e, }; + static const DWORD cs_vec4_code[] = + { +#if 0 + RWBuffer<float4> buffer; + + [numthreads(8, 1, 1)] + void main(uint3 group_id : SV_groupID, uint group_index : SV_GroupIndex) + { + uint global_index = 8 * group_id.x + group_index; + buffer[global_index] = float4(0.5f, 0.625f, 0.75f, 1.0f); + } +#endif + 0x43425844, 0x37e9fa91, 0x11ca38f4, 0x9d4a70b7, 0x4fd05c45, 0x00000001, 0x000000e0, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x0000008c, 0x00050050, 0x00000023, 0x0100086a, + 0x0400089c, 0x0011e000, 0x00000000, 0x00005555, 0x0200005f, 0x00024000, 0x0200005f, 0x00021012, + 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000001, 0x00000001, 0x07000023, 0x00100012, + 0x00000000, 0x0002100a, 0x00004001, 0x00000008, 0x0002400a, 0x0a0000a4, 0x0011e0f2, 0x00000000, + 0x00100006, 0x00000000, 0x00004002, 0x3f000000, 0x3f200000, 0x3f400000, 0x3f800000, 0x0100003e, + }; + static const DWORD cs_vec4_load_code[] = + { + /* Compiled with /Od */ +#if 0 + RWBuffer<float4> buffer; + + [numthreads(4, 1, 1)] + void main(uint3 group_id : SV_groupID, uint group_index : SV_GroupIndex) + { + uint global_index = 8 * group_id.x + group_index * 2u; + buffer[global_index] = float4(0.625f, 0.5f, 1.0f, 0.75f); + buffer[global_index + 1] = buffer[global_index]; + } +#endif + 0x43425844, 0xe4c1718b, 0xd2cd85b0, 0x06a8d73e, 0x2815a795, 0x00000001, 0x000001b0, 0x00000004, + 0x00000030, 0x00000040, 0x00000050, 0x000001a0, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, + 0x4e47534f, 0x00000008, 0x00000000, 
0x00000008, 0x58454853, 0x00000148, 0x00050050, 0x00000052, + 0x0100886a, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555, 0x0200005f, 0x00024000, 0x0200005f, + 0x00021012, 0x02000068, 0x00000002, 0x0400009b, 0x00000004, 0x00000001, 0x00000001, 0x05000036, + 0x00100012, 0x00000000, 0x00004001, 0x00000008, 0x07000026, 0x0000d000, 0x00100012, 0x00000000, + 0x0010000a, 0x00000000, 0x0002100a, 0x07000026, 0x0000d000, 0x00100022, 0x00000000, 0x0002400a, + 0x00004001, 0x00000002, 0x0700001e, 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, + 0x00000000, 0x0a0000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00004002, 0x3f200000, + 0x3f000000, 0x3f800000, 0x3f400000, 0x05000036, 0x00100022, 0x00000000, 0x00004001, 0x00000001, + 0x0700001e, 0x00100022, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x890000a3, + 0x80000042, 0x00155543, 0x001000f2, 0x00000001, 0x00100006, 0x00000000, 0x0011ee46, 0x00000000, + 0x070000a4, 0x0011e0f2, 0x00000000, 0x00100556, 0x00000000, 0x00100e46, 0x00000001, 0x0100003e, + 0x30494653, 0x00000008, 0x00000800, 0x00000000, + }; + static const struct vec4 expected = {0.5f, 0.625f, 0.75f, 1.0f}; + static const struct vec4 expected_ld = {0.625f, 0.5f, 1.0f, 0.75f};
if (!init_compute_test_context(&context)) return; @@ -21004,6 +21055,48 @@ static void test_typed_buffer_uav(void) check_readback_data_float(&rb.rb, NULL, 0.5f, 0); release_resource_readback(&rb);
+ reset_command_list(command_list, context.allocator); + transition_sub_resource_state(command_list, resource, 0, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + ID3D12PipelineState_Release(pipeline_state); + pipeline_state = create_compute_pipeline_state(device, root_signature, + shader_bytecode(cs_vec4_code, sizeof(cs_vec4_code))); + + uav_desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + uav_desc.Buffer.NumElements = 16; + ID3D12Device_CreateUnorderedAccessView(device, resource, NULL, &uav_desc, cpu_descriptor_handle); + + ID3D12GraphicsCommandList_SetPipelineState(command_list, pipeline_state); + ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, root_signature); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(command_list, 0, gpu_descriptor_handle); + ID3D12GraphicsCommandList_Dispatch(command_list, 2, 1, 1); + + transition_sub_resource_state(command_list, resource, 0, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(resource, uav_desc.Format, &rb, queue, command_list); + check_readback_data_vec4(&rb.rb, NULL, &expected, 0); + release_resource_readback(&rb); + + reset_command_list(command_list, context.allocator); + transition_sub_resource_state(command_list, resource, 0, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + ID3D12PipelineState_Release(pipeline_state); + pipeline_state = create_compute_pipeline_state(device, root_signature, + shader_bytecode(cs_vec4_load_code, sizeof(cs_vec4_load_code))); + + ID3D12GraphicsCommandList_SetPipelineState(command_list, pipeline_state); + ID3D12GraphicsCommandList_SetComputeRootSignature(command_list, root_signature); + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(command_list, 0, gpu_descriptor_handle); + ID3D12GraphicsCommandList_Dispatch(command_list, 2, 1, 1); + + transition_sub_resource_state(command_list, resource, 0, + 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + + get_buffer_readback_with_command_list(resource, uav_desc.Format, &rb, queue, command_list); + todo check_readback_data_vec4(&rb.rb, NULL, &expected_ld, 0); + release_resource_readback(&rb); + ID3D12Resource_Release(resource); ID3D12RootSignature_Release(root_signature); ID3D12PipelineState_Release(pipeline_state);
From: Conor McCarthy cmccarthy@codeweavers.com
Specifying R32 for UAVs created with a vector format, e.g. R32G32B32A32_FLOAT, results in only the red component being loaded or stored, potentially causing images to contain only the red component.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- include/vkd3d_shader.h | 18 ++++++++++++++++++ libs/vkd3d-shader/spirv.c | 16 +++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/include/vkd3d_shader.h b/include/vkd3d_shader.h index ebddbba7..fc42fef4 100644 --- a/include/vkd3d_shader.h +++ b/include/vkd3d_shader.h @@ -99,6 +99,22 @@ enum vkd3d_shader_compile_option_buffer_uav VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV), };
+/** + * Determines how typed UAVs are declared. + */ +enum vkd3d_shader_compile_option_typed_uav +{ + /** Use R32(u)i/R32f format for UAVs which are read from. This is the default value. */ + VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_R32 = 0x00000000, + /** + * Use Unknown format for UAVs which are read from. This should only be set if + * shaderStorageImageReadWithoutFormat is enabled in the target environment. + */ + VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_UNKNOWN = 0x00000001, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV), +}; + enum vkd3d_shader_compile_option_formatting_flags { VKD3D_SHADER_COMPILE_OPTION_FORMATTING_NONE = 0x00000000, @@ -127,6 +143,8 @@ enum vkd3d_shader_compile_option_name VKD3D_SHADER_COMPILE_OPTION_FORMATTING = 0x00000003, /** \a value is a member of enum vkd3d_shader_api_version. \since 1.3 */ VKD3D_SHADER_COMPILE_OPTION_API_VERSION = 0x00000004, + /** \a value is a member of enum vkd3d_shader_compile_option_typed_uav. \since 1.5 */ + VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV = 0x00000005,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), }; diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index c519a2ba..a3ad02aa 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -2228,6 +2228,7 @@ struct vkd3d_dxbc_compiler
bool strip_debug; bool ssbo_uavs; + bool uav_read_without_format;
struct rb_tree symbol_table; uint32_t temp_id; @@ -2379,6 +2380,15 @@ struct vkd3d_dxbc_compiler *vkd3d_dxbc_compiler_create(const struct vkd3d_shader
case VKD3D_SHADER_COMPILE_OPTION_API_VERSION: break; + + case VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV: + if (option->value == VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_R32) + compiler->uav_read_without_format = false; + else if (option->value == VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_UNKNOWN) + compiler->uav_read_without_format = true; + else + WARN("Ignoring unrecognised value %#x for option %#x.\n", option->value, option->name); + break; } }
@@ -5856,14 +5866,18 @@ static uint32_t vkd3d_dxbc_compiler_get_image_type_id(struct vkd3d_dxbc_compiler const struct vkd3d_shader_descriptor_info *d; uint32_t sampled_type_id; SpvImageFormat format; + bool uav_read;
format = SpvImageFormatUnknown; if (reg->type == VKD3DSPR_UAV) { d = vkd3d_dxbc_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - if (raw_structured || (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) + uav_read = !!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + if (raw_structured || (uav_read && !compiler->uav_read_without_format)) format = image_format_for_image_read(data_type); + else if (uav_read) + vkd3d_spirv_enable_capability(builder, SpvCapabilityStorageImageReadWithoutFormat); }
sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1);
From: Conor McCarthy cmccarthy@codeweavers.com
Vulkan's shaderStorageImageExtendedFormats includes more formats than are required by D3D12.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/device.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index eaedc444..ec6bb57c 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1391,6 +1391,45 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); }
+static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct d3d12_device *device) +{ + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; + const struct vkd3d_format *format; + VkFormatProperties properties; + unsigned int i; + + static const DXGI_FORMAT additional_formats[] = + { + DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R32G32B32A32_UINT, + DXGI_FORMAT_R32G32B32A32_SINT, + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R16_FLOAT, + DXGI_FORMAT_R16_UINT, + DXGI_FORMAT_R16_SINT, + DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R8_UINT, + DXGI_FORMAT_R8_SINT, + }; + + for (i = 0; i < ARRAY_SIZE(additional_formats); ++i) + { + format = vkd3d_get_format(device, additional_formats[i], false); + assert(format); + + VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, format->vk_format, &properties)); + if (!((properties.linearTilingFeatures | properties.optimalTilingFeatures) & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) + return false; + } + + return true; +} + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info, struct vkd3d_physical_device_info *physical_device_info, @@ -1455,7 +1494,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, else device->feature_options.ResourceBindingTier = D3D12_RESOURCE_BINDING_TIER_3;
- device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageExtendedFormats; + device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat + && d3d12_device_supports_typed_uav_load_additional_formats(device); /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */ device->feature_options.ROVsSupported = FALSE; /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. */
From: Conor McCarthy cmccarthy@codeweavers.com
Fixes reflections in Control appearing with only their red component.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52146 Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/device.c | 1 + libs/vkd3d/state.c | 17 +++++++++++++---- libs/vkd3d/vkd3d_private.h | 2 ++ tests/d3d12.c | 18 +++++++++++++++++- 4 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index ec6bb57c..1c29bdc1 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1464,6 +1464,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; + vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);
device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64; diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 7a29ade8..895a5a2e 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1944,6 +1944,13 @@ struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12Pipeline return impl_from_ID3D12PipelineState(iface); }
+static inline unsigned int typed_uav_compile_option(const struct d3d12_device *device) +{ + return device->vk_info.uav_read_without_format + ? VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_UNKNOWN + : VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_R32; +} + static HRESULT create_shader_stage(struct d3d12_device *device, struct VkPipelineShaderStageCreateInfo *stage_desc, enum VkShaderStageFlagBits stage, const D3D12_SHADER_BYTECODE *code, const struct vkd3d_shader_interface_info *shader_interface) @@ -1955,9 +1962,10 @@ static HRESULT create_shader_stage(struct d3d12_device *device, VkResult vr; int ret;
- static const struct vkd3d_shader_compile_option options[] = + const struct vkd3d_shader_compile_option options[] = { {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_4}, + {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, };
stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -2001,14 +2009,15 @@ static HRESULT create_shader_stage(struct d3d12_device *device, return S_OK; }
-static int vkd3d_scan_dxbc(const D3D12_SHADER_BYTECODE *code, +static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER_BYTECODE *code, struct vkd3d_shader_scan_descriptor_info *descriptor_info) { struct vkd3d_shader_compile_info compile_info;
- static const struct vkd3d_shader_compile_option options[] = + const struct vkd3d_shader_compile_option options[] = { {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_4}, + {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, };
compile_info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; @@ -2170,7 +2179,7 @@ static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipe
shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; shader_info.next = NULL; - if ((ret = vkd3d_scan_dxbc(code, &shader_info)) < 0) + if ((ret = vkd3d_scan_dxbc(device, code, &shader_info)) < 0) { WARN("Failed to scan shader bytecode, stage %#x, vkd3d result %d.\n", stage_flags, ret); return hresult_from_vkd3d_result(ret); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index f00181a2..9976fe58 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -143,6 +143,8 @@ struct vkd3d_vulkan_info bool rasterization_stream; bool transform_feedback_queries;
+ bool uav_read_without_format; + bool vertex_attrib_zero_divisor; unsigned int max_vertex_attrib_divisor;
diff --git a/tests/d3d12.c b/tests/d3d12.c index 7a0e33a4..9fc199bc 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -590,6 +590,21 @@ static bool is_stencil_ref_export_supported(ID3D12Device *device) return options.PSSpecifiedStencilRefSupported; }
+static bool are_typed_uav_load_additional_formats_supported(ID3D12Device *device) +{ + D3D12_FEATURE_DATA_D3D12_OPTIONS options; + HRESULT hr; + + if (FAILED(hr = ID3D12Device_CheckFeatureSupport(device, + D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) + { + trace("Failed to check feature support, hr %#x.\n", hr); + return false; + } + + return options.TypedUAVLoadAdditionalFormats; +} + #define create_cb_root_signature(a, b, c, e) create_cb_root_signature_(__LINE__, a, b, c, e) static ID3D12RootSignature *create_cb_root_signature_(unsigned int line, ID3D12Device *device, unsigned int reg_idx, D3D12_SHADER_VISIBILITY shader_visibility, @@ -21094,7 +21109,8 @@ static void test_typed_buffer_uav(void) D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_buffer_readback_with_command_list(resource, uav_desc.Format, &rb, queue, command_list); - todo check_readback_data_vec4(&rb.rb, NULL, &expected_ld, 0); + todo_if(!are_typed_uav_load_additional_formats_supported(device)) + check_readback_data_vec4(&rb.rb, NULL, &expected_ld, 0); release_resource_readback(&rb);
ID3D12Resource_Release(resource);
This merge request was approved by Henri Verbeet.