Based on the vkd3d implementation.
Signed-off-by: Jan Sikorski <jsikorski@codeweavers.com>
---
 dlls/wined3d/adapter_vk.c            |   2 +
 dlls/wined3d/uav_clear_shaders.inc.c | 365 +++++++++++++++++++++++++++
 dlls/wined3d/view.c                  | 217 +++++++++++++---
 dlls/wined3d/wined3d_private.h       |  22 ++
 4 files changed, 571 insertions(+), 35 deletions(-)
 create mode 100644 dlls/wined3d/uav_clear_shaders.inc.c
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index c7da02865ea..7d96fd59eab 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -719,6 +719,7 @@ static HRESULT adapter_vk_init_3d(struct wined3d_device *device) wined3d_device_create_default_samplers(device, &context_vk->c); wined3d_device_vk_create_null_resources(device_vk, context_vk); wined3d_device_vk_create_null_views(device_vk, context_vk); + wined3d_device_vk_uav_clear_state_init(device_vk);
return WINED3D_OK; } @@ -740,6 +741,7 @@ static void adapter_vk_uninit_3d_cs(void *object) device->shader_backend->shader_destroy(shader); }
+ wined3d_device_vk_uav_clear_state_cleanup(device_vk); device->blitter->ops->blitter_destroy(device->blitter, NULL); device->shader_backend->shader_free_private(device, &context_vk->c); wined3d_device_vk_destroy_null_views(device_vk, context_vk); diff --git a/dlls/wined3d/uav_clear_shaders.inc.c b/dlls/wined3d/uav_clear_shaders.inc.c new file mode 100644 index 00000000000..6cb3c808578 --- /dev/null +++ b/dlls/wined3d/uav_clear_shaders.inc.c @@ -0,0 +1,365 @@ +static const uint32_t cs_uav_clear_buffer_float_code[] = +{ +#if 0 + RWBuffer<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0xe114ba61, 0xff6a0d0b, 0x7b25c8f4, 0xfcf7cf22, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_buffer_uint_code[] = +{ +#if 0 + RWBuffer<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + 
thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x3afd0cfd, 0x5145c166, 0x5b9f76b8, 0xa73775cd, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_array_float_code[] = +{ +#if 0 + RWTexture1DArray<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x3d73bc2d, 0x2b635f3d, 0x6bf98e92, 0xbe0aa5d9, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 
0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_array_uint_code[] = +{ +#if 0 + RWTexture1DArray<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x2f0ca457, 0x72068b34, 0xd9dadc2b, 0xd3178c3e, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_float_code[] = +{ +#if 0 + RWTexture1D<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x05266503, 0x4b97006f, 0x01a5cc63, 0xe617d0a1, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 
0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_uint_code[] = +{ +#if 0 + RWTexture1D<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x19d5c8f2, 0x3ca4ac24, 0x9e258499, 0xf0463fd6, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_float_code[] = +{ +#if 0 + RWTexture2DArray<float4> dst; + + struct + { + float4 clear_value; + 
int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0x924d2d2c, 0xb9166376, 0x99f83871, 0x8ef65025, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_uint_code[] = +{ +#if 0 + RWTexture2DArray<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0xa92219d4, 0xa2c5e47d, 0x0d308500, 0xf32197b4, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 
0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_float_code[] = +{ +#if 0 + RWTexture2D<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0x6e735b3f, 0x7348c4fa, 0xb3634e42, 0x50e2d99b, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_uint_code[] = +{ +#if 0 + RWTexture2D<uint4> dst; + + 
struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0xf01db5dd, 0xc7dc5e55, 0xb017c1a8, 0x55abd52d, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_float_code[] = +{ +#if 0 + RWTexture3D<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5d8f36a0, 0x30fa86a5, 0xfec7f2ef, 0xdfd76cbb, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 
0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_uint_code[] = +{ +#if 0 + RWTexture3D<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5b9c95b1, 0xc9bde4e3, 0x9aaff806, 0x24a1d264, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; diff --git a/dlls/wined3d/view.c b/dlls/wined3d/view.c index 665661e39b1..b1dc33fd1e8 100644 
--- a/dlls/wined3d/view.c
+++ b/dlls/wined3d/view.c
@@ -1726,67 +1726,263 @@ HRESULT wined3d_unordered_access_view_gl_init(struct wined3d_unordered_access_vi
     return hr;
 }
 
+/* Contents of the constant buffer read by the UAV clear compute shaders. The
+ * member order mirrors "u_info" in the HLSL sources embedded in
+ * uav_clear_shaders.inc.c: clear value, destination offset, destination
+ * extent. */
+struct wined3d_uav_clear_constants_vk
+{
+    VkClearColorValue color;
+    VkOffset2D offset;
+    VkExtent2D extent;
+};
+
+/* The clear shaders are created without a parent, so there is nothing to do here. */
+static void STDMETHODCALLTYPE wined3d_uav_clear_object_destroyed(void *parent)
+{
+}
+
+static const struct wined3d_parent_ops wined3d_uav_clear_ops =
+{
+    wined3d_uav_clear_object_destroyed
+};
+
+/* Create a compute shader from the given byte code. Returns false on failure,
+ * in which case *shader is set to NULL so that the failure can be detected
+ * when the shader is about to be used. */
+static bool create_shader(struct wined3d_device *device, const uint32_t *byte_code, size_t byte_code_size,
+        struct wined3d_shader **shader)
+{
+    struct wined3d_shader_desc shader_desc = {0};
+    HRESULT result;
+
+    shader_desc.byte_code = byte_code;
+    shader_desc.byte_code_size = byte_code_size;
+
+    result = wined3d_shader_create_cs(device, &shader_desc, NULL, &wined3d_uav_clear_ops, shader);
+    if (FAILED(result))
+    {
+        WARN("Failed to initialize shader: %#x\n", result);
+        *shader = NULL;
+    }
+
+    return SUCCEEDED(result);
+}
+
+#include "uav_clear_shaders.inc.c"
+
+/* Create the compute shaders and the constant buffer used for clearing UAVs.
+ * Objects that cannot be created are left NULL/zeroed; the clear path checks
+ * for that before using them. */
+void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk)
+{
+    struct wined3d_context_vk *context_vk = &device_vk->context_vk;
+    struct wined3d_device *device = &device_vk->d;
+    struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state;
+
+    create_shader(device, cs_uav_clear_buffer_float_code, sizeof(cs_uav_clear_buffer_float_code),
+            &state->float_shaders.buffer);
+    create_shader(device, cs_uav_clear_buffer_uint_code, sizeof(cs_uav_clear_buffer_uint_code),
+            &state->uint_shaders.buffer);
+    create_shader(device, cs_uav_clear_1d_float_code, sizeof(cs_uav_clear_1d_float_code),
+            &state->float_shaders.image_1d);
+    create_shader(device, cs_uav_clear_1d_uint_code, sizeof(cs_uav_clear_1d_uint_code),
+            &state->uint_shaders.image_1d);
+    create_shader(device, cs_uav_clear_1d_array_float_code, sizeof(cs_uav_clear_1d_array_float_code),
+            &state->float_shaders.image_1d_array);
+    create_shader(device, cs_uav_clear_1d_array_uint_code, sizeof(cs_uav_clear_1d_array_uint_code),
+            &state->uint_shaders.image_1d_array);
+    create_shader(device, cs_uav_clear_2d_float_code, sizeof(cs_uav_clear_2d_float_code),
+            &state->float_shaders.image_2d);
+    create_shader(device, cs_uav_clear_2d_uint_code, sizeof(cs_uav_clear_2d_uint_code),
+            &state->uint_shaders.image_2d);
+    create_shader(device, cs_uav_clear_2d_array_float_code, sizeof(cs_uav_clear_2d_array_float_code),
+            &state->float_shaders.image_2d_array);
+    create_shader(device, cs_uav_clear_2d_array_uint_code, sizeof(cs_uav_clear_2d_array_uint_code),
+            &state->uint_shaders.image_2d_array);
+    create_shader(device, cs_uav_clear_3d_float_code, sizeof(cs_uav_clear_3d_float_code),
+            &state->float_shaders.image_3d);
+    create_shader(device, cs_uav_clear_3d_uint_code, sizeof(cs_uav_clear_3d_uint_code),
+            &state->uint_shaders.image_3d);
+
+    if (!wined3d_context_vk_create_bo(context_vk, sizeof(struct wined3d_uav_clear_constants_vk),
+            VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &state->constants_bo))
+    {
+        ERR("Failed to create the UAV clear constant buffer.\n");
+        memset(&state->constants_bo, 0, sizeof(state->constants_bo));
+    }
+}
+
+/* Release the objects created by wined3d_device_vk_uav_clear_state_init(). */
+void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk)
+{
+    struct wined3d_context_vk *context_vk = &device_vk->context_vk;
+    struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state;
+
+    if (state->constants_bo.vk_buffer)
+        wined3d_context_vk_destroy_bo(context_vk, &state->constants_bo);
+
+    if (state->float_shaders.buffer)
+        wined3d_shader_decref(state->float_shaders.buffer);
+    if (state->uint_shaders.buffer)
+        wined3d_shader_decref(state->uint_shaders.buffer);
+    if (state->float_shaders.image_1d)
+        wined3d_shader_decref(state->float_shaders.image_1d);
+    if (state->uint_shaders.image_1d)
+        wined3d_shader_decref(state->uint_shaders.image_1d);
+    if (state->float_shaders.image_1d_array)
+        wined3d_shader_decref(state->float_shaders.image_1d_array);
+    if (state->uint_shaders.image_1d_array)
+        wined3d_shader_decref(state->uint_shaders.image_1d_array);
+    if (state->float_shaders.image_2d)
+        wined3d_shader_decref(state->float_shaders.image_2d);
+    if (state->uint_shaders.image_2d)
+        wined3d_shader_decref(state->uint_shaders.image_2d);
+    if (state->float_shaders.image_2d_array)
+        wined3d_shader_decref(state->float_shaders.image_2d_array);
+    if (state->uint_shaders.image_2d_array)
+        wined3d_shader_decref(state->uint_shaders.image_2d_array);
+    if (state->float_shaders.image_3d)
+        wined3d_shader_decref(state->float_shaders.image_3d);
+    if (state->uint_shaders.image_3d)
+        wined3d_shader_decref(state->uint_shaders.image_3d);
+}
+
+/* Clear a UAV by dispatching one of the clear compute shaders over the view.
+ * "fp" selects the float shader variants, otherwise the uint variants are used. */
 void wined3d_unordered_access_view_vk_clear(struct wined3d_unordered_access_view_vk *view_vk,
         const struct wined3d_uvec4 *clear_value, struct wined3d_context_vk *context_vk, bool fp)
 {
+    struct wined3d_view_desc *view_desc = &view_vk->v.desc;
+    struct wined3d_uav_clear_constants_vk constants = {0};
+    struct wined3d_device *device = context_vk->c.device;
+    struct wined3d_shader_thread_group_size group_count;
+    struct wined3d_uav_clear_shaders_vk *shaders;
     const struct wined3d_vk_info *vk_info;
-    const struct wined3d_format *format;
-    struct wined3d_buffer_vk *buffer_vk;
+    struct wined3d_bo_address bo_address;
+    struct wined3d_device_vk *device_vk;
+    struct wined3d_bo_vk *constants_bo;
     struct wined3d_resource *resource;
     VkCommandBuffer vk_command_buffer;
     VkBufferMemoryBarrier vk_barrier;
-    VkAccessFlags access_mask;
-    unsigned int offset, size;
+    struct wined3d_shader *shader;
+    struct wined3d_range bo_range;
+    void *mapped_constants_bo;
+    DWORD uav_location;
+    bool is_array;
 
     TRACE("view_vk %p, clear_value %s, context_vk %p, fp %#x.\n", view_vk, debug_uvec4(clear_value), context_vk, fp);
 
+    device_vk = wined3d_device_vk(device);
+    shaders = fp ? &device_vk->uav_clear_state.float_shaders : &device_vk->uav_clear_state.uint_shaders;
+
     resource = view_vk->v.resource;
-    if (resource->type != WINED3D_RTYPE_BUFFER)
+    is_array = view_desc->flags & WINED3D_VIEW_TEXTURE_ARRAY;
+
+    switch (resource->type)
     {
-        FIXME("Not implemented for %s resources.\n", debug_d3dresourcetype(resource->type));
-        return;
+        case WINED3D_RTYPE_BUFFER:     shader = shaders->buffer; break;
+        case WINED3D_RTYPE_TEXTURE_1D: shader = is_array ? shaders->image_1d_array : shaders->image_1d; break;
+        case WINED3D_RTYPE_TEXTURE_2D: shader = is_array ? shaders->image_2d_array : shaders->image_2d; break;
+        case WINED3D_RTYPE_TEXTURE_3D: shader = shaders->image_3d; break;
+
+        default:
+            ERR("Unhandled resource type %s.\n", debug_d3dresourcetype(resource->type));
+            return;
     }
 
-    format = view_vk->v.format;
-    if (format->id != WINED3DFMT_R32_UINT && format->id != WINED3DFMT_R32_SINT)
+    if (!shader)
     {
-        FIXME("Not implemented for format %s.\n", debug_d3dformat(format->id));
+        ERR("Shader was not correctly initialized.\n");
         return;
     }
 
-    vk_info = context_vk->vk_info;
-    buffer_vk = wined3d_buffer_vk(buffer_from_resource(resource));
-    wined3d_buffer_load_location(&buffer_vk->b, &context_vk->c, WINED3D_LOCATION_BUFFER);
-    wined3d_buffer_invalidate_location(&buffer_vk->b, ~WINED3D_LOCATION_BUFFER);
+    constants_bo = &device_vk->uav_clear_state.constants_bo;
+    if (!constants_bo->vk_buffer)
+    {
+        ERR("Constant buffer was not correctly initialized.\n");
+        return;
+    }
+
+    if (resource->type == WINED3D_RTYPE_BUFFER)
+        uav_location = WINED3D_LOCATION_BUFFER;
+    else
+        uav_location = WINED3D_LOCATION_TEXTURE_RGB;
 
-    get_buffer_view_range(&buffer_vk->b, &view_vk->v.desc, format, &offset, &size);
+    wined3d_view_load_location(resource, view_desc, &context_vk->c, uav_location);
+    wined3d_unordered_access_view_invalidate_location(&view_vk->v, ~uav_location);
 
+    constants.color.uint32[0] = clear_value->x;
+    constants.color.uint32[1] = clear_value->y;
+    constants.color.uint32[2] = clear_value->z;
+    constants.color.uint32[3] = clear_value->w;
+
+    group_count = shader->u.cs.thread_group_size;
+
+    if (resource->type == WINED3D_RTYPE_BUFFER)
+    {
+        /* Size the dispatch by the number of elements the view exposes; the
+         * resource dimensions describe the whole buffer, not the view. */
+        constants.extent.width = view_desc->u.buffer.count;
+        constants.extent.height = 1;
+    }
+    else
+    {
+        /* Shifting by the level index may yield 0 for non-square textures;
+         * a mip level is never smaller than 1 in any dimension. */
+        constants.extent.width = resource->width >> view_desc->u.texture.level_idx;
+        constants.extent.height = resource->height >> view_desc->u.texture.level_idx;
+        if (!constants.extent.width)
+            constants.extent.width = 1;
+        if (!constants.extent.height)
+            constants.extent.height = 1;
+        group_count.z = (view_desc->u.texture.layer_count + group_count.z - 1) / group_count.z;
+    }
+
+    group_count.x = (constants.extent.width + group_count.x - 1) / group_count.x;
+    group_count.y = (constants.extent.height + group_count.y - 1) / group_count.y;
+
+    bo_address.buffer_object = (uintptr_t)constants_bo;
+    bo_address.addr = NULL;
+
+    if (!(mapped_constants_bo = wined3d_context_map_bo_address(&context_vk->c, &bo_address,
+            sizeof(constants), WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD)))
+    {
+        ERR("Failed to map the constant buffer.\n");
+        return;
+    }
+    memcpy(mapped_constants_bo, &constants, sizeof(constants));
+
+    bo_range.offset = 0;
+    bo_range.size = sizeof(constants);
+    wined3d_context_unmap_bo_address(&context_vk->c, &bo_address, 1, &bo_range);
+
+    vk_info = context_vk->vk_info;
+
     if (!(vk_command_buffer = wined3d_context_vk_get_command_buffer(context_vk)))
         return;
     wined3d_context_vk_end_current_render_pass(context_vk);
 
-    access_mask = vk_access_mask_from_bind_flags(buffer_vk->b.resource.bind_flags);
+    /* Make the host write of the constants visible to the compute shader. */
     vk_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
     vk_barrier.pNext = NULL;
-    vk_barrier.srcAccessMask = access_mask;
-    vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    vk_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    vk_barrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT;
     vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
     vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    vk_barrier.buffer = buffer_vk->bo.vk_buffer;
-    vk_barrier.offset = buffer_vk->bo.buffer_offset + offset;
-    vk_barrier.size = size;
-    VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-            VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL));
+    vk_barrier.buffer = constants_bo->vk_buffer;
+    vk_barrier.offset = constants_bo->buffer_offset;
+    vk_barrier.size = constants_bo->size;
+    VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL));
 
-    VK_CALL(vkCmdFillBuffer(vk_command_buffer, buffer_vk->bo.vk_buffer,
-            buffer_vk->bo.buffer_offset + offset, size, clear_value->x));
+    wined3d_unordered_access_view_vk_barrier(view_vk, context_vk, WINED3D_BIND_UNORDERED_ACCESS);
 
-    vk_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-    vk_barrier.dstAccessMask = access_mask;
-    VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
-            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL));
+    device->adapter->shader_backend->shader_run_compute(group_count.x, group_count.y, group_count.z,
+            &context_vk->c, shader, constants_bo, view_vk);
 
-    wined3d_context_vk_reference_bo(context_vk, &buffer_vk->bo);
+    context_invalidate_compute_state(&context_vk->c, STATE_COMPUTE_SHADER);
 }
void wined3d_unordered_access_view_vk_update(struct wined3d_unordered_access_view_vk *uav_vk, diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 5ffcaa1f8db..1023e64cfd5 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4007,6 +4007,23 @@ void wined3d_allocator_cleanup(struct wined3d_allocator *allocator) DECLSPEC_HID bool wined3d_allocator_init(struct wined3d_allocator *allocator, size_t pool_count, const struct wined3d_allocator_ops *allocator_ops) DECLSPEC_HIDDEN;
+struct wined3d_uav_clear_shaders_vk /* UAV clear compute shaders, one per kind of view. */
+{
+    struct wined3d_shader *buffer;         /* Selected for WINED3D_RTYPE_BUFFER resources. */
+    struct wined3d_shader *image_1d;       /* 1D textures, view without WINED3D_VIEW_TEXTURE_ARRAY. */
+    struct wined3d_shader *image_1d_array; /* 1D textures, view with WINED3D_VIEW_TEXTURE_ARRAY. */
+    struct wined3d_shader *image_2d;       /* 2D textures, view without WINED3D_VIEW_TEXTURE_ARRAY. */
+    struct wined3d_shader *image_2d_array; /* 2D textures, view with WINED3D_VIEW_TEXTURE_ARRAY. */
+    struct wined3d_shader *image_3d;       /* Selected for WINED3D_RTYPE_TEXTURE_3D resources. */
+};
+
+struct wined3d_uav_clear_state_vk /* Per-device state used by wined3d_unordered_access_view_vk_clear(). */
+{
+    struct wined3d_uav_clear_shaders_vk float_shaders; /* Used when the clear is requested with fp set. */
+    struct wined3d_uav_clear_shaders_vk uint_shaders;  /* Used when fp is not set. */
+    struct wined3d_bo_vk constants_bo; /* Uniform buffer sized for struct wined3d_uav_clear_constants_vk. */
+};
+
 struct wined3d_device_vk
 {
     struct wined3d_device d;
@@ -4024,6 +4041,8 @@ struct wined3d_device_vk
     struct wined3d_null_views_vk null_views_vk;
 
     struct wined3d_allocator allocator;
+
+    struct wined3d_uav_clear_state_vk uav_clear_state; /* Set up by wined3d_device_vk_uav_clear_state_init(). */
 };
static inline struct wined3d_device_vk *wined3d_device_vk(struct wined3d_device *device) @@ -4040,6 +4059,9 @@ void wined3d_device_vk_destroy_null_resources(struct wined3d_device_vk *device_v void wined3d_device_vk_destroy_null_views(struct wined3d_device_vk *device_vk, struct wined3d_context_vk *context_vk) DECLSPEC_HIDDEN;
+void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk) DECLSPEC_HIDDEN; +void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk) DECLSPEC_HIDDEN; + static inline float wined3d_alpha_ref(const struct wined3d_state *state) { return (state->render_states[WINED3D_RS_ALPHAREF] & 0xff) / 255.0f;