On Tue, 10 Aug 2021 at 10:35, Jan Sikorski <jsikorski(a)codeweavers.com> wrote:
diff --git a/dlls/wined3d/uav_clear_shaders.inc.c b/dlls/wined3d/uav_clear_shaders.inc.c new file mode 100644 index 00000000000..6cb3c808578 --- /dev/null +++ b/dlls/wined3d/uav_clear_shaders.inc.c @@ -0,0 +1,365 @@ +static const uint32_t cs_uav_clear_buffer_float_code[] = +{ +#if 0 + RWBuffer<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0xe114ba61, 0xff6a0d0b, 0x7b25c8f4, 0xfcf7cf22, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + We're missing a license header here. I don't see much point in making this a .c file; a .h seams preferable, and if we're following the vkd3d convention, that would be wined3d_shaders.h.
+static void STDMETHODCALLTYPE wined3d_uav_clear_object_destroyed(void *parent) +{ +} + +static struct wined3d_parent_ops wined3d_uav_clear_ops = +{ + wined3d_uav_clear_object_destroyed +}; + That's "wined3d_null_parent_ops".
+static bool create_shader(struct wined3d_device *device, const uint32_t *byte_code, size_t byte_code_size, + struct wined3d_shader **shader) +{ + struct wined3d_shader_desc shader_desc; + HRESULT result; + + shader_desc.byte_code = byte_code; + shader_desc.byte_code_size = byte_code_size; + + result = wined3d_shader_create_cs(device, &shader_desc, NULL, &wined3d_uav_clear_ops, shader); + if (FAILED(result)) + WARN("Failed to initialize shader: %#x\n", result); + + return SUCCEEDED(result); +} + +#include "uav_clear_shaders.inc.c" + +void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk) +{ + struct wined3d_context_vk *context_vk = &device_vk->context_vk; + struct wined3d_device *device = &device_vk->d; + struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state; + + create_shader(device, cs_uav_clear_buffer_float_code, sizeof(cs_uav_clear_buffer_float_code), + &state->float_shaders.buffer); + create_shader(device, cs_uav_clear_buffer_uint_code, sizeof(cs_uav_clear_buffer_uint_code), + &state->uint_shaders.buffer); + create_shader(device, cs_uav_clear_1d_array_float_code, sizeof(cs_uav_clear_1d_array_float_code), + &state->float_shaders.image_1d); + create_shader(device, cs_uav_clear_1d_array_uint_code, sizeof(cs_uav_clear_1d_array_uint_code), + &state->uint_shaders.image_1d); + create_shader(device, cs_uav_clear_1d_float_code, sizeof(cs_uav_clear_1d_float_code), + &state->float_shaders.image_1d_array); + create_shader(device, cs_uav_clear_1d_uint_code, sizeof(cs_uav_clear_1d_uint_code), + &state->uint_shaders.image_1d_array); + create_shader(device, cs_uav_clear_2d_float_code, sizeof(cs_uav_clear_2d_float_code), + &state->float_shaders.image_2d); + create_shader(device, cs_uav_clear_2d_uint_code, sizeof(cs_uav_clear_2d_uint_code), + &state->uint_shaders.image_2d); + create_shader(device, cs_uav_clear_2d_array_float_code, sizeof(cs_uav_clear_2d_array_float_code), + &state->float_shaders.image_2d_array); + create_shader(device, cs_uav_clear_2d_array_uint_code, sizeof(cs_uav_clear_2d_array_uint_code), + &state->uint_shaders.image_2d_array); + create_shader(device, cs_uav_clear_3d_float_code, sizeof(cs_uav_clear_3d_float_code), + &state->float_shaders.image_3d); + create_shader(device, cs_uav_clear_3d_uint_code, sizeof(cs_uav_clear_3d_uint_code), + &state->uint_shaders.image_3d); + + wined3d_context_vk_create_bo(context_vk, sizeof(struct wined3d_uav_clear_constants_vk), + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &state->constants_bo); +} + We never check the create_shader() return code.
+void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk) +{ + struct wined3d_context_vk *context_vk = &device_vk->context_vk; + struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state; + + wined3d_context_vk_destroy_bo(context_vk, &state->constants_bo); + + if (state->float_shaders.buffer) + wined3d_shader_decref(state->float_shaders.buffer); + if (state->uint_shaders.buffer) + wined3d_shader_decref(state->uint_shaders.buffer); + if (state->float_shaders.image_1d) + wined3d_shader_decref(state->float_shaders.image_1d); + if (state->uint_shaders.image_1d) + wined3d_shader_decref(state->uint_shaders.image_1d); + if (state->float_shaders.image_1d_array) + wined3d_shader_decref(state->float_shaders.image_1d_array); + if (state->uint_shaders.image_1d_array) + wined3d_shader_decref(state->uint_shaders.image_1d_array); + if (state->float_shaders.image_2d) + wined3d_shader_decref(state->float_shaders.image_2d); + if (state->uint_shaders.image_2d) + wined3d_shader_decref(state->uint_shaders.image_2d); + if (state->float_shaders.image_2d_array) + wined3d_shader_decref(state->float_shaders.image_2d_array); + if (state->uint_shaders.image_2d_array) + wined3d_shader_decref(state->uint_shaders.image_2d_array); + if (state->float_shaders.image_3d) + wined3d_shader_decref(state->float_shaders.image_3d); + if (state->uint_shaders.image_3d) + wined3d_shader_decref(state->uint_shaders.image_3d); +} + ...but if we did, these shaders could never be NULL here.
+ constants.extent.width = resource->width; + constants.extent.height = resource->height; + + group_count = shader->u.cs.thread_group_size; + + if (resource->type != WINED3D_RTYPE_BUFFER) + { + constants.extent.width >>= view_desc->u.texture.level_idx; + constants.extent.height >>= view_desc->u.texture.level_idx; + group_count.z = (view_desc->u.texture.layer_count + group_count.z - 1) / group_count.z; + } + I.e., wined3d_texture_get_level_width(), wined3d_texture_get_level_height().
+ constants_bo = &device_vk->uav_clear_state.constants_bo; + bo_address.buffer_object = (uintptr_t)constants_bo; + bo_address.addr = NULL; + + mapped_constants_bo = wined3d_context_map_bo_address(&context_vk->c, &bo_address, + sizeof(constants), WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD); + memcpy(mapped_constants_bo, &constants, sizeof(constants)); + + bo_range.offset = 0; + bo_range.size = sizeof(constants); + wined3d_context_unmap_bo_address(&context_vk->c, &bo_address, 1, &bo_range); + I.e., wined3d_context_copy_bo_address().
vk_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; vk_barrier.pNext = NULL; - vk_barrier.srcAccessMask = access_mask; - vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + vk_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + vk_barrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - vk_barrier.buffer = buffer_vk->bo.vk_buffer; - vk_barrier.offset = buffer_vk->bo.buffer_offset + offset; - vk_barrier.size = size; - VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL)); + vk_barrier.buffer = constants_bo->vk_buffer; + vk_barrier.offset = constants_bo->buffer_offset; + vk_barrier.size = constants_bo->size; + VK_CALL(vkCmdPipelineBarrier(wined3d_context_vk_get_command_buffer(context_vk), + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, NULL, 1, &vk_barrier, 0, NULL));
Do we need that barrier? Specifically, is this not already covered by "Host Write Ordering Guarantees"?