Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/wined3d/view.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/dlls/wined3d/view.c b/dlls/wined3d/view.c index 0adb0a115dc..665661e39b1 100644 --- a/dlls/wined3d/view.c +++ b/dlls/wined3d/view.c @@ -332,6 +332,25 @@ static void wined3d_view_invalidate_location(struct wined3d_resource *resource, wined3d_texture_invalidate_location(texture, sub_resource_idx, location); }
+static void wined3d_view_load_location(struct wined3d_resource *resource, + const struct wined3d_view_desc *desc, struct wined3d_context *context, DWORD location) +{ + unsigned int i, sub_resource_idx, layer_count; + struct wined3d_texture *texture; + + if (resource->type == WINED3D_RTYPE_BUFFER) + { + wined3d_buffer_load_location(buffer_from_resource(resource), context, location); + return; + } + + texture = texture_from_resource(resource); + sub_resource_idx = desc->u.texture.layer_idx * texture->level_count + desc->u.texture.level_idx; + layer_count = resource->type != WINED3D_RTYPE_TEXTURE_3D ? desc->u.texture.layer_count : 1; + for (i = 0; i < layer_count; ++i, sub_resource_idx += texture->level_count) + wined3d_texture_load_location(texture, sub_resource_idx, context, location); +} + ULONG CDECL wined3d_rendertarget_view_incref(struct wined3d_rendertarget_view *view) { ULONG refcount = InterlockedIncrement(&view->refcount);
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/wined3d/context_vk.c | 2 +- dlls/wined3d/wined3d_private.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index 8df416851f2..afff8bde3d6 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -2556,7 +2556,7 @@ static VkResult wined3d_context_vk_create_vk_descriptor_pool(struct wined3d_devi return vr; }
-static VkResult wined3d_context_vk_create_vk_descriptor_set(struct wined3d_context_vk *context_vk, +VkResult wined3d_context_vk_create_vk_descriptor_set(struct wined3d_context_vk *context_vk, VkDescriptorSetLayout vk_set_layout, VkDescriptorSet *vk_descriptor_set) { struct wined3d_device_vk *device_vk = wined3d_device_vk(context_vk->c.device); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 86eae149306..87757e504a2 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2663,6 +2663,8 @@ void wined3d_context_vk_submit_command_buffer(struct wined3d_context_vk *context unsigned int wait_semaphore_count, const VkSemaphore *wait_semaphores, const VkPipelineStageFlags *wait_stages, unsigned int signal_semaphore_count, const VkSemaphore *signal_semaphores) DECLSPEC_HIDDEN; void wined3d_context_vk_wait_command_buffer(struct wined3d_context_vk *context_vk, uint64_t id) DECLSPEC_HIDDEN; +VkResult wined3d_context_vk_create_vk_descriptor_set(struct wined3d_context_vk *context_vk, + VkDescriptorSetLayout vk_set_layout, VkDescriptorSet *vk_descriptor_set) DECLSPEC_HIDDEN;
typedef void (*APPLYSTATEFUNC)(struct wined3d_context *ctx, const struct wined3d_state *state, DWORD state_id);
On Tue, 10 Aug 2021 at 10:34, Jan Sikorski jsikorski@codeweavers.com wrote:
dlls/wined3d/context_vk.c | 2 +- dlls/wined3d/wined3d_private.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-)
Somewhat similarly, as it is, wined3d_context_vk_create_vk_descriptor_set() isn't actually used outside of context_vk.c.
Extracted from shader_spirv_resource_bindings_init().
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/wined3d/shader_spirv.c | 125 ++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 57 deletions(-)
diff --git a/dlls/wined3d/shader_spirv.c b/dlls/wined3d/shader_spirv.c index 7617ee9d6ef..fb9f9dc4ff1 100644 --- a/dlls/wined3d/shader_spirv.c +++ b/dlls/wined3d/shader_spirv.c @@ -657,86 +657,97 @@ static enum wined3d_data_type wined3d_data_type_from_vkd3d(enum vkd3d_shader_res } }
-static bool shader_spirv_resource_bindings_init(struct shader_spirv_resource_bindings *bindings, - struct wined3d_shader_resource_bindings *wined3d_bindings, - const struct wined3d_state *state, uint32_t shader_mask) +static bool shader_spirv_resource_bindings_add_shader(struct shader_spirv_resource_bindings *bindings, + struct wined3d_shader_resource_bindings *wined3d_bindings, struct wined3d_shader *shader, + enum wined3d_shader_type shader_type) { struct vkd3d_shader_scan_descriptor_info *descriptor_info; enum wined3d_shader_descriptor_type wined3d_type; enum vkd3d_shader_visibility shader_visibility; - enum wined3d_shader_type shader_type; VkDescriptorType vk_descriptor_type; VkShaderStageFlagBits vk_stage; - struct wined3d_shader *shader; size_t binding_idx; unsigned int i;
- bindings->binding_count = 0; - bindings->uav_counter_count = 0; - bindings->vk_binding_count = 0; - bindings->so_stage = WINED3D_SHADER_TYPE_GEOMETRY; - wined3d_bindings->count = 0; - - for (shader_type = 0; shader_type < WINED3D_SHADER_TYPE_COUNT; ++shader_type) + if (shader_type == WINED3D_SHADER_TYPE_COMPUTE) { - bindings->binding_base[shader_type] = bindings->vk_binding_count; + descriptor_info = &((struct shader_spirv_compute_program_vk *)shader->backend_data)->descriptor_info; + } + else + { + descriptor_info = &((struct shader_spirv_graphics_program_vk *)shader->backend_data)->descriptor_info; + if (shader_type == WINED3D_SHADER_TYPE_GEOMETRY && !shader->function) + bindings->so_stage = WINED3D_SHADER_TYPE_VERTEX; + }
- if (!(shader_mask & (1u << shader_type)) || !(shader = state->shader[shader_type])) - continue; + vk_stage = vk_shader_stage_from_wined3d(shader_type); + shader_visibility = vkd3d_shader_visibility_from_wined3d(shader_type);
- if (shader_type == WINED3D_SHADER_TYPE_COMPUTE) + for (i = 0; i < descriptor_info->descriptor_count; ++i) + { + struct vkd3d_shader_descriptor_info *d = &descriptor_info->descriptors[i]; + uint32_t flags; + + if (d->register_space) { - descriptor_info = &((struct shader_spirv_compute_program_vk *)shader->backend_data)->descriptor_info; + WARN("Unsupported register space %u.\n", d->register_space); + return false; } + + if (d->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) + flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; else - { - descriptor_info = &((struct shader_spirv_graphics_program_vk *)shader->backend_data)->descriptor_info; - if (shader_type == WINED3D_SHADER_TYPE_GEOMETRY && !shader->function) - bindings->so_stage = WINED3D_SHADER_TYPE_VERTEX; - } + flags = VKD3D_SHADER_BINDING_FLAG_IMAGE;
- vk_stage = vk_shader_stage_from_wined3d(shader_type); - shader_visibility = vkd3d_shader_visibility_from_wined3d(shader_type); + vk_descriptor_type = vk_descriptor_type_from_vkd3d(d->type, d->resource_type); + if (!shader_spirv_resource_bindings_add_binding(bindings, d->type, vk_descriptor_type, + d->register_index, shader_visibility, vk_stage, flags, &binding_idx)) + return false;
- for (i = 0; i < descriptor_info->descriptor_count; ++i) - { - struct vkd3d_shader_descriptor_info *d = &descriptor_info->descriptors[i]; - uint32_t flags; + wined3d_type = wined3d_descriptor_type_from_vkd3d(d->type); + if (!wined3d_shader_resource_bindings_add_binding(wined3d_bindings, shader_type, + wined3d_type, d->register_index, wined3d_shader_resource_type_from_vkd3d(d->resource_type), + wined3d_data_type_from_vkd3d(d->resource_data_type), binding_idx)) + return false;
- if (d->register_space) - { - WARN("Unsupported register space %u.\n", d->register_space); + if (d->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV + && (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER)) + { + if (!shader_spirv_resource_bindings_add_uav_counter_binding(bindings, + d->register_index, shader_visibility, vk_stage, &binding_idx)) return false; - } + if (!wined3d_shader_resource_bindings_add_binding(wined3d_bindings, + shader_type, WINED3D_SHADER_DESCRIPTOR_TYPE_UAV_COUNTER, d->register_index, + WINED3D_SHADER_RESOURCE_BUFFER, WINED3D_DATA_UINT, binding_idx)) + return false; + } + }
- if (d->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) - flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; - else - flags = VKD3D_SHADER_BINDING_FLAG_IMAGE; + return true; +}
- vk_descriptor_type = vk_descriptor_type_from_vkd3d(d->type, d->resource_type); - if (!shader_spirv_resource_bindings_add_binding(bindings, d->type, vk_descriptor_type, - d->register_index, shader_visibility, vk_stage, flags, &binding_idx)) - return false; +static bool shader_spirv_resource_bindings_init(struct shader_spirv_resource_bindings *bindings, + struct wined3d_shader_resource_bindings *wined3d_bindings, + const struct wined3d_state *state, uint32_t shader_mask) +{ + enum wined3d_shader_type shader_type; + struct wined3d_shader *shader;
- wined3d_type = wined3d_descriptor_type_from_vkd3d(d->type); - if (!wined3d_shader_resource_bindings_add_binding(wined3d_bindings, shader_type, - wined3d_type, d->register_index, wined3d_shader_resource_type_from_vkd3d(d->resource_type), - wined3d_data_type_from_vkd3d(d->resource_data_type), binding_idx)) - return false; + bindings->binding_count = 0; + bindings->uav_counter_count = 0; + bindings->vk_binding_count = 0; + bindings->so_stage = WINED3D_SHADER_TYPE_GEOMETRY; + wined3d_bindings->count = 0;
- if (d->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV - && (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER)) - { - if (!shader_spirv_resource_bindings_add_uav_counter_binding(bindings, - d->register_index, shader_visibility, vk_stage, &binding_idx)) - return false; - if (!wined3d_shader_resource_bindings_add_binding(wined3d_bindings, - shader_type, WINED3D_SHADER_DESCRIPTOR_TYPE_UAV_COUNTER, d->register_index, - WINED3D_SHADER_RESOURCE_BUFFER, WINED3D_DATA_UINT, binding_idx)) - return false; - } - } + for (shader_type = 0; shader_type < WINED3D_SHADER_TYPE_COUNT; ++shader_type) + { + bindings->binding_base[shader_type] = bindings->vk_binding_count; + + if (!(shader_mask & (1u << shader_type)) || !(shader = state->shader[shader_type])) + continue; + + if (!shader_spirv_resource_bindings_add_shader(bindings, wined3d_bindings, shader, shader_type)) + return false; }
return true;
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/wined3d/shader_spirv.c | 112 +++++++++++++++++++++++++++++++++ dlls/wined3d/wined3d_private.h | 2 + 2 files changed, 114 insertions(+)
diff --git a/dlls/wined3d/shader_spirv.c b/dlls/wined3d/shader_spirv.c index fb9f9dc4ff1..7ede7551049 100644 --- a/dlls/wined3d/shader_spirv.c +++ b/dlls/wined3d/shader_spirv.c @@ -1128,6 +1128,117 @@ static BOOL shader_spirv_has_ffp_proj_control(void *shader_priv) return priv->ffp_proj_control; }
+static void wined3d_spirv_run_compute(unsigned groups_x, unsigned groups_y, unsigned groups_z, + struct wined3d_context *context, struct wined3d_shader *shader, ...) +{ + struct wined3d_context_vk *context_vk = wined3d_context_vk(context); + struct shader_spirv_priv *priv = context->shader_backend_data; + const struct wined3d_vk_info *vk_info = context_vk->vk_info; + struct shader_spirv_resource_bindings spirv_bindings = {0}; + struct wined3d_shader_resource_bindings bindings = {0}; + struct shader_spirv_compute_program_vk *program; + VkWriteDescriptorSet write_set = {0}; + struct wined3d_device_vk *device_vk; + VkDescriptorSet vk_descriptor_set; + VkCommandBuffer vk_command_buffer; + unsigned int i; + va_list list; + VkResult vr; + + if (!shader_spirv_resource_bindings_add_shader(&spirv_bindings, &bindings, shader, WINED3D_SHADER_TYPE_COMPUTE)) + { + ERR("Failed to initialize bindings.\n"); + return; + } + + program = shader_spirv_find_compute_program_vk(priv, context_vk, shader, &spirv_bindings); + + vr = wined3d_context_vk_create_vk_descriptor_set(context_vk, program->vk_set_layout, &vk_descriptor_set); + if (vr != VK_SUCCESS) + { + ERR("Failed to create descriptor set, vr %s.\n", wined3d_debug_vkresult(vr)); + return; + } + + device_vk = wined3d_device_vk(context_vk->c.device); + if (!(vk_command_buffer = wined3d_context_vk_get_command_buffer(context_vk))) + { + ERR("Failed to get command buffer.\n"); + return; + } + wined3d_context_vk_end_current_render_pass(context_vk); + + write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_set.pNext = NULL; + write_set.dstArrayElement = 0; + write_set.descriptorCount = 1; + write_set.dstSet = vk_descriptor_set; + + va_start(list, shader); + for (i = 0; i < bindings.count; ++i) + { + struct wined3d_shader_resource_binding *b = bindings.bindings + i; + VkDescriptorBufferInfo buffer_info; + + write_set.dstBinding = b->binding_idx; + + switch (b->shader_descriptor_type) + { + case 
WINED3D_SHADER_DESCRIPTOR_TYPE_CBV: + { + struct wined3d_bo_vk *bo = va_arg(list, struct wined3d_bo_vk *); + + wined3d_context_vk_reference_bo(context_vk, bo); + + buffer_info.buffer = bo->vk_buffer; + buffer_info.offset = bo->buffer_offset; + buffer_info.range = bo->size; + + write_set.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + write_set.pBufferInfo = &buffer_info; + break; + } + case WINED3D_SHADER_DESCRIPTOR_TYPE_UAV: + { + struct wined3d_unordered_access_view_vk *uav = va_arg(list, struct wined3d_unordered_access_view_vk *); + struct wined3d_texture_vk *texture_vk; + wined3d_context_vk_reference_unordered_access_view(context_vk, uav); + + if (uav->v.resource->type == WINED3D_RTYPE_BUFFER) + { + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + write_set.pTexelBufferView = &uav->view_vk.u.vk_buffer_view; + } + else + { + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write_set.pImageInfo = &uav->view_vk.u.vk_image_info; + if (!write_set.pImageInfo->imageView) + { + texture_vk = wined3d_texture_vk(wined3d_texture_from_resource(uav->v.resource)); + write_set.pImageInfo = wined3d_texture_vk_get_default_image_info(texture_vk, context_vk); + } + } + + break; + } + default: + { + ERR("Unhandled type %#x.\n", b->shader_descriptor_type); + break; + } + } + + VK_CALL(vkUpdateDescriptorSets(device_vk->vk_device, 1, &write_set, 0, NULL)); + } + va_end(list); + + VK_CALL(vkCmdBindPipeline(vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, program->vk_pipeline)); + VK_CALL(vkCmdBindDescriptorSets(vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, program->vk_pipeline_layout, + 0, 1, &vk_descriptor_set, 0, NULL)); + VK_CALL(vkCmdDispatch(vk_command_buffer, groups_x, groups_y, groups_z)); +} + static const struct wined3d_shader_backend_ops spirv_shader_backend_vk = { .shader_handle_instruction = shader_spirv_handle_instruction, @@ -1147,6 +1258,7 @@ static const struct wined3d_shader_backend_ops spirv_shader_backend_vk = 
.shader_get_caps = shader_spirv_get_caps, .shader_color_fixup_supported = shader_spirv_color_fixup_supported, .shader_has_ffp_proj_control = shader_spirv_has_ffp_proj_control, + .shader_run_compute = wined3d_spirv_run_compute, };
const struct wined3d_shader_backend_ops *wined3d_spirv_shader_backend_init_vk(void) diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 87757e504a2..5ffcaa1f8db 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -1511,6 +1511,8 @@ struct wined3d_shader_backend_ops void (*shader_get_caps)(const struct wined3d_adapter *adapter, struct shader_caps *caps); BOOL (*shader_color_fixup_supported)(struct color_fixup_desc fixup); BOOL (*shader_has_ffp_proj_control)(void *shader_priv); + void (*shader_run_compute)(unsigned groups_x, unsigned groups_y, unsigned groups_z, + struct wined3d_context *context, struct wined3d_shader *shader, ...); };
extern const struct wined3d_shader_backend_ops glsl_shader_backend DECLSPEC_HIDDEN;
On Tue, 10 Aug 2021 at 10:35, Jan Sikorski jsikorski@codeweavers.com wrote:
dlls/wined3d/shader_spirv.c | 112 +++++++++++++++++++++++++++++++++ dlls/wined3d/wined3d_private.h | 2 + 2 files changed, 114 insertions(+)
And this too is never used anywhere until patch 5/5 in this series.
static const struct wined3d_shader_backend_ops spirv_shader_backend_vk = { .shader_handle_instruction = shader_spirv_handle_instruction, @@ -1147,6 +1258,7 @@ static const struct wined3d_shader_backend_ops spirv_shader_backend_vk = .shader_get_caps = shader_spirv_get_caps, .shader_color_fixup_supported = shader_spirv_color_fixup_supported, .shader_has_ffp_proj_control = shader_spirv_has_ffp_proj_control,
+ .shader_run_compute = wined3d_spirv_run_compute,
};
"shader_run_compute" remains uninitialised for the other shader backends.
const struct wined3d_shader_backend_ops *wined3d_spirv_shader_backend_init_vk(void) diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 87757e504a2..5ffcaa1f8db 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -1511,6 +1511,8 @@ struct wined3d_shader_backend_ops void (*shader_get_caps)(const struct wined3d_adapter *adapter, struct shader_caps *caps); BOOL (*shader_color_fixup_supported)(struct color_fixup_desc fixup); BOOL (*shader_has_ffp_proj_control)(void *shader_priv);
+ void (*shader_run_compute)(unsigned groups_x, unsigned groups_y, unsigned groups_z,
+ struct wined3d_context *context, struct wined3d_shader *shader, ...);
};
Conceptually, it doesn't seem quite proper for the shader backends to do compute dispatch by themselves; ideally these would only translate shaders, although in practice they're also responsible for setting up some related state, for OpenGL in particular.
I'm not sure whether you perhaps already considered and rejected this, but would it be very hard to use the existing .shader_select_compute() operation from wined3d_unordered_access_view_vk_clear()? Or perhaps simply adapter_vk_dispatch_compute()?
On 10 Aug 2021, at 20:50, Henri Verbeet hverbeet@gmail.com wrote:
On Tue, 10 Aug 2021 at 10:35, Jan Sikorski jsikorski@codeweavers.com wrote:
dlls/wined3d/shader_spirv.c | 112 +++++++++++++++++++++++++++++++++ dlls/wined3d/wined3d_private.h | 2 + 2 files changed, 114 insertions(+)
And this too is never used anywhere until patch 5/5 in this series.
Yes, do I squash it all together then?
+ .shader_run_compute = wined3d_spirv_run_compute,
};
"shader_run_compute" remains uninitialised for the other shader backends.
It should be initialised to NULL, which seemed appropriate, but if we don’t want this I’ll make it spit an ERR.
Conceptually, it doesn't seem quite proper for the shader backends to do compute dispatch by themselves; ideally these would only translate shaders, although in practice they're also responsible for setting up some related state, for OpenGL in particular.
I'm not sure whether you perhaps already considered and rejected this, but would it be very hard to use the existing .shader_select_compute() operation from wined3d_unordered_access_view_vk_clear()? Or perhaps simply adapter_vk_dispatch_compute()?
My understanding is that I can’t touch wined3d_state, so in order to unite with existing functionality I’d have to decouple it from the state. Maybe we want this anyway? It looks straightforward to do for shader_select_compute. adapter_vk_dispatch_compute() mostly applies state so I don’t see how it could be of use here.
- Jan
On Wed, 11 Aug 2021 at 11:50, Jan Sikorski jsikorski@codeweavers.com wrote:
On 10 Aug 2021, at 20:50, Henri Verbeet hverbeet@gmail.com wrote: On Tue, 10 Aug 2021 at 10:35, Jan Sikorski jsikorski@codeweavers.com wrote:
dlls/wined3d/shader_spirv.c | 112 +++++++++++++++++++++++++++++++++ dlls/wined3d/wined3d_private.h | 2 + 2 files changed, 114 insertions(+)
And this too is never used anywhere until patch 5/5 in this series.
Yes, do I squash it all together then?
2, 4, and 5 would probably need to be together, yes. In the case of wined3d_view_load_location(), you can still do that as a separate patch if you use it in wined3d_rendertarget_view_load_location() in the same patch.
+ .shader_run_compute = wined3d_spirv_run_compute,
};
"shader_run_compute" remains uninitialised for the other shader backends.
It should be initialised to NULL, which seemed appropriate, but if we don’t want this I’ll make it spit an ERR.
So far we've gone with printing a FIXME/ERR for unimplemented wined3d_shader_backend_ops operations, yes. Perhaps we could revisit that, but even then we'd probably want to put an explicit NULL in e.g. "glsl_shader_backend".
Conceptually, it doesn't seem quite proper for the shader backends to do compute dispatch by themselves; ideally these would only translate shaders, although in practice they're also responsible for setting up some related state, for OpenGL in particular.
I'm not sure whether you perhaps already considered and rejected this, but would it be very hard to use the existing .shader_select_compute() operation from wined3d_unordered_access_view_vk_clear()? Or perhaps simply adapter_vk_dispatch_compute()?
My understanding is that I can’t touch wined3d_state, so in order to unite with existing functionality I’d have to decouple it from the state. Maybe we want this anyway? It looks straightforward to do for shader_select_compute. adapter_vk_dispatch_compute() mostly applies state so I don’t see how it could be of use here.
I think we have a number of options, each with their advantages and disadvantages:
- We can do this largely adapter agnostic in wined3d_device_context_clear_uav_float()/wined3d_device_context_clear_uav_uint() by using wined3d_state_create(), wined3d_device_context_set_state(), wined3d_device_context_set_shader(), wined3d_device_context_set_constant_buffers(), wined3d_device_context_set_unordered_access_views(), and wined3d_device_context_dispatch(). The nice thing about that approach is that it would work just as well for the OpenGL backend, even without ARB_clear_texture and ARB_clear_buffer, or e.g. for a hypothetical Metal backend. The main disadvantages would be that wined3d_device_context_set_state() is a fairly heavy operation, and we wouldn't be able to use the same approach for e.g. depth/stencil readback.
- Instead of using wined3d_state_create() and wined3d_device_context_set_state(), we could modify the existing device state before dispatch, and restore it afterwards. For graphics shaders that kind of thing can be a bit painful, but we wouldn't have to touch that much state for compute shaders. That still doesn't help for depth/stencil readback though.
- Instead of doing this on the application side of the command stream, we could do it from wined3d_cs_exec_clear_unordered_access_view() or wined3d_unordered_access_view_vk_clear(). We'd now have to manually set and invalidate the relevant state, but perhaps that's still ok. The basic principle should generalise to other compute shaders, and we're still not using anything adapter-specific, so we could e.g. call this from wined3d_unordered_access_view_gl_clear() as a fallback. (Or simply the main implementation; glClearTexSubImage() is convenient, but I'm not sure whether it's any more efficient than using compute shaders on any GL implementation.)
- There's a chance that implementing this in a Vulkan-specific way would be more efficient. E.g. there may be an advantage to using push constants instead of uniform buffers, and we'd avoid some state processing. It would be hard to quantify that without doing some benchmarking first though. If we take this approach, we essentially want a small wrapper around vkd3d_shader_compile() from the shader backend; we give it a D3D shader, some interface/binding information, and get back a SPIR-V shader. Decoupling shader_select_compute() from the state largely gets us there, although it introduces some asymmetry between shader_select() and shader_select_compute() (and decoupling shader_select() from the state won't be quite as easy). Arguably there's not much of a point in passing the shader backend a wined3d_shader in this case either; we may as well just pass it the byte code. This approach is likely the most flexible and the most efficient, but also likely the most complicated. By its nature, we can't use the same implementation for different adapter types.
On 11 Aug 2021, at 14:25, Henri Verbeet hverbeet@gmail.com wrote:
I think we have a number of options, each with their advantages and disadvantages:
I’m generally partial to just doing what needs to be done and skipping the higher-level machinery, even if that’s a bit more implementation work. With the first approach I’d be worried about being too slow. The state tweaking sounds adventurous, but yeah, maybe it’s fine for compute. For the direct option, I’m not quite sure what the preferred way of talking to the shader backend would be - are you thinking of a new backend operation that takes the byte code with extra info and returns a VkShaderModule (through some opaque integer that gets cast)? Going further, I wonder if it's preferable to just provide SPIR-V up front and skip the shader translation step too?
- Jan
On Wed, 11 Aug 2021 at 18:08, Jan Sikorski jsikorski@codeweavers.com wrote:
On 11 Aug 2021, at 14:25, Henri Verbeet hverbeet@gmail.com wrote:
I think we have a number of options, each with their advantages and disadvantages:
I’m generally partial to just doing what needs to be done and skipping the higher-level machinery, even if that’s a bit more implementation work. With the first approach I’d be worried about being too slow. The state tweaking sounds adventurous, but yeah, maybe it’s fine for compute. For the direct option, I’m not quite sure what the preferred way of talking to the shader backend would be - are you thinking of a new backend operation that takes the byte code with extra info and returns a VkShaderModule (through some opaque integer that gets cast)? Going further, I wonder if it's preferable to just provide SPIR-V up front and skip the shader translation step too?
Something along those lines, yes.
I don't think we want to put compiled SPIR-V in the source. We don't actually want to put D3D bytecode in there either, but hopefully we'll actually have an HLSL compiler one of these days, and then we'll be able to just generate HLSL.
Based on the vkd3d implementation.
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/wined3d/adapter_vk.c | 2 + dlls/wined3d/uav_clear_shaders.inc.c | 365 +++++++++++++++++++++++++++ dlls/wined3d/view.c | 217 +++++++++++++--- dlls/wined3d/wined3d_private.h | 22 ++ 4 files changed, 571 insertions(+), 35 deletions(-) create mode 100644 dlls/wined3d/uav_clear_shaders.inc.c
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index c7da02865ea..7d96fd59eab 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -719,6 +719,7 @@ static HRESULT adapter_vk_init_3d(struct wined3d_device *device) wined3d_device_create_default_samplers(device, &context_vk->c); wined3d_device_vk_create_null_resources(device_vk, context_vk); wined3d_device_vk_create_null_views(device_vk, context_vk); + wined3d_device_vk_uav_clear_state_init(device_vk);
return WINED3D_OK; } @@ -740,6 +741,7 @@ static void adapter_vk_uninit_3d_cs(void *object) device->shader_backend->shader_destroy(shader); }
+ wined3d_device_vk_uav_clear_state_cleanup(device_vk); device->blitter->ops->blitter_destroy(device->blitter, NULL); device->shader_backend->shader_free_private(device, &context_vk->c); wined3d_device_vk_destroy_null_views(device_vk, context_vk); diff --git a/dlls/wined3d/uav_clear_shaders.inc.c b/dlls/wined3d/uav_clear_shaders.inc.c new file mode 100644 index 00000000000..6cb3c808578 --- /dev/null +++ b/dlls/wined3d/uav_clear_shaders.inc.c @@ -0,0 +1,365 @@ +static const uint32_t cs_uav_clear_buffer_float_code[] = +{ +#if 0 + RWBuffer<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0xe114ba61, 0xff6a0d0b, 0x7b25c8f4, 0xfcf7cf22, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_buffer_uint_code[] = +{ +#if 0 + RWBuffer<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + 
thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x3afd0cfd, 0x5145c166, 0x5b9f76b8, 0xa73775cd, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_array_float_code[] = +{ +#if 0 + RWTexture1DArray<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x3d73bc2d, 0x2b635f3d, 0x6bf98e92, 0xbe0aa5d9, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 
0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_array_uint_code[] = +{ +#if 0 + RWTexture1DArray<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x2f0ca457, 0x72068b34, 0xd9dadc2b, 0xd3178c3e, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_float_code[] = +{ +#if 0 + RWTexture1D<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x05266503, 0x4b97006f, 0x01a5cc63, 0xe617d0a1, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 
0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_uint_code[] = +{ +#if 0 + RWTexture1D<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x19d5c8f2, 0x3ca4ac24, 0x9e258499, 0xf0463fd6, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_float_code[] = +{ +#if 0 + RWTexture2DArray<float4> dst; + + struct + { + float4 clear_value; + 
int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0x924d2d2c, 0xb9166376, 0x99f83871, 0x8ef65025, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_uint_code[] = +{ +#if 0 + RWTexture2DArray<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0xa92219d4, 0xa2c5e47d, 0x0d308500, 0xf32197b4, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 
0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_float_code[] = +{ +#if 0 + RWTexture2D<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0x6e735b3f, 0x7348c4fa, 0xb3634e42, 0x50e2d99b, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_uint_code[] = +{ +#if 0 + RWTexture2D<uint4> dst; + + 
struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0xf01db5dd, 0xc7dc5e55, 0xb017c1a8, 0x55abd52d, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_float_code[] = +{ +#if 0 + RWTexture3D<float4> dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5d8f36a0, 0x30fa86a5, 0xfec7f2ef, 0xdfd76cbb, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 
0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_uint_code[] = +{ +#if 0 + RWTexture3D<uint4> dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5b9c95b1, 0xc9bde4e3, 0x9aaff806, 0x24a1d264, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; diff --git a/dlls/wined3d/view.c b/dlls/wined3d/view.c index 665661e39b1..b1dc33fd1e8 100644 
--- a/dlls/wined3d/view.c +++ b/dlls/wined3d/view.c @@ -1726,67 +1726,214 @@ HRESULT wined3d_unordered_access_view_gl_init(struct wined3d_unordered_access_vi return hr; }
+struct wined3d_uav_clear_constants_vk +{ + VkClearColorValue color; + VkOffset2D offset; + VkExtent2D extent; +}; + +static void STDMETHODCALLTYPE wined3d_uav_clear_object_destroyed(void *parent) +{ +} + +static struct wined3d_parent_ops wined3d_uav_clear_ops = +{ + wined3d_uav_clear_object_destroyed +}; + +static bool create_shader(struct wined3d_device *device, const uint32_t *byte_code, size_t byte_code_size, + struct wined3d_shader **shader) +{ + struct wined3d_shader_desc shader_desc; + HRESULT result; + + shader_desc.byte_code = byte_code; + shader_desc.byte_code_size = byte_code_size; + + result = wined3d_shader_create_cs(device, &shader_desc, NULL, &wined3d_uav_clear_ops, shader); + if (FAILED(result)) + WARN("Failed to initialize shader: %#x\n", result); + + return SUCCEEDED(result); +} + +#include "uav_clear_shaders.inc.c" + +void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk) +{ + struct wined3d_context_vk *context_vk = &device_vk->context_vk; + struct wined3d_device *device = &device_vk->d; + struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state; + + create_shader(device, cs_uav_clear_buffer_float_code, sizeof(cs_uav_clear_buffer_float_code), + &state->float_shaders.buffer); + create_shader(device, cs_uav_clear_buffer_uint_code, sizeof(cs_uav_clear_buffer_uint_code), + &state->uint_shaders.buffer); + create_shader(device, cs_uav_clear_1d_array_float_code, sizeof(cs_uav_clear_1d_array_float_code), + &state->float_shaders.image_1d); + create_shader(device, cs_uav_clear_1d_array_uint_code, sizeof(cs_uav_clear_1d_array_uint_code), + &state->uint_shaders.image_1d); + create_shader(device, cs_uav_clear_1d_float_code, sizeof(cs_uav_clear_1d_float_code), + &state->float_shaders.image_1d_array); + create_shader(device, cs_uav_clear_1d_uint_code, sizeof(cs_uav_clear_1d_uint_code), + &state->uint_shaders.image_1d_array); + create_shader(device, cs_uav_clear_2d_float_code, sizeof(cs_uav_clear_2d_float_code), + 
&state->float_shaders.image_2d); + create_shader(device, cs_uav_clear_2d_uint_code, sizeof(cs_uav_clear_2d_uint_code), + &state->uint_shaders.image_2d); + create_shader(device, cs_uav_clear_2d_array_float_code, sizeof(cs_uav_clear_2d_array_float_code), + &state->float_shaders.image_2d_array); + create_shader(device, cs_uav_clear_2d_array_uint_code, sizeof(cs_uav_clear_2d_array_uint_code), + &state->uint_shaders.image_2d_array); + create_shader(device, cs_uav_clear_3d_float_code, sizeof(cs_uav_clear_3d_float_code), + &state->float_shaders.image_3d); + create_shader(device, cs_uav_clear_3d_uint_code, sizeof(cs_uav_clear_3d_uint_code), + &state->uint_shaders.image_3d); + + wined3d_context_vk_create_bo(context_vk, sizeof(struct wined3d_uav_clear_constants_vk), + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &state->constants_bo); +} + +void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk) +{ + struct wined3d_context_vk *context_vk = &device_vk->context_vk; + struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state; + + wined3d_context_vk_destroy_bo(context_vk, &state->constants_bo); + + if (state->float_shaders.buffer) + wined3d_shader_decref(state->float_shaders.buffer); + if (state->uint_shaders.buffer) + wined3d_shader_decref(state->uint_shaders.buffer); + if (state->float_shaders.image_1d) + wined3d_shader_decref(state->float_shaders.image_1d); + if (state->uint_shaders.image_1d) + wined3d_shader_decref(state->uint_shaders.image_1d); + if (state->float_shaders.image_1d_array) + wined3d_shader_decref(state->float_shaders.image_1d_array); + if (state->uint_shaders.image_1d_array) + wined3d_shader_decref(state->uint_shaders.image_1d_array); + if (state->float_shaders.image_2d) + wined3d_shader_decref(state->float_shaders.image_2d); + if (state->uint_shaders.image_2d) + wined3d_shader_decref(state->uint_shaders.image_2d); + if 
(state->float_shaders.image_2d_array) + wined3d_shader_decref(state->float_shaders.image_2d_array); + if (state->uint_shaders.image_2d_array) + wined3d_shader_decref(state->uint_shaders.image_2d_array); + if (state->float_shaders.image_3d) + wined3d_shader_decref(state->float_shaders.image_3d); + if (state->uint_shaders.image_3d) + wined3d_shader_decref(state->uint_shaders.image_3d); +} + void wined3d_unordered_access_view_vk_clear(struct wined3d_unordered_access_view_vk *view_vk, const struct wined3d_uvec4 *clear_value, struct wined3d_context_vk *context_vk, bool fp) { + struct wined3d_view_desc *view_desc = &view_vk->v.desc; + struct wined3d_uav_clear_constants_vk constants = {0}; + struct wined3d_device *device = context_vk->c.device; + struct wined3d_shader_thread_group_size group_count; + struct wined3d_uav_clear_shaders_vk *shaders; const struct wined3d_vk_info *vk_info; - const struct wined3d_format *format; - struct wined3d_buffer_vk *buffer_vk; + struct wined3d_bo_address bo_address; + struct wined3d_device_vk *device_vk; + struct wined3d_bo_vk *constants_bo; struct wined3d_resource *resource; - VkCommandBuffer vk_command_buffer; VkBufferMemoryBarrier vk_barrier; - VkAccessFlags access_mask; - unsigned int offset, size; + struct wined3d_shader *shader; + struct wined3d_range bo_range; + void *mapped_constants_bo; + DWORD uav_location; + bool is_array;
- TRACE("view_vk %p, clear_value %s, context_vk %p, fp %#x.\n", view_vk, debug_uvec4(clear_value), context_vk, fp); + device_vk = wined3d_device_vk(device); + shaders = fp ? &device_vk->uav_clear_state.float_shaders : &device_vk->uav_clear_state.uint_shaders;
resource = view_vk->v.resource; - if (resource->type != WINED3D_RTYPE_BUFFER) + is_array = view_desc->flags & WINED3D_VIEW_TEXTURE_ARRAY; + + switch (resource->type) { - FIXME("Not implemented for %s resources.\n", debug_d3dresourcetype(resource->type)); - return; + case WINED3D_RTYPE_BUFFER: shader = shaders->buffer; break; + case WINED3D_RTYPE_TEXTURE_1D: shader = is_array ? shaders->image_1d_array : shaders->image_1d; break; + case WINED3D_RTYPE_TEXTURE_2D: shader = is_array ? shaders->image_2d_array : shaders->image_2d; break; + case WINED3D_RTYPE_TEXTURE_3D: shader = shaders->image_3d; break; + + default: + ERR("Unhandled resource type %s.\n", debug_d3dresourcetype(resource->type)); + return; }
- format = view_vk->v.format; - if (format->id != WINED3DFMT_R32_UINT && format->id != WINED3DFMT_R32_SINT) + if (!shader) { - FIXME("Not implemented for format %s.\n", debug_d3dformat(format->id)); + ERR("Shader was not correctly initialized.\n"); return; }
- vk_info = context_vk->vk_info; - buffer_vk = wined3d_buffer_vk(buffer_from_resource(resource)); - wined3d_buffer_load_location(&buffer_vk->b, &context_vk->c, WINED3D_LOCATION_BUFFER); - wined3d_buffer_invalidate_location(&buffer_vk->b, ~WINED3D_LOCATION_BUFFER); + if (resource->type == WINED3D_RTYPE_BUFFER) + uav_location = WINED3D_LOCATION_BUFFER; + else + uav_location = WINED3D_LOCATION_TEXTURE_RGB;
- get_buffer_view_range(&buffer_vk->b, &view_vk->v.desc, format, &offset, &size); + wined3d_view_load_location(resource, view_desc, &context_vk->c, uav_location); + wined3d_unordered_access_view_invalidate_location(&view_vk->v, ~uav_location);
- if (!(vk_command_buffer = wined3d_context_vk_get_command_buffer(context_vk))) - return; - wined3d_context_vk_end_current_render_pass(context_vk); + constants.color.uint32[0] = clear_value->x; + constants.color.uint32[1] = clear_value->y; + constants.color.uint32[2] = clear_value->z; + constants.color.uint32[3] = clear_value->w; + + constants.extent.width = resource->width; + constants.extent.height = resource->height; + + group_count = shader->u.cs.thread_group_size; + + if (resource->type != WINED3D_RTYPE_BUFFER) + { + constants.extent.width >>= view_desc->u.texture.level_idx; + constants.extent.height >>= view_desc->u.texture.level_idx; + group_count.z = (view_desc->u.texture.layer_count + group_count.z - 1) / group_count.z; + } + + group_count.x = (constants.extent.width + group_count.x - 1) / group_count.x; + group_count.y = (constants.extent.height + group_count.y - 1) / group_count.y; + + constants_bo = &device_vk->uav_clear_state.constants_bo; + bo_address.buffer_object = (uintptr_t)constants_bo; + bo_address.addr = NULL; + + mapped_constants_bo = wined3d_context_map_bo_address(&context_vk->c, &bo_address, + sizeof(constants), WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD); + memcpy(mapped_constants_bo, &constants, sizeof(constants)); + + bo_range.offset = 0; + bo_range.size = sizeof(constants); + wined3d_context_unmap_bo_address(&context_vk->c, &bo_address, 1, &bo_range); + + vk_info = context_vk->vk_info;
- access_mask = vk_access_mask_from_bind_flags(buffer_vk->b.resource.bind_flags); vk_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; vk_barrier.pNext = NULL; - vk_barrier.srcAccessMask = access_mask; - vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + vk_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + vk_barrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - vk_barrier.buffer = buffer_vk->bo.vk_buffer; - vk_barrier.offset = buffer_vk->bo.buffer_offset + offset; - vk_barrier.size = size; - VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL)); + vk_barrier.buffer = constants_bo->vk_buffer; + vk_barrier.offset = constants_bo->buffer_offset; + vk_barrier.size = constants_bo->size; + VK_CALL(vkCmdPipelineBarrier(wined3d_context_vk_get_command_buffer(context_vk), + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, NULL, 1, &vk_barrier, 0, NULL));
- VK_CALL(vkCmdFillBuffer(vk_command_buffer, buffer_vk->bo.vk_buffer, - buffer_vk->bo.buffer_offset + offset, size, clear_value->x)); + wined3d_unordered_access_view_vk_barrier(view_vk, context_vk, WINED3D_BIND_UNORDERED_ACCESS);
- vk_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - vk_barrier.dstAccessMask = access_mask; - VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL)); + device->adapter->shader_backend->shader_run_compute(group_count.x, group_count.y, group_count.z, + &context_vk->c, shader, constants_bo, view_vk);
- wined3d_context_vk_reference_bo(context_vk, &buffer_vk->bo); + context_invalidate_compute_state(&context_vk->c, STATE_COMPUTE_SHADER); }
void wined3d_unordered_access_view_vk_update(struct wined3d_unordered_access_view_vk *uav_vk, diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 5ffcaa1f8db..1023e64cfd5 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4007,6 +4007,23 @@ void wined3d_allocator_cleanup(struct wined3d_allocator *allocator) DECLSPEC_HID bool wined3d_allocator_init(struct wined3d_allocator *allocator, size_t pool_count, const struct wined3d_allocator_ops *allocator_ops) DECLSPEC_HIDDEN;
+struct wined3d_uav_clear_shaders_vk +{ + struct wined3d_shader *buffer; + struct wined3d_shader *image_1d; + struct wined3d_shader *image_1d_array; + struct wined3d_shader *image_2d; + struct wined3d_shader *image_2d_array; + struct wined3d_shader *image_3d; +}; + +struct wined3d_uav_clear_state_vk +{ + struct wined3d_uav_clear_shaders_vk float_shaders; + struct wined3d_uav_clear_shaders_vk uint_shaders; + struct wined3d_bo_vk constants_bo; +}; + struct wined3d_device_vk { struct wined3d_device d; @@ -4024,6 +4041,8 @@ struct wined3d_device_vk struct wined3d_null_views_vk null_views_vk;
struct wined3d_allocator allocator; + + struct wined3d_uav_clear_state_vk uav_clear_state; };
static inline struct wined3d_device_vk *wined3d_device_vk(struct wined3d_device *device) @@ -4040,6 +4059,9 @@ void wined3d_device_vk_destroy_null_resources(struct wined3d_device_vk *device_v void wined3d_device_vk_destroy_null_views(struct wined3d_device_vk *device_vk, struct wined3d_context_vk *context_vk) DECLSPEC_HIDDEN;
+void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk) DECLSPEC_HIDDEN; +void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk) DECLSPEC_HIDDEN; + static inline float wined3d_alpha_ref(const struct wined3d_state *state) { return (state->render_states[WINED3D_RS_ALPHAREF] & 0xff) / 255.0f;
On Tue, 10 Aug 2021 at 10:35, Jan Sikorski jsikorski@codeweavers.com wrote:
diff --git a/dlls/wined3d/uav_clear_shaders.inc.c b/dlls/wined3d/uav_clear_shaders.inc.c new file mode 100644 index 00000000000..6cb3c808578 --- /dev/null +++ b/dlls/wined3d/uav_clear_shaders.inc.c @@ -0,0 +1,365 @@ +static const uint32_t cs_uav_clear_buffer_float_code[] = +{ +#if 0
- RWBuffer<float4> dst;
- struct
- {
float4 clear_value;
int2 dst_offset;
int2 dst_extent;
- } u_info;
- [numthreads(128, 1, 1)]
- void main(int3 thread_id : SV_DispatchThreadID)
- {
if (thread_id.x < u_info.dst_extent.x)
dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value;
- }
+#endif
- 0x43425844, 0xe114ba61, 0xff6a0d0b, 0x7b25c8f4, 0xfcf7cf22, 0x00000001, 0x0000010c, 0x00000003,
- 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
- 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a,
- 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555,
- 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001,
- 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f,
- 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000,
- 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000,
- 0x00000000, 0x01000015, 0x0100003e,
+};
We're missing a license header here. I don't see much point in making this a .c file; a .h seems preferable, and if we're following the vkd3d convention, that would be wined3d_shaders.h.
+static void STDMETHODCALLTYPE wined3d_uav_clear_object_destroyed(void *parent) +{ +}
+static struct wined3d_parent_ops wined3d_uav_clear_ops = +{
- wined3d_uav_clear_object_destroyed
+};
That's "wined3d_null_parent_ops".
+static bool create_shader(struct wined3d_device *device, const uint32_t *byte_code, size_t byte_code_size,
struct wined3d_shader **shader)
+{
- struct wined3d_shader_desc shader_desc;
- HRESULT result;
- shader_desc.byte_code = byte_code;
- shader_desc.byte_code_size = byte_code_size;
- result = wined3d_shader_create_cs(device, &shader_desc, NULL, &wined3d_uav_clear_ops, shader);
- if (FAILED(result))
WARN("Failed to initialize shader: %#x\n", result);
- return SUCCEEDED(result);
+}
+#include "uav_clear_shaders.inc.c"
+void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk) +{
- struct wined3d_context_vk *context_vk = &device_vk->context_vk;
- struct wined3d_device *device = &device_vk->d;
- struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state;
- create_shader(device, cs_uav_clear_buffer_float_code, sizeof(cs_uav_clear_buffer_float_code),
&state->float_shaders.buffer);
- create_shader(device, cs_uav_clear_buffer_uint_code, sizeof(cs_uav_clear_buffer_uint_code),
&state->uint_shaders.buffer);
- create_shader(device, cs_uav_clear_1d_array_float_code, sizeof(cs_uav_clear_1d_array_float_code),
&state->float_shaders.image_1d);
- create_shader(device, cs_uav_clear_1d_array_uint_code, sizeof(cs_uav_clear_1d_array_uint_code),
&state->uint_shaders.image_1d);
- create_shader(device, cs_uav_clear_1d_float_code, sizeof(cs_uav_clear_1d_float_code),
&state->float_shaders.image_1d_array);
- create_shader(device, cs_uav_clear_1d_uint_code, sizeof(cs_uav_clear_1d_uint_code),
&state->uint_shaders.image_1d_array);
- create_shader(device, cs_uav_clear_2d_float_code, sizeof(cs_uav_clear_2d_float_code),
&state->float_shaders.image_2d);
- create_shader(device, cs_uav_clear_2d_uint_code, sizeof(cs_uav_clear_2d_uint_code),
&state->uint_shaders.image_2d);
- create_shader(device, cs_uav_clear_2d_array_float_code, sizeof(cs_uav_clear_2d_array_float_code),
&state->float_shaders.image_2d_array);
- create_shader(device, cs_uav_clear_2d_array_uint_code, sizeof(cs_uav_clear_2d_array_uint_code),
&state->uint_shaders.image_2d_array);
- create_shader(device, cs_uav_clear_3d_float_code, sizeof(cs_uav_clear_3d_float_code),
&state->float_shaders.image_3d);
- create_shader(device, cs_uav_clear_3d_uint_code, sizeof(cs_uav_clear_3d_uint_code),
&state->uint_shaders.image_3d);
- wined3d_context_vk_create_bo(context_vk, sizeof(struct wined3d_uav_clear_constants_vk),
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&state->constants_bo);
+}
We never check the create_shader() return code.
+void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk) +{
- struct wined3d_context_vk *context_vk = &device_vk->context_vk;
- struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state;
- wined3d_context_vk_destroy_bo(context_vk, &state->constants_bo);
- if (state->float_shaders.buffer)
wined3d_shader_decref(state->float_shaders.buffer);
- if (state->uint_shaders.buffer)
wined3d_shader_decref(state->uint_shaders.buffer);
- if (state->float_shaders.image_1d)
wined3d_shader_decref(state->float_shaders.image_1d);
- if (state->uint_shaders.image_1d)
wined3d_shader_decref(state->uint_shaders.image_1d);
- if (state->float_shaders.image_1d_array)
wined3d_shader_decref(state->float_shaders.image_1d_array);
- if (state->uint_shaders.image_1d_array)
wined3d_shader_decref(state->uint_shaders.image_1d_array);
- if (state->float_shaders.image_2d)
wined3d_shader_decref(state->float_shaders.image_2d);
- if (state->uint_shaders.image_2d)
wined3d_shader_decref(state->uint_shaders.image_2d);
- if (state->float_shaders.image_2d_array)
wined3d_shader_decref(state->float_shaders.image_2d_array);
- if (state->uint_shaders.image_2d_array)
wined3d_shader_decref(state->uint_shaders.image_2d_array);
- if (state->float_shaders.image_3d)
wined3d_shader_decref(state->float_shaders.image_3d);
- if (state->uint_shaders.image_3d)
wined3d_shader_decref(state->uint_shaders.image_3d);
+}
...but if we did, these shaders could never be NULL here.
- constants.extent.width = resource->width;
- constants.extent.height = resource->height;
- group_count = shader->u.cs.thread_group_size;
- if (resource->type != WINED3D_RTYPE_BUFFER)
- {
constants.extent.width >>= view_desc->u.texture.level_idx;
constants.extent.height >>= view_desc->u.texture.level_idx;
group_count.z = (view_desc->u.texture.layer_count + group_count.z - 1) / group_count.z;
- }
I.e., wined3d_texture_get_level_width(), wined3d_texture_get_level_height().
- constants_bo = &device_vk->uav_clear_state.constants_bo;
- bo_address.buffer_object = (uintptr_t)constants_bo;
- bo_address.addr = NULL;
- mapped_constants_bo = wined3d_context_map_bo_address(&context_vk->c, &bo_address,
sizeof(constants), WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD);
- memcpy(mapped_constants_bo, &constants, sizeof(constants));
- bo_range.offset = 0;
- bo_range.size = sizeof(constants);
- wined3d_context_unmap_bo_address(&context_vk->c, &bo_address, 1, &bo_range);
I.e., wined3d_context_copy_bo_address().
vk_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; vk_barrier.pNext = NULL;
- vk_barrier.srcAccessMask = access_mask;
- vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- vk_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
- vk_barrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- vk_barrier.buffer = buffer_vk->bo.vk_buffer;
- vk_barrier.offset = buffer_vk->bo.buffer_offset + offset;
- vk_barrier.size = size;
- VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL));
- vk_barrier.buffer = constants_bo->vk_buffer;
- vk_barrier.offset = constants_bo->buffer_offset;
- vk_barrier.size = constants_bo->size;
- VK_CALL(vkCmdPipelineBarrier(wined3d_context_vk_get_command_buffer(context_vk),
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0, NULL, 1, &vk_barrier, 0, NULL));
Do we need that barrier? Specifically, is this not already covered by "Host Write Ordering Guarantees"?
On 10 Aug 2021, at 20:51, Henri Verbeet hverbeet@gmail.com wrote:
> We never check the create_shader() return code.
> ...but if we did, these shaders could never be NULL here.
They can legitimately be NULL on low feature levels? I guess I should check if it’s >= 11_0 and only then try.
> Do we need that barrier? Specifically, is this not already covered by "Host Write Ordering Guarantees"?
I think you’re right, it is.
- Jan
On Wed, 11 Aug 2021 at 11:50, Jan Sikorski jsikorski@codeweavers.com wrote:
> On 10 Aug 2021, at 20:51, Henri Verbeet hverbeet@gmail.com wrote:
>> We never check the create_shader() return code.
>> ...but if we did, these shaders could never be NULL here.
> They can legitimately be NULL on low feature levels? I guess I should check if it’s >= 11_0 and only then try.
Oh right, yes, these would fail to compile on lower feature levels because we go through wined3d_shader_create_cs().
On Tue, 10 Aug 2021 at 10:34, Jan Sikorski jsikorski@codeweavers.com wrote:
+static void wined3d_view_load_location(struct wined3d_resource *resource,
const struct wined3d_view_desc *desc, struct wined3d_context *context, DWORD location)
+{
- unsigned int i, sub_resource_idx, layer_count;
- struct wined3d_texture *texture;
- if (resource->type == WINED3D_RTYPE_BUFFER)
- {
wined3d_buffer_load_location(buffer_from_resource(resource), context, location);
return;
- }
- texture = texture_from_resource(resource);
- sub_resource_idx = desc->u.texture.layer_idx * texture->level_count + desc->u.texture.level_idx;
- layer_count = resource->type != WINED3D_RTYPE_TEXTURE_3D ? desc->u.texture.layer_count : 1;
- for (i = 0; i < layer_count; ++i, sub_resource_idx += texture->level_count)
wined3d_texture_load_location(texture, sub_resource_idx, context, location);
+}
As it is, this is unused, and would cause compiler warnings when applied. If we're going to have this helper, we'd probably want to use it from wined3d_rendertarget_view_load_location().