From: Stefan Dösinger <stefan@codeweavers.com>
This patch, together with "wined3d: Avoid barriers between the same write type", increases performance in Rocket League by about 3%. No deeply scientific benchmark, but the patch does have an impact. --- dlls/wined3d/resource.c | 4 +- dlls/wined3d/texture.c | 83 +++++++++++++++++++++++----------- dlls/wined3d/utils.c | 1 + dlls/wined3d/wined3d_private.h | 4 +- 4 files changed, 64 insertions(+), 28 deletions(-)
diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c index 48a69c9978a..e7b84e9a035 100644 --- a/dlls/wined3d/resource.c +++ b/dlls/wined3d/resource.c @@ -569,6 +569,8 @@ VkAccessFlags vk_access_mask_from_bind_flags(uint32_t bind_flags) flags |= VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT; if (bind_flags & WINED3D_BIND_TRANSFER_DST) flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + if (bind_flags & WINED3D_BIND_TRANSFER_SRC) + flags |= VK_ACCESS_TRANSFER_READ_BIT;
return flags; } @@ -591,7 +593,7 @@ VkPipelineStageFlags vk_pipeline_stage_mask_from_bind_flags(uint32_t bind_flags) flags |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; if (bind_flags & WINED3D_BIND_STREAM_OUTPUT) flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; - if (bind_flags & WINED3D_BIND_TRANSFER_DST) + if (bind_flags & (WINED3D_BIND_TRANSFER_DST | WINED3D_BIND_TRANSFER_SRC)) flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
return flags; diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c index 41b883d4343..d246126eab5 100644 --- a/dlls/wined3d/texture.c +++ b/dlls/wined3d/texture.c @@ -5746,6 +5746,9 @@ enum VkImageLayout wined3d_layout_from_bind_mask(const struct wined3d_texture_vk case WINED3D_BIND_TRANSFER_DST: return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ case WINED3D_BIND_TRANSFER_SRC: + return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + default: ERR("Unexpected bind mask %s.\n", wined3d_debug_bind_flags(bind_mask)); return VK_IMAGE_LAYOUT_GENERAL; @@ -7195,26 +7198,43 @@ static DWORD vk_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_ goto next; }
- if (src_texture_vk->layout == VK_IMAGE_LAYOUT_GENERAL) - src_layout = VK_IMAGE_LAYOUT_GENERAL; + if (src_texture->layer_count == 1 && src_texture->level_count == 1) + { + wined3d_texture_vk_barrier(src_texture_vk, context_vk, WINED3D_BIND_TRANSFER_SRC); + src_layout = src_texture_vk->layout; + } else - src_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + { + if (src_texture_vk->layout == VK_IMAGE_LAYOUT_GENERAL) + src_layout = VK_IMAGE_LAYOUT_GENERAL; + else + src_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
- if (dst_texture_vk->layout == VK_IMAGE_LAYOUT_GENERAL) - dst_layout = VK_IMAGE_LAYOUT_GENERAL; + wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + vk_access_mask_from_bind_flags(src_texture_vk->bind_mask), + VK_ACCESS_TRANSFER_READ_BIT, src_texture_vk->layout, src_layout, + src_texture_vk->image.vk_image, &vk_src_range); + } + + if (dst_texture->layer_count == 1 && dst_texture->level_count == 1) + { + wined3d_texture_vk_barrier(dst_texture_vk, context_vk, WINED3D_BIND_TRANSFER_DST); + dst_layout = dst_texture_vk->layout; + } else - dst_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + { + if (dst_texture_vk->layout == VK_IMAGE_LAYOUT_GENERAL) + dst_layout = VK_IMAGE_LAYOUT_GENERAL; + else + dst_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
- wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - vk_access_mask_from_bind_flags(src_texture_vk->bind_mask), - VK_ACCESS_TRANSFER_READ_BIT, src_texture_vk->layout, src_layout, - src_texture_vk->image.vk_image, &vk_src_range); - wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - vk_access_mask_from_bind_flags(dst_texture_vk->bind_mask), - VK_ACCESS_TRANSFER_WRITE_BIT, dst_texture_vk->layout, dst_layout, - dst_texture_vk->image.vk_image, &vk_dst_range); + wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + vk_access_mask_from_bind_flags(dst_texture_vk->bind_mask), + VK_ACCESS_TRANSFER_WRITE_BIT, dst_texture_vk->layout, dst_layout, + dst_texture_vk->image.vk_image, &vk_dst_range); + }
if (resolve) { @@ -7440,16 +7460,27 @@ static DWORD vk_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_ dst_texture_vk->image.vk_image, dst_layout, 1, &region)); }
- wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, - vk_access_mask_from_bind_flags(dst_texture_vk->t.resource.bind_flags), - dst_layout, dst_texture_vk->layout, dst_texture_vk->image.vk_image, &vk_dst_range); - wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_ACCESS_TRANSFER_READ_BIT, - vk_access_mask_from_bind_flags(src_texture_vk->t.resource.bind_flags), - src_layout, src_texture_vk->layout, src_texture_vk->image.vk_image, &vk_src_range); + if (dst_texture->layer_count != 1 || dst_texture->level_count != 1) + { + wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + vk_access_mask_from_bind_flags(dst_texture_vk->t.resource.bind_flags), + dst_layout, dst_texture_vk->layout, dst_texture_vk->image.vk_image, &vk_dst_range); + } + else + dst_texture_vk->layout = dst_layout; + + if (src_texture->layer_count != 1 || src_texture->level_count != 1) + { + wined3d_context_vk_image_barrier(context_vk, vk_command_buffer, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_TRANSFER_READ_BIT, + vk_access_mask_from_bind_flags(src_texture_vk->t.resource.bind_flags), + src_layout, src_texture_vk->layout, src_texture_vk->image.vk_image, &vk_src_range); + } + else + src_texture_vk->layout = src_layout;
wined3d_texture_validate_location(dst_texture, dst_sub_resource_idx, WINED3D_LOCATION_TEXTURE_RGB); wined3d_texture_invalidate_location(dst_texture, dst_sub_resource_idx, ~WINED3D_LOCATION_TEXTURE_RGB); diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 357adda5e2e..46f266ca4d2 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -4897,6 +4897,7 @@ const char *wined3d_debug_bind_flags(uint32_t bind_flags) BIND_FLAG_TO_STR(WINED3D_BIND_UNORDERED_ACCESS); BIND_FLAG_TO_STR(WINED3D_BIND_INDIRECT_BUFFER); BIND_FLAG_TO_STR(WINED3D_BIND_TRANSFER_DST); + BIND_FLAG_TO_STR(WINED3D_BIND_TRANSFER_SRC); #undef BIND_FLAG_TO_STR if (bind_flags) FIXME("Unrecognised bind flag(s) %#x.\n", bind_flags); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 7e0ea489ea6..d2fb3c1f03c 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -339,9 +339,11 @@ extern const struct min_lookup minMipLookup[WINED3D_TEXF_LINEAR + 1] DECLSPEC_HI extern const GLenum magLookup[WINED3D_TEXF_LINEAR + 1] DECLSPEC_HIDDEN;
#define WINED3D_BIND_TRANSFER_DST 0x10000000 +#define WINED3D_BIND_TRANSFER_SRC 0x20000000
static const uint32_t WINED3D_READ_ONLY_BIND_MASK = WINED3D_BIND_VERTEX_BUFFER | WINED3D_BIND_INDEX_BUFFER - | WINED3D_BIND_CONSTANT_BUFFER | WINED3D_BIND_SHADER_RESOURCE | WINED3D_BIND_INDIRECT_BUFFER; + | WINED3D_BIND_CONSTANT_BUFFER | WINED3D_BIND_SHADER_RESOURCE | WINED3D_BIND_INDIRECT_BUFFER + | WINED3D_BIND_TRANSFER_SRC;
static const VkAccessFlags WINED3D_READ_ONLY_ACCESS_FLAGS = VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT