From: Zebediah Figura <zfigura@codeweavers.com>
For discard maps on deferred contexts, we currently blit from the upload buffer to the resource. This is necessary because command lists can be reused and submitted multiple times—we cannot simply have the buffer take ownership of the upload BO, as we do for discard maps on immediate contexts.
However, it is very common for applications to use a command list only once before throwing it away—in essence using the feature only for the multithreading benefits it allows.
Therefore we take advantage of this pattern by trying to rename the buffer anyway. In order to do this we introduce a refcount for BO pointers. When writing to a buffer BO, we first check whether the buffer "owns" the BO—i.e. whether the BO has a refcount of 1—and if not, we create a new BO for the buffer and copy the contents of the old BO to the new BO. That is, we perform mostly-transparent copy-on-write.
This improves performance, and reduces CPU usage, in Assassin's Creed: Unity. --- dlls/wined3d/buffer.c | 87 ++++++++++++++++++++++++++++++---- dlls/wined3d/context_gl.c | 3 ++ dlls/wined3d/context_vk.c | 5 ++ dlls/wined3d/cs.c | 7 ++- dlls/wined3d/device.c | 1 + dlls/wined3d/view.c | 10 ++++ dlls/wined3d/wined3d_private.h | 7 +++ 7 files changed, 109 insertions(+), 11 deletions(-)
diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c index de1cf90d055..c71c1ac2a87 100644 --- a/dlls/wined3d/buffer.c +++ b/dlls/wined3d/buffer.c @@ -199,8 +199,11 @@ static void wined3d_buffer_gl_destroy_buffer_object(struct wined3d_buffer_gl *bu buffer_gl->b.bo_user.valid = false; list_remove(&buffer_gl->b.bo_user.entry); } - wined3d_context_gl_destroy_bo(context_gl, bo_gl); - heap_free(bo_gl); + if (!--bo_gl->b.refcount) + { + wined3d_context_gl_destroy_bo(context_gl, bo_gl); + heap_free(bo_gl); + } buffer_gl->b.buffer_object = NULL; }
@@ -1015,6 +1018,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
if (flags & WINED3D_MAP_WRITE) { + wined3d_buffer_acquire_bo_for_write(buffer, context); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); buffer_invalidate_bo_range(buffer, dirty_offset, dirty_size); } @@ -1127,19 +1131,60 @@ static void wined3d_buffer_set_bo(struct wined3d_buffer *buffer, struct wined3d_ { struct wined3d_bo_user *bo_user;
+ /* The previous BO might have users in other buffers which were valid, + * and should in theory remain valid. The problem is that it's not easy + * to tell which users belong to this buffer and which don't. We could + * add a field, but for now it's easier and probably fine to just + * invalidate every user. */ LIST_FOR_EACH_ENTRY(bo_user, &prev_bo->users, struct wined3d_bo_user, entry) bo_user->valid = false; list_init(&prev_bo->users);
- assert(list_empty(&bo->users)); - - wined3d_context_destroy_bo(context, prev_bo); - heap_free(prev_bo); + if (!--prev_bo->refcount) + { + wined3d_context_destroy_bo(context, prev_bo); + heap_free(prev_bo); + } }
buffer->buffer_object = bo; }
+void wined3d_buffer_acquire_bo_for_write(struct wined3d_buffer *buffer, struct wined3d_context *context) +{ + const struct wined3d_range range = {.size = buffer->resource.size}; + struct wined3d_bo_address dst, src; + struct wined3d_bo *bo; + + if (!(bo = buffer->buffer_object)) + return; + + /* If we are the only owner of this BO, there is nothing to do. */ + if (bo->refcount == 1) + return; + + TRACE("Performing copy-on-write for BO %p.\n", bo); + + /* Grab a reference to the current BO. It's okay if this overflows, because + * the following unload will release it. */ + ++bo->refcount; + + /* Unload and re-prepare to get a new buffer. This is a bit cheap and not + * perfectly idiomatic—we should really just factor out an adapter-agnostic + * function to create a BO and then use wined3d_buffer_set_bo()—but it'll + * do nonetheless. */ + wined3d_buffer_unload_location(buffer, context, WINED3D_LOCATION_BUFFER); + wined3d_buffer_prepare_location(buffer, context, WINED3D_LOCATION_BUFFER); + + /* And finally, perform the actual copy. */ + assert(buffer->buffer_object != bo); + dst.buffer_object = buffer->buffer_object; + dst.addr = NULL; + src.buffer_object = bo; + src.addr = NULL; + wined3d_context_copy_bo_address(context, &dst, &src, 1, &range, WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD); +} + void wined3d_buffer_copy_bo_address(struct wined3d_buffer *dst_buffer, struct wined3d_context *context, unsigned int dst_offset, const struct wined3d_const_bo_address *src_addr, unsigned int size) { @@ -1151,6 +1196,9 @@ void wined3d_buffer_copy_bo_address(struct wined3d_buffer *dst_buffer, struct wi if (!dst_offset && size == dst_buffer->resource.size) map_flags |= WINED3D_MAP_DISCARD;
+ if (map_flags & WINED3D_MAP_DISCARD) + wined3d_buffer_acquire_bo_for_write(dst_buffer, context); + dst_location = wined3d_buffer_get_memory(dst_buffer, context, &dst_addr); dst_addr.addr += dst_offset;
@@ -1182,8 +1230,26 @@ void wined3d_buffer_copy(struct wined3d_buffer *dst_buffer, unsigned int dst_off void wined3d_buffer_update_sub_resource(struct wined3d_buffer *buffer, struct wined3d_context *context, const struct upload_bo *upload_bo, unsigned int offset, unsigned int size) { - if (upload_bo->flags & UPLOAD_BO_RENAME_ON_UNMAP) + struct wined3d_bo *bo = upload_bo->addr.buffer_object; + uint32_t flags = upload_bo->flags; + + /* Try to take this buffer for COW. Don't take it if we've saturated the + * refcount. */ + if (!offset && size == buffer->resource.size + && bo && bo->refcount < UINT8_MAX && !(upload_bo->flags & UPLOAD_BO_RENAME_ON_UNMAP)) { + flags |= UPLOAD_BO_RENAME_ON_UNMAP; + ++bo->refcount; + } + + if (flags & UPLOAD_BO_RENAME_ON_UNMAP) + { + /* Don't increment the refcount. UPLOAD_BO_RENAME_ON_UNMAP transfers an + * existing reference. + * + * FIXME: We could degenerate RENAME to a copy + free and rely on the + * COW logic to detect this case. + */ wined3d_buffer_set_bo(buffer, context, upload_bo->addr.buffer_object); wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); @@ -1572,8 +1638,11 @@ static void wined3d_buffer_vk_unload_location(struct wined3d_buffer *buffer, buffer->bo_user.valid = false; list_remove(&buffer->bo_user.entry); } - wined3d_context_vk_destroy_bo(context_vk, bo_vk); - heap_free(bo_vk); + if (!--bo_vk->b.refcount) + { + wined3d_context_vk_destroy_bo(context_vk, bo_vk); + heap_free(bo_vk); + } buffer->buffer_object = NULL; break;
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index d6827c2c0de..36786cc5a29 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -4163,6 +4163,7 @@ static void context_gl_load_unordered_access_resources(struct wined3d_context_gl if (view->resource->type == WINED3D_RTYPE_BUFFER) { buffer = buffer_from_resource(view->resource); + wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c); wined3d_buffer_load_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER); wined3d_unordered_access_view_invalidate_location(view, ~WINED3D_LOCATION_BUFFER); wined3d_context_gl_reference_buffer(context_gl, buffer); @@ -4193,6 +4194,8 @@ static void context_gl_load_stream_output_buffers(struct wined3d_context_gl *con if (!(buffer = state->stream_output[i].buffer)) continue;
+ wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c); + wined3d_buffer_load(buffer, &context_gl->c, state); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); wined3d_context_gl_reference_buffer(context_gl, buffer); diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index efc93b94539..859e8fb5949 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -478,6 +478,7 @@ static bool wined3d_context_vk_create_slab_bo(struct wined3d_context_vk *context *bo = slab->bo; bo->memory = NULL; bo->slab = slab; + bo->b.refcount = 1; bo->b.client_map_count = 0; bo->b.map_ptr = NULL; bo->b.buffer_offset = idx * object_size; @@ -557,6 +558,7 @@ BOOL wined3d_context_vk_create_bo(struct wined3d_context_vk *context_vk, VkDevic return FALSE; }
+ bo->b.refcount = 1; bo->b.client_map_count = 0; bo->b.map_ptr = NULL; bo->b.buffer_offset = 0; @@ -3415,6 +3417,8 @@ static void wined3d_context_vk_load_buffers(struct wined3d_context_vk *context_v if (!(buffer = state->stream_output[i].buffer)) continue;
+ wined3d_buffer_acquire_bo_for_write(buffer, &context_vk->c); + buffer_vk = wined3d_buffer_vk(buffer); wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_STREAM_OUTPUT); @@ -3525,6 +3529,7 @@ static void wined3d_context_vk_load_shader_resources(struct wined3d_context_vk * uav_vk = wined3d_unordered_access_view_vk(uav); if (uav->resource->type == WINED3D_RTYPE_BUFFER) { + wined3d_buffer_acquire_bo_for_write(buffer_from_resource(uav->resource), &context_vk->c); if (!uav_vk->view_vk.bo_user.valid) { wined3d_unordered_access_view_vk_update(uav_vk, context_vk); diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index e7ccd3f9125..9a4f2cdfc94 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -4494,8 +4494,11 @@ static void wined3d_command_list_destroy_object(void *object)
if ((bo = list->uploads[i].bo)) { - wined3d_context_destroy_bo(context, bo); - heap_free(bo); + if (!--bo->refcount) + { + wined3d_context_destroy_bo(context, bo); + heap_free(bo); + } } else { diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index dce2b65b6d3..d62e9f03338 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -1181,6 +1181,7 @@ bool wined3d_device_gl_create_bo(struct wined3d_device_gl *device_gl, struct win bo->b.memory_offset = bo->b.buffer_offset; bo->b.map_ptr = NULL; bo->b.client_map_count = 0; + bo->b.refcount = 1;
return true; } diff --git a/dlls/wined3d/view.c b/dlls/wined3d/view.c index 755fd5b2648..675cc186b36 100644 --- a/dlls/wined3d/view.c +++ b/dlls/wined3d/view.c @@ -1667,9 +1667,14 @@ void wined3d_unordered_access_view_gl_clear(struct wined3d_unordered_access_view get_buffer_view_range(buffer, &view_gl->v.desc, &format_gl->f, &offset, &size);
if (!offset && size == buffer->resource.size) + { wined3d_buffer_prepare_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER); + } else + { + wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c); wined3d_buffer_load_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER); + } wined3d_unordered_access_view_invalidate_location(&view_gl->v, ~WINED3D_LOCATION_BUFFER);
bo_gl = wined3d_bo_gl(buffer->buffer_object); @@ -2073,9 +2078,14 @@ void wined3d_unordered_access_view_vk_clear(struct wined3d_unordered_access_view
get_buffer_view_range(buffer, view_desc, view_format, &offset, &size); if (!offset && size == buffer->resource.size) + { wined3d_buffer_prepare_location(buffer, &context_vk->c, WINED3D_LOCATION_BUFFER); + } else + { + wined3d_buffer_acquire_bo_for_write(buffer, &context_vk->c); wined3d_buffer_load_location(buffer, &context_vk->c, WINED3D_LOCATION_BUFFER); + } wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); } diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 91e1571224d..6c4f41ab653 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -1611,6 +1611,11 @@ struct wined3d_bo size_t memory_offset; unsigned int client_map_count; bool coherent; + /* Number of resources referencing this BO, used for COW tracking. + * If a resource has this BO as a location and wants to write to it, it + * needs to make a copy unless it's the only owner (refcount == 1). + * Deferred contexts may also hold a reference. */ + uint8_t refcount; };
struct wined3d_bo_gl @@ -4557,6 +4562,8 @@ static inline void wined3d_buffer_validate_user(struct wined3d_buffer *buffer) list_add_head(&buffer->buffer_object->users, &buffer->bo_user.entry); }
+void wined3d_buffer_acquire_bo_for_write(struct wined3d_buffer *buffer, + struct wined3d_context *context) DECLSPEC_HIDDEN; void wined3d_buffer_cleanup(struct wined3d_buffer *buffer) DECLSPEC_HIDDEN; void wined3d_buffer_copy(struct wined3d_buffer *dst_buffer, unsigned int dst_offset, struct wined3d_buffer *src_buffer, unsigned int src_offset, unsigned int size) DECLSPEC_HIDDEN;