From: Zebediah Figura zfigura@codeweavers.com
--- dlls/wined3d/cs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 5fc2bf64074..1c78ff6512e 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -29,7 +29,7 @@ struct wined3d_deferred_upload { struct wined3d_resource *resource; unsigned int sub_resource_idx; - uint8_t *sysmem; + uint8_t *sysmem, *map_ptr; struct wined3d_box box; uint32_t upload_flags; }; @@ -4166,7 +4166,7 @@ static bool wined3d_deferred_context_map_upload_bo(struct wined3d_device_context return false;
upload->upload_flags = 0; - map_desc->data = (void *)align((size_t)upload->sysmem, RESOURCE_ALIGNMENT); + map_desc->data = upload->map_ptr; return true; }
@@ -4201,9 +4201,10 @@ static bool wined3d_deferred_context_map_upload_bo(struct wined3d_device_context wined3d_resource_incref(resource); upload->sub_resource_idx = sub_resource_idx; upload->sysmem = sysmem; + upload->map_ptr = (void *)align((size_t)upload->sysmem, RESOURCE_ALIGNMENT); upload->box = *box;
- map_desc->data = (void *)align((size_t)upload->sysmem, RESOURCE_ALIGNMENT); + map_desc->data = upload->map_ptr; return true; }
@@ -4217,7 +4218,7 @@ static bool wined3d_deferred_context_unmap_upload_bo(struct wined3d_device_conte { *box = upload->box; bo->addr.buffer_object = 0; - bo->addr.addr = (uint8_t *)align((size_t)upload->sysmem, RESOURCE_ALIGNMENT); + bo->addr.addr = upload->map_ptr; bo->flags = upload->upload_flags; upload->upload_flags = 0; return true;
From: Zebediah Figura zfigura@codeweavers.com
--- dlls/wined3d/cs.c | 80 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 19 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 1c78ff6512e..e7ccd3f9125 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -29,6 +29,7 @@ struct wined3d_deferred_upload { struct wined3d_resource *resource; unsigned int sub_resource_idx; + struct wined3d_bo *bo; uint8_t *sysmem, *map_ptr; struct wined3d_box box; uint32_t upload_flags; @@ -4137,7 +4138,9 @@ static bool wined3d_deferred_context_map_upload_bo(struct wined3d_device_context { struct wined3d_deferred_context *deferred = wined3d_deferred_context_from_context(context); const struct wined3d_format *format = resource->format; + struct wined3d_device *device = context->device; struct wined3d_deferred_upload *upload; + struct wined3d_bo_address addr; uint8_t *sysmem; size_t size;
@@ -4174,34 +4177,54 @@ static bool wined3d_deferred_context_map_upload_bo(struct wined3d_device_context deferred->upload_count + 1, sizeof(*deferred->uploads))) return false;
- if (!deferred->upload_heap) + upload = &deferred->uploads[deferred->upload_count++]; + + if ((flags & WINED3D_MAP_DISCARD) + && device->adapter->adapter_ops->adapter_alloc_bo(device, resource, sub_resource_idx, &addr)) { - if (!(deferred->upload_heap = HeapCreate(0, 0, 0))) + upload->bo = addr.buffer_object; + upload->sysmem = NULL; + + TRACE("Allocated BO %s.\n", debug_bo_address(&addr)); + + wined3d_device_bo_map_lock(device); + upload->map_ptr = addr.buffer_object->map_ptr; + wined3d_device_bo_map_unlock(device); + upload->map_ptr += addr.buffer_object->memory_offset; + assert(upload->map_ptr); + } + else + { + if (!deferred->upload_heap) { - ERR("Failed to create upload heap.\n"); - return false; + if (!(deferred->upload_heap = HeapCreate(0, 0, 0))) + { + ERR("Failed to create upload heap.\n"); + return false; + } + + if (!(deferred->upload_heap_refcount = heap_alloc(sizeof(*deferred->upload_heap_refcount)))) + { + HeapDestroy(deferred->upload_heap); + deferred->upload_heap = 0; + return false; + } + + *deferred->upload_heap_refcount = 1; }
- if (!(deferred->upload_heap_refcount = heap_alloc(sizeof(*deferred->upload_heap_refcount)))) - { - HeapDestroy(deferred->upload_heap); - deferred->upload_heap = 0; + if (!(sysmem = HeapAlloc(deferred->upload_heap, 0, size + RESOURCE_ALIGNMENT - 1))) return false; - }
- *deferred->upload_heap_refcount = 1; + upload->bo = NULL; + upload->sysmem = sysmem; + upload->map_ptr = (void *)align((size_t)upload->sysmem, RESOURCE_ALIGNMENT); }
- if (!(sysmem = HeapAlloc(deferred->upload_heap, 0, size + RESOURCE_ALIGNMENT - 1))) - return false; - - upload = &deferred->uploads[deferred->upload_count++]; upload->upload_flags = UPLOAD_BO_UPLOAD_ON_UNMAP; upload->resource = resource; wined3d_resource_incref(resource); upload->sub_resource_idx = sub_resource_idx; - upload->sysmem = sysmem; - upload->map_ptr = (void *)align((size_t)upload->sysmem, RESOURCE_ALIGNMENT); upload->box = *box;
map_desc->data = upload->map_ptr; @@ -4217,8 +4240,10 @@ static bool wined3d_deferred_context_unmap_upload_bo(struct wined3d_device_conte if ((upload = deferred_context_get_upload(deferred, resource, sub_resource_idx))) { *box = upload->box; - bo->addr.buffer_object = 0; - bo->addr.addr = upload->map_ptr; + if ((bo->addr.buffer_object = upload->bo)) + bo->addr.addr = NULL; + else + bo->addr.addr = upload->map_ptr; bo->flags = upload->upload_flags; upload->upload_flags = 0; return true; @@ -4456,12 +4481,29 @@ HRESULT CDECL wined3d_deferred_context_record_command_list(struct wined3d_device static void wined3d_command_list_destroy_object(void *object) { struct wined3d_command_list *list = object; + struct wined3d_context *context; unsigned int i;
TRACE("list %p.\n", list);
+ context = context_acquire(list->device, NULL, 0); + for (i = 0; i < list->upload_count; ++i) - HeapFree(list->upload_heap, 0, list->uploads[i].sysmem); + { + struct wined3d_bo *bo; + + if ((bo = list->uploads[i].bo)) + { + wined3d_context_destroy_bo(context, bo); + heap_free(bo); + } + else + { + HeapFree(list->upload_heap, 0, list->uploads[i].sysmem); + } + } + + context_release(context);
if (list->upload_heap) {
From: Zebediah Figura zfigura@codeweavers.com
Keep a user in its BO's user list only while that user is valid. This makes it easier to invalidate users in the case where multiple resources share the same BO, which in turn is necessary to implement copy-on-write semantics for BOs.
This is also necessary if we ever want to evict resources which have views, although it's not clear if this will ever be necessary. If nothing else, though, it removes that implicit dependency. --- dlls/wined3d/adapter_vk.c | 11 +++++------ dlls/wined3d/buffer.c | 26 +++++++++++++------------- dlls/wined3d/context_gl.c | 24 ++++++++++++------------ dlls/wined3d/context_vk.c | 2 ++ dlls/wined3d/state.c | 6 +++--- dlls/wined3d/texture.c | 3 ++- dlls/wined3d/view.c | 21 +++++++++++++++++---- dlls/wined3d/wined3d_private.h | 8 ++++++++ 8 files changed, 62 insertions(+), 39 deletions(-)
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index a03ec37161e..6a030f0440a 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -921,16 +921,15 @@ static void *adapter_vk_map_bo_address(struct wined3d_context *context, if (wined3d_context_vk_create_bo(context_vk, bo->size, bo->usage, bo->memory_type, &tmp)) { bool host_synced = bo->host_synced; - list_move_head(&tmp.b.users, &bo->b.users); + + LIST_FOR_EACH_ENTRY(bo_user, &bo->b.users, struct wined3d_bo_user, entry) + bo_user->valid = false; + list_init(&bo->b.users); + wined3d_context_vk_destroy_bo(context_vk, bo); *bo = tmp; bo->host_synced = host_synced; list_init(&bo->b.users); - list_move_head(&bo->b.users, &tmp.b.users); - LIST_FOR_EACH_ENTRY(bo_user, &bo->b.users, struct wined3d_bo_user, entry) - { - bo_user->valid = false; - }
goto map; } diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c index efbd3233428..de1cf90d055 100644 --- a/dlls/wined3d/buffer.c +++ b/dlls/wined3d/buffer.c @@ -194,8 +194,11 @@ static void wined3d_buffer_gl_destroy_buffer_object(struct wined3d_buffer_gl *bu wined3d_context_gl_end_transform_feedback(context_gl); }
- buffer_gl->b.bo_user.valid = false; - list_remove(&buffer_gl->b.bo_user.entry); + if (buffer_gl->b.bo_user.valid) + { + buffer_gl->b.bo_user.valid = false; + list_remove(&buffer_gl->b.bo_user.entry); + } wined3d_context_gl_destroy_bo(context_gl, bo_gl); heap_free(bo_gl); buffer_gl->b.buffer_object = NULL; @@ -237,7 +240,6 @@ static BOOL wined3d_buffer_gl_create_buffer_object(struct wined3d_buffer_gl *buf return FALSE; }
- list_add_head(&bo->b.users, &buffer_gl->b.bo_user.entry); buffer_gl->b.buffer_object = &bo->b; buffer_invalidate_bo_range(&buffer_gl->b, 0, 0);
@@ -1127,16 +1129,13 @@ static void wined3d_buffer_set_bo(struct wined3d_buffer *buffer, struct wined3d_
LIST_FOR_EACH_ENTRY(bo_user, &prev_bo->users, struct wined3d_bo_user, entry) bo_user->valid = false; + list_init(&prev_bo->users); + assert(list_empty(&bo->users)); - list_move_head(&bo->users, &prev_bo->users);
wined3d_context_destroy_bo(context, prev_bo); heap_free(prev_bo); } - else - { - list_add_head(&bo->users, &buffer->bo_user.entry); - }
buffer->buffer_object = bo; } @@ -1516,8 +1515,6 @@ static BOOL wined3d_buffer_vk_create_buffer_object(struct wined3d_buffer_vk *buf return FALSE; }
- list_init(&buffer_vk->b.bo_user.entry); - list_add_head(&bo_vk->b.users, &buffer_vk->b.bo_user.entry); buffer_vk->b.buffer_object = &bo_vk->b; buffer_invalidate_bo_range(&buffer_vk->b, 0, 0);
@@ -1534,7 +1531,7 @@ const VkDescriptorBufferInfo *wined3d_buffer_vk_get_buffer_info(struct wined3d_b buffer_vk->buffer_info.buffer = bo->vk_buffer; buffer_vk->buffer_info.offset = bo->b.buffer_offset; buffer_vk->buffer_info.range = buffer_vk->b.resource.size; - buffer_vk->b.bo_user.valid = true; + wined3d_buffer_validate_user(&buffer_vk->b);
return &buffer_vk->buffer_info; } @@ -1570,8 +1567,11 @@ static void wined3d_buffer_vk_unload_location(struct wined3d_buffer *buffer, switch (location) { case WINED3D_LOCATION_BUFFER: - buffer->bo_user.valid = false; - list_remove(&buffer->bo_user.entry); + if (buffer->bo_user.valid) + { + buffer->bo_user.valid = false; + list_remove(&buffer->bo_user.entry); + } wined3d_context_vk_destroy_bo(context_vk, bo_vk); heap_free(bo_vk); buffer->buffer_object = NULL; diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 03638666268..d6827c2c0de 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -2859,15 +2859,13 @@ static void *wined3d_bo_gl_map(struct wined3d_bo_gl *bo, struct wined3d_context_ if (wined3d_device_gl_create_bo(device_gl, context_gl, bo->size, bo->binding, bo->usage, bo->b.coherent, bo->flags, &tmp)) { - list_move_head(&tmp.b.users, &bo->b.users); + LIST_FOR_EACH_ENTRY(bo_user, &bo->b.users, struct wined3d_bo_user, entry) + bo_user->valid = false; + list_init(&bo->b.users); + wined3d_context_gl_destroy_bo(context_gl, bo); *bo = tmp; list_init(&bo->b.users); - list_move_head(&bo->b.users, &tmp.b.users); - LIST_FOR_EACH_ENTRY(bo_user, &bo->b.users, struct wined3d_bo_user, entry) - { - bo_user->valid = false; - }
goto map; } @@ -3153,6 +3151,8 @@ void wined3d_context_gl_destroy_bo(struct wined3d_context_gl *context_gl, struct
TRACE("context_gl %p, bo %p.\n", context_gl, bo);
+ assert(list_empty(&bo->b.users)); + if (bo->memory) { unsigned int order = bo->memory->order; @@ -5421,7 +5421,7 @@ void wined3d_context_gl_load_tex_coords(const struct wined3d_context_gl *context gl_info->gl_ops.gl.p_glTexCoordPointer(format_gl->vtx_format, format_gl->vtx_type, e->stride, get_vertex_attrib_pointer(e, state)); gl_info->gl_ops.gl.p_glEnableClientState(GL_TEXTURE_COORD_ARRAY); - state->streams[e->stream_idx].buffer->bo_user.valid = true; + wined3d_buffer_validate_user(state->streams[e->stream_idx].buffer); } else { @@ -5508,7 +5508,7 @@ static void wined3d_context_gl_load_vertex_data(struct wined3d_context_gl *conte checkGLcall("glVertexPointer(...)"); gl_info->gl_ops.gl.p_glEnableClientState(GL_VERTEX_ARRAY); checkGLcall("glEnableClientState(GL_VERTEX_ARRAY)"); - state->streams[e->stream_idx].buffer->bo_user.valid = true; + wined3d_buffer_validate_user(state->streams[e->stream_idx].buffer); }
/* Normals */ @@ -5531,7 +5531,7 @@ static void wined3d_context_gl_load_vertex_data(struct wined3d_context_gl *conte checkGLcall("glNormalPointer(...)"); gl_info->gl_ops.gl.p_glEnableClientState(GL_NORMAL_ARRAY); checkGLcall("glEnableClientState(GL_NORMAL_ARRAY)"); - state->streams[e->stream_idx].buffer->bo_user.valid = true; + wined3d_buffer_validate_user(state->streams[e->stream_idx].buffer); } else { @@ -5560,7 +5560,7 @@ static void wined3d_context_gl_load_vertex_data(struct wined3d_context_gl *conte checkGLcall("glColorPointer(4, GL_UNSIGNED_BYTE, ...)"); gl_info->gl_ops.gl.p_glEnableClientState(GL_COLOR_ARRAY); checkGLcall("glEnableClientState(GL_COLOR_ARRAY)"); - state->streams[e->stream_idx].buffer->bo_user.valid = true; + wined3d_buffer_validate_user(state->streams[e->stream_idx].buffer); } else { @@ -5624,7 +5624,7 @@ static void wined3d_context_gl_load_vertex_data(struct wined3d_context_gl *conte } gl_info->gl_ops.gl.p_glEnableClientState(GL_SECONDARY_COLOR_ARRAY_EXT); checkGLcall("glEnableClientState(GL_SECONDARY_COLOR_ARRAY_EXT)"); - state->streams[e->stream_idx].buffer->bo_user.valid = true; + wined3d_buffer_validate_user(state->streams[e->stream_idx].buffer); } else { @@ -5725,7 +5725,7 @@ static void wined3d_context_gl_load_numbered_arrays(struct wined3d_context_gl *c
format_gl = wined3d_format_gl(element->format); stream = &state->streams[element->stream_idx]; - stream->buffer->bo_user.valid = true; + wined3d_buffer_validate_user(stream->buffer);
if (gl_info->supported[ARB_INSTANCED_ARRAYS]) diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index efc18e2ff69..932d2f5aa77 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -1080,6 +1080,8 @@ void wined3d_context_vk_destroy_bo(struct wined3d_context_vk *context_vk, const
TRACE("context_vk %p, bo %p.\n", context_vk, bo);
+ assert(list_empty(&bo->b.users)); + if (bo->command_buffer_id == context_vk->current_command_buffer.id) context_vk->retired_bo_size += bo->size;
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 26cae888345..502c59c1c52 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -4264,7 +4264,7 @@ static void indexbuffer(struct wined3d_context *context, const struct wined3d_st if (buffer->buffer_object) { GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, wined3d_bo_gl(buffer->buffer_object)->id)); - buffer->bo_user.valid = true; + wined3d_buffer_validate_user(buffer); } else { @@ -4394,7 +4394,7 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state bo_gl->id, bo_gl->b.buffer_offset + buffer_state->offset, min(buffer_state->size, buffer->resource.size - buffer_state->offset)));
- buffer->bo_user.valid = true; + wined3d_buffer_validate_user(buffer); } checkGLcall("bind constant buffers"); } @@ -4471,7 +4471,7 @@ static void state_so(struct wined3d_context *context, const struct wined3d_state size = buffer->resource.size - offset; GL_EXTCALL(glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, i, bo_gl->id, bo_gl->b.buffer_offset + offset, size)); - buffer->bo_user.valid = true; + wined3d_buffer_validate_user(buffer); } checkGLcall("bind transform feedback buffers"); } diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c index 2b9d6e3a8b3..6b5d9eaf510 100644 --- a/dlls/wined3d/texture.c +++ b/dlls/wined3d/texture.c @@ -4649,8 +4649,9 @@ static void wined3d_texture_set_bo(struct wined3d_texture *texture,
LIST_FOR_EACH_ENTRY(bo_user, &prev_bo->users, struct wined3d_bo_user, entry) bo_user->valid = false; + list_init(&prev_bo->users); + assert(list_empty(&bo->users)); - list_move_head(&bo->users, &prev_bo->users);
wined3d_context_destroy_bo(context, prev_bo); heap_free(prev_bo); diff --git a/dlls/wined3d/view.c b/dlls/wined3d/view.c index fb6f5d545ba..755fd5b2648 100644 --- a/dlls/wined3d/view.c +++ b/dlls/wined3d/view.c @@ -997,9 +997,13 @@ void * CDECL wined3d_shader_resource_view_get_parent(const struct wined3d_shader void wined3d_shader_resource_view_gl_update(struct wined3d_shader_resource_view_gl *srv_gl, struct wined3d_context_gl *context_gl) { - create_buffer_view(&srv_gl->gl_view, &context_gl->c, &srv_gl->v.desc, - buffer_from_resource(srv_gl->v.resource), srv_gl->v.format); + struct wined3d_buffer *buffer = buffer_from_resource(srv_gl->v.resource); + + assert(!srv_gl->bo_user.valid); + + create_buffer_view(&srv_gl->gl_view, &context_gl->c, &srv_gl->v.desc, buffer, srv_gl->v.format); srv_gl->bo_user.valid = true; + list_add_head(&buffer->buffer_object->users, &srv_gl->bo_user.entry); }
static void wined3d_shader_resource_view_gl_cs_init(void *object) @@ -1112,12 +1116,15 @@ void wined3d_shader_resource_view_vk_update_buffer(struct wined3d_shader_resourc struct wined3d_buffer_vk *buffer_vk; VkBufferView vk_buffer_view;
+ assert(!view_vk->bo_user.valid); + buffer_vk = wined3d_buffer_vk(buffer_from_resource(resource)); wined3d_context_vk_destroy_vk_buffer_view(context_vk, view_vk->u.vk_buffer_view, view_vk->command_buffer_id); if ((vk_buffer_view = wined3d_view_vk_create_vk_buffer_view(context_vk, desc, buffer_vk, view_format_vk))) { view_vk->u.vk_buffer_view = vk_buffer_view; view_vk->bo_user.valid = true; + list_add_head(&buffer_vk->b.buffer_object->users, &view_vk->bo_user.entry); } }
@@ -1715,9 +1722,12 @@ void wined3d_unordered_access_view_copy_counter(struct wined3d_unordered_access_ void wined3d_unordered_access_view_gl_update(struct wined3d_unordered_access_view_gl *uav_gl, struct wined3d_context_gl *context_gl) { - create_buffer_view(&uav_gl->gl_view, &context_gl->c, &uav_gl->v.desc, - buffer_from_resource(uav_gl->v.resource), uav_gl->v.format); + struct wined3d_buffer *buffer = buffer_from_resource(uav_gl->v.resource); + + assert(!uav_gl->bo_user.valid); + create_buffer_view(&uav_gl->gl_view, &context_gl->c, &uav_gl->v.desc, buffer, uav_gl->v.format); uav_gl->bo_user.valid = true; + list_add_head(&buffer->buffer_object->users, &uav_gl->bo_user.entry); }
static void wined3d_unordered_access_view_gl_cs_init(void *object) @@ -2280,12 +2290,15 @@ void wined3d_unordered_access_view_vk_update(struct wined3d_unordered_access_vie struct wined3d_buffer_vk *buffer_vk; VkBufferView vk_buffer_view;
+ assert(!view_vk->bo_user.valid); + buffer_vk = wined3d_buffer_vk(buffer_from_resource(resource)); wined3d_context_vk_destroy_vk_buffer_view(context_vk, view_vk->u.vk_buffer_view, view_vk->command_buffer_id); if ((vk_buffer_view = wined3d_view_vk_create_vk_buffer_view(context_vk, desc, buffer_vk, view_format_vk))) { view_vk->u.vk_buffer_view = vk_buffer_view; view_vk->bo_user.valid = true; + list_add_head(&buffer_vk->b.buffer_object->users, &view_vk->bo_user.entry); } }
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index c2c8f4035bd..91e1571224d 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4549,6 +4549,14 @@ static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resourc return CONTAINING_RECORD(resource, struct wined3d_buffer, resource); }
+static inline void wined3d_buffer_validate_user(struct wined3d_buffer *buffer) +{ + if (buffer->bo_user.valid) + return; + buffer->bo_user.valid = true; + list_add_head(&buffer->buffer_object->users, &buffer->bo_user.entry); +} + void wined3d_buffer_cleanup(struct wined3d_buffer *buffer) DECLSPEC_HIDDEN; void wined3d_buffer_copy(struct wined3d_buffer *dst_buffer, unsigned int dst_offset, struct wined3d_buffer *src_buffer, unsigned int src_offset, unsigned int size) DECLSPEC_HIDDEN;
From: Zebediah Figura zfigura@codeweavers.com
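Move the buffer loading code out of wined3d_context_vk_apply_draw_state() and into a new helper, wined3d_context_vk_load_buffers(). This is mostly just because wined3d_context_vk_apply_draw_state() is large and it's nice to split it up a bit. --- dlls/wined3d/context_vk.c | 98 +++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 44 deletions(-)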
diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index 932d2f5aa77..efc93b94539 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -3388,6 +3388,59 @@ static VkPipeline wined3d_context_vk_get_graphics_pipeline(struct wined3d_contex return pipeline_vk->vk_pipeline; }
+static void wined3d_context_vk_load_buffers(struct wined3d_context_vk *context_vk, + const struct wined3d_state *state, struct wined3d_buffer_vk *indirect_vk, bool indexed) +{ + const struct wined3d_vk_info *vk_info = context_vk->vk_info; + struct wined3d_buffer_vk *buffer_vk; + struct wined3d_buffer *buffer; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(state->streams); ++i) + { + if (!(buffer = state->streams[i].buffer)) + continue; + + buffer_vk = wined3d_buffer_vk(buffer); + wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); + wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_VERTEX_BUFFER); + if (!buffer_vk->b.bo_user.valid) + context_invalidate_state(&context_vk->c, STATE_STREAMSRC); + } + + if (use_transform_feedback(state) && vk_info->supported[WINED3D_VK_EXT_TRANSFORM_FEEDBACK]) + { + for (i = 0; i < ARRAY_SIZE(state->stream_output); ++i) + { + if (!(buffer = state->stream_output[i].buffer)) + continue; + + buffer_vk = wined3d_buffer_vk(buffer); + wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); + wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_STREAM_OUTPUT); + wined3d_buffer_invalidate_location(&buffer_vk->b, ~WINED3D_LOCATION_BUFFER); + if (!buffer_vk->b.bo_user.valid) + context_vk->update_stream_output = 1; + } + context_vk->c.transform_feedback_active = 1; + } + + if (indexed || (wined3d_context_is_graphics_state_dirty(&context_vk->c, STATE_INDEXBUFFER) && state->index_buffer)) + { + buffer_vk = wined3d_buffer_vk(state->index_buffer); + wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); + wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_INDEX_BUFFER); + if (!buffer_vk->b.bo_user.valid) + context_invalidate_state(&context_vk->c, STATE_INDEXBUFFER); + } + + if (indirect_vk) + { + wined3d_buffer_load(&indirect_vk->b, &context_vk->c, state); + wined3d_buffer_vk_barrier(indirect_vk, context_vk, WINED3D_BIND_INDIRECT_BUFFER); + } +} + static void 
wined3d_context_vk_load_shader_resources(struct wined3d_context_vk *context_vk, const struct wined3d_state *state, enum wined3d_pipeline pipeline) { @@ -3520,7 +3573,6 @@ VkCommandBuffer wined3d_context_vk_apply_draw_state(struct wined3d_context_vk *c VkSampleCountFlagBits sample_count; VkCommandBuffer vk_command_buffer; unsigned int i, invalidate_rt = 0; - struct wined3d_buffer *buffer; uint32_t null_buffer_binding; bool invalidate_ds = false;
@@ -3608,49 +3660,7 @@ VkCommandBuffer wined3d_context_vk_apply_draw_state(struct wined3d_context_vk *c
wined3d_context_vk_load_shader_resources(context_vk, state, WINED3D_PIPELINE_GRAPHICS);
- for (i = 0; i < ARRAY_SIZE(state->streams); ++i) - { - if (!(buffer = state->streams[i].buffer)) - continue; - - buffer_vk = wined3d_buffer_vk(buffer); - wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); - wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_VERTEX_BUFFER); - if (!buffer_vk->b.bo_user.valid) - context_invalidate_state(&context_vk->c, STATE_STREAMSRC); - } - - if (use_transform_feedback(state) && vk_info->supported[WINED3D_VK_EXT_TRANSFORM_FEEDBACK]) - { - for (i = 0; i < ARRAY_SIZE(state->stream_output); ++i) - { - if (!(buffer = state->stream_output[i].buffer)) - continue; - - buffer_vk = wined3d_buffer_vk(buffer); - wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); - wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_STREAM_OUTPUT); - wined3d_buffer_invalidate_location(&buffer_vk->b, ~WINED3D_LOCATION_BUFFER); - if (!buffer_vk->b.bo_user.valid) - context_vk->update_stream_output = 1; - } - context_vk->c.transform_feedback_active = 1; - } - - if (indexed || (wined3d_context_is_graphics_state_dirty(&context_vk->c, STATE_INDEXBUFFER) && state->index_buffer)) - { - buffer_vk = wined3d_buffer_vk(state->index_buffer); - wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); - wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_INDEX_BUFFER); - if (!buffer_vk->b.bo_user.valid) - context_invalidate_state(&context_vk->c, STATE_INDEXBUFFER); - } - - if (indirect_vk) - { - wined3d_buffer_load(&indirect_vk->b, &context_vk->c, state); - wined3d_buffer_vk_barrier(indirect_vk, context_vk, WINED3D_BIND_INDIRECT_BUFFER); - } + wined3d_context_vk_load_buffers(context_vk, state, indirect_vk, indexed);
if (!(vk_command_buffer = wined3d_context_vk_get_command_buffer(context_vk))) {
From: Zebediah Figura zfigura@codeweavers.com
For discard maps on deferred contexts, we currently blit from the upload buffer to the resource. This is necessary because command lists can be reused and submitted multiple times—we cannot simply have the buffer take ownership of the upload buffer, as we do for discard maps on immediate contexts.
However, it is very common for applications to use command lists only once before throwing them away—in essence taking advantage of the feature only for the multithreading advantages it allows.
Therefore we take advantage of this pattern by trying to rename the buffer anyway. In order to do this we introduce a refcount for BO pointers. When writing to a buffer BO, we first check whether the buffer "owns" the BO—i.e. whether it has a refcount of 1—and if not, we create a new BO for the buffer and copy the contents of the old BO to the new BO. That is, we perform mostly-transparent copy-on-write.
This improves performance, and reduces CPU usage, in Assassin's Creed: Unity. --- dlls/wined3d/buffer.c | 87 ++++++++++++++++++++++++++++++---- dlls/wined3d/context_gl.c | 3 ++ dlls/wined3d/context_vk.c | 5 ++ dlls/wined3d/cs.c | 7 ++- dlls/wined3d/device.c | 1 + dlls/wined3d/view.c | 10 ++++ dlls/wined3d/wined3d_private.h | 7 +++ 7 files changed, 109 insertions(+), 11 deletions(-)
diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c index de1cf90d055..c71c1ac2a87 100644 --- a/dlls/wined3d/buffer.c +++ b/dlls/wined3d/buffer.c @@ -199,8 +199,11 @@ static void wined3d_buffer_gl_destroy_buffer_object(struct wined3d_buffer_gl *bu buffer_gl->b.bo_user.valid = false; list_remove(&buffer_gl->b.bo_user.entry); } - wined3d_context_gl_destroy_bo(context_gl, bo_gl); - heap_free(bo_gl); + if (!--bo_gl->b.refcount) + { + wined3d_context_gl_destroy_bo(context_gl, bo_gl); + heap_free(bo_gl); + } buffer_gl->b.buffer_object = NULL; }
@@ -1015,6 +1018,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc
if (flags & WINED3D_MAP_WRITE) { + wined3d_buffer_acquire_bo_for_write(buffer, context); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); buffer_invalidate_bo_range(buffer, dirty_offset, dirty_size); } @@ -1127,19 +1131,60 @@ static void wined3d_buffer_set_bo(struct wined3d_buffer *buffer, struct wined3d_ { struct wined3d_bo_user *bo_user;
+ /* The previous BO might have users in other buffers which were valid, + * and should in theory remain valid. The problem is that it's not easy + * to tell which users belong to this buffer and which don't. We could + * add a field, but for now it's easier and probably fine to just + * invalidate every user. */ LIST_FOR_EACH_ENTRY(bo_user, &prev_bo->users, struct wined3d_bo_user, entry) bo_user->valid = false; list_init(&prev_bo->users);
- assert(list_empty(&bo->users)); - - wined3d_context_destroy_bo(context, prev_bo); - heap_free(prev_bo); + if (!--prev_bo->refcount) + { + wined3d_context_destroy_bo(context, prev_bo); + heap_free(prev_bo); + } }
buffer->buffer_object = bo; }
+void wined3d_buffer_acquire_bo_for_write(struct wined3d_buffer *buffer, struct wined3d_context *context) +{ + const struct wined3d_range range = {.size = buffer->resource.size}; + struct wined3d_bo_address dst, src; + struct wined3d_bo *bo; + + if (!(bo = buffer->buffer_object)) + return; + + /* If we are the only owner of this BO, there is nothing to do. */ + if (bo->refcount == 1) + return; + + TRACE("Performing copy-on-write for BO %p.\n", bo); + + /* Grab a reference to the current BO. It's okay if this overflows, because + * the following unload will release it. */ + ++bo->refcount; + + /* Unload and re-prepare to get a new buffer. This is a bit cheap and not + * perfectly idiomatic—we should really just factor out an adapter-agnostic + * function to create a BO and then use wined3d_buffer_set_bo()—but it'll + * do nonetheless. */ + wined3d_buffer_unload_location(buffer, context, WINED3D_LOCATION_BUFFER); + wined3d_buffer_prepare_location(buffer, context, WINED3D_LOCATION_BUFFER); + + /* And finally, perform the actual copy. */ + assert(buffer->buffer_object != bo); + dst.buffer_object = buffer->buffer_object; + dst.addr = NULL; + src.buffer_object = bo; + src.addr = NULL; + wined3d_context_copy_bo_address(context, &dst, &src, 1, &range, WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD); +} + void wined3d_buffer_copy_bo_address(struct wined3d_buffer *dst_buffer, struct wined3d_context *context, unsigned int dst_offset, const struct wined3d_const_bo_address *src_addr, unsigned int size) { @@ -1151,6 +1196,9 @@ void wined3d_buffer_copy_bo_address(struct wined3d_buffer *dst_buffer, struct wi if (!dst_offset && size == dst_buffer->resource.size) map_flags |= WINED3D_MAP_DISCARD;
+ if (map_flags & WINED3D_MAP_DISCARD) + wined3d_buffer_acquire_bo_for_write(dst_buffer, context); + dst_location = wined3d_buffer_get_memory(dst_buffer, context, &dst_addr); dst_addr.addr += dst_offset;
@@ -1182,8 +1230,26 @@ void wined3d_buffer_copy(struct wined3d_buffer *dst_buffer, unsigned int dst_off void wined3d_buffer_update_sub_resource(struct wined3d_buffer *buffer, struct wined3d_context *context, const struct upload_bo *upload_bo, unsigned int offset, unsigned int size) { - if (upload_bo->flags & UPLOAD_BO_RENAME_ON_UNMAP) + struct wined3d_bo *bo = upload_bo->addr.buffer_object; + uint32_t flags = upload_bo->flags; + + /* Try to take this buffer for COW. Don't take it if we've saturated the + * refcount. */ + if (!offset && size == buffer->resource.size + && bo && bo->refcount < UINT8_MAX && !(upload_bo->flags & UPLOAD_BO_RENAME_ON_UNMAP)) { + flags |= UPLOAD_BO_RENAME_ON_UNMAP; + ++bo->refcount; + } + + if (flags & UPLOAD_BO_RENAME_ON_UNMAP) + { + /* Don't increment the refcount. UPLOAD_BO_RENAME_ON_UNMAP transfers an + * existing reference. + * + * FIXME: We could degenerate RENAME to a copy + free and rely on the + * COW logic to detect this case. + */ wined3d_buffer_set_bo(buffer, context, upload_bo->addr.buffer_object); wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); @@ -1572,8 +1638,11 @@ static void wined3d_buffer_vk_unload_location(struct wined3d_buffer *buffer, buffer->bo_user.valid = false; list_remove(&buffer->bo_user.entry); } - wined3d_context_vk_destroy_bo(context_vk, bo_vk); - heap_free(bo_vk); + if (!--bo_vk->b.refcount) + { + wined3d_context_vk_destroy_bo(context_vk, bo_vk); + heap_free(bo_vk); + } buffer->buffer_object = NULL; break;
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index d6827c2c0de..36786cc5a29 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -4163,6 +4163,7 @@ static void context_gl_load_unordered_access_resources(struct wined3d_context_gl if (view->resource->type == WINED3D_RTYPE_BUFFER) { buffer = buffer_from_resource(view->resource); + wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c); wined3d_buffer_load_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER); wined3d_unordered_access_view_invalidate_location(view, ~WINED3D_LOCATION_BUFFER); wined3d_context_gl_reference_buffer(context_gl, buffer); @@ -4193,6 +4194,8 @@ static void context_gl_load_stream_output_buffers(struct wined3d_context_gl *con if (!(buffer = state->stream_output[i].buffer)) continue;
+ wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c); + wined3d_buffer_load(buffer, &context_gl->c, state); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); wined3d_context_gl_reference_buffer(context_gl, buffer); diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index efc93b94539..859e8fb5949 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -478,6 +478,7 @@ static bool wined3d_context_vk_create_slab_bo(struct wined3d_context_vk *context *bo = slab->bo; bo->memory = NULL; bo->slab = slab; + bo->b.refcount = 1; bo->b.client_map_count = 0; bo->b.map_ptr = NULL; bo->b.buffer_offset = idx * object_size; @@ -557,6 +558,7 @@ BOOL wined3d_context_vk_create_bo(struct wined3d_context_vk *context_vk, VkDevic return FALSE; }
+ bo->b.refcount = 1; bo->b.client_map_count = 0; bo->b.map_ptr = NULL; bo->b.buffer_offset = 0; @@ -3415,6 +3417,8 @@ static void wined3d_context_vk_load_buffers(struct wined3d_context_vk *context_v if (!(buffer = state->stream_output[i].buffer)) continue;
+ wined3d_buffer_acquire_bo_for_write(buffer, &context_vk->c); + buffer_vk = wined3d_buffer_vk(buffer); wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state); wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_STREAM_OUTPUT); @@ -3525,6 +3529,7 @@ static void wined3d_context_vk_load_shader_resources(struct wined3d_context_vk * uav_vk = wined3d_unordered_access_view_vk(uav); if (uav->resource->type == WINED3D_RTYPE_BUFFER) { + wined3d_buffer_acquire_bo_for_write(buffer_from_resource(uav->resource), &context_vk->c); if (!uav_vk->view_vk.bo_user.valid) { wined3d_unordered_access_view_vk_update(uav_vk, context_vk); diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index e7ccd3f9125..9a4f2cdfc94 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -4494,8 +4494,11 @@ static void wined3d_command_list_destroy_object(void *object)
if ((bo = list->uploads[i].bo)) { - wined3d_context_destroy_bo(context, bo); - heap_free(bo); + if (!--bo->refcount) + { + wined3d_context_destroy_bo(context, bo); + heap_free(bo); + } } else { diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index dce2b65b6d3..d62e9f03338 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -1181,6 +1181,7 @@ bool wined3d_device_gl_create_bo(struct wined3d_device_gl *device_gl, struct win bo->b.memory_offset = bo->b.buffer_offset; bo->b.map_ptr = NULL; bo->b.client_map_count = 0; + bo->b.refcount = 1;
return true; } diff --git a/dlls/wined3d/view.c b/dlls/wined3d/view.c index 755fd5b2648..675cc186b36 100644 --- a/dlls/wined3d/view.c +++ b/dlls/wined3d/view.c @@ -1667,9 +1667,14 @@ void wined3d_unordered_access_view_gl_clear(struct wined3d_unordered_access_view get_buffer_view_range(buffer, &view_gl->v.desc, &format_gl->f, &offset, &size);
if (!offset && size == buffer->resource.size) + { wined3d_buffer_prepare_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER); + } else + { + wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c); wined3d_buffer_load_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER); + } wined3d_unordered_access_view_invalidate_location(&view_gl->v, ~WINED3D_LOCATION_BUFFER);
bo_gl = wined3d_bo_gl(buffer->buffer_object); @@ -2073,9 +2078,14 @@ void wined3d_unordered_access_view_vk_clear(struct wined3d_unordered_access_view
get_buffer_view_range(buffer, view_desc, view_format, &offset, &size); if (!offset && size == buffer->resource.size) + { wined3d_buffer_prepare_location(buffer, &context_vk->c, WINED3D_LOCATION_BUFFER); + } else + { + wined3d_buffer_acquire_bo_for_write(buffer, &context_vk->c); wined3d_buffer_load_location(buffer, &context_vk->c, WINED3D_LOCATION_BUFFER); + } wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER); wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER); } diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 91e1571224d..6c4f41ab653 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -1611,6 +1611,11 @@ struct wined3d_bo size_t memory_offset; unsigned int client_map_count; bool coherent; + /* Number of resources referencing this BO, used for COW tracking. + * If a resource has this BO as a location and wants to write to it, it + * needs to make a copy unless it's the only owner (refcount == 1). + * Deferred contexts may also hold a reference. */ + uint8_t refcount; };
struct wined3d_bo_gl @@ -4557,6 +4562,8 @@ static inline void wined3d_buffer_validate_user(struct wined3d_buffer *buffer) list_add_head(&buffer->buffer_object->users, &buffer->bo_user.entry); }
+void wined3d_buffer_acquire_bo_for_write(struct wined3d_buffer *buffer, + struct wined3d_context *context) DECLSPEC_HIDDEN; void wined3d_buffer_cleanup(struct wined3d_buffer *buffer) DECLSPEC_HIDDEN; void wined3d_buffer_copy(struct wined3d_buffer *dst_buffer, unsigned int dst_offset, struct wined3d_buffer *src_buffer, unsigned int src_offset, unsigned int size) DECLSPEC_HIDDEN;
This merge request was approved by Jan Sikorski.