Regardless of whether we are mapping persistently. Matches the Vulkan backend.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/wined3d/context_gl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 7b40b1de0fb..9153b7fd95f 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -2831,7 +2831,6 @@ static void *wined3d_bo_gl_map(struct wined3d_bo_gl *bo, struct wined3d_context_ const struct wined3d_gl_info *gl_info; struct wined3d_bo_user *bo_user; struct wined3d_bo_gl tmp; - uint8_t *map_ptr;
if (flags & WINED3D_MAP_NOOVERWRITE) goto map; @@ -2869,9 +2868,9 @@ map: { struct wined3d_allocator_chunk_gl *chunk_gl = wined3d_allocator_chunk_gl(bo->memory->chunk);
- if (!(map_ptr = wined3d_allocator_chunk_gl_map(chunk_gl, context_gl))) + if (!(bo->b.map_ptr = wined3d_allocator_chunk_gl_map(chunk_gl, context_gl))) ERR("Failed to map chunk.\n"); - return map_ptr; + return bo->b.map_ptr; }
gl_info = context_gl->gl_info; @@ -2906,31 +2905,32 @@ map: } gl_flags |= GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
- if ((map_ptr = GL_EXTCALL(glMapBufferRange(bo->binding, 0, bo->size, gl_flags))) && wined3d_map_persistent()) - bo->b.map_ptr = map_ptr; + bo->b.map_ptr = GL_EXTCALL(glMapBufferRange(bo->binding, 0, bo->size, gl_flags)); } else if (gl_info->supported[ARB_MAP_BUFFER_RANGE]) { - map_ptr = GL_EXTCALL(glMapBufferRange(bo->binding, 0, bo->size, wined3d_resource_gl_map_flags(bo, flags))); + bo->b.map_ptr = GL_EXTCALL(glMapBufferRange(bo->binding, 0, bo->size, wined3d_resource_gl_map_flags(bo, flags))); } else { - map_ptr = GL_EXTCALL(glMapBuffer(bo->binding, wined3d_resource_gl_legacy_map_flags(flags))); + bo->b.map_ptr = GL_EXTCALL(glMapBuffer(bo->binding, wined3d_resource_gl_legacy_map_flags(flags))); }
wined3d_context_gl_bind_bo(context_gl, bo->binding, 0); checkGLcall("Map buffer object");
- return map_ptr; + return bo->b.map_ptr; }
static void wined3d_bo_gl_unmap(struct wined3d_bo_gl *bo, struct wined3d_context_gl *context_gl) { const struct wined3d_gl_info *gl_info = context_gl->gl_info;
- if (bo->b.map_ptr) + if (wined3d_map_persistent()) return;
+ bo->b.map_ptr = NULL; + wined3d_context_gl_bind_bo(context_gl, bo->binding, bo->id);
if (bo->memory)
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- This is a second version of [1].
I've reduced the cap to 128 MB, which is enough for at least one and ideally two chunk buffers. If necessary it can probably be raised further in the future. I haven't done much testing personally—I'm trying to springboard off of others' testing, and also to get the code structure correct and upstream first and tune numbers later.
This still uses the same crude scheme for determining what buffers to keep mapped. An LRU list, as proposed in a reply to [1], would be an obviously better scheme, although that perhaps is less true with the kind of chunk allocation that we do in practice. I haven't implemented that now because it's not nearly as trivial, and not clear enough that it would offer a performance improvement. Unlike [1], though, it should be easier to change the scheme with this patch, and with the subsequent patches for 32-bit map acceleration.
[1] https://www.winehq.org/pipermail/wine-devel/2022-January/206187.html
dlls/wined3d/adapter_vk.c | 24 +++++++++---- dlls/wined3d/context_gl.c | 61 ++++++++++++++++++++-------------- dlls/wined3d/context_vk.c | 20 ++++++++--- dlls/wined3d/directx.c | 9 +++++ dlls/wined3d/wined3d_private.h | 9 +++++ 5 files changed, 87 insertions(+), 36 deletions(-)
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index 2dcc2008f72..c2e71342bb0 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -381,7 +381,10 @@ static void wined3d_allocator_vk_destroy_chunk(struct wined3d_allocator_chunk *c vk_info = &device_vk->vk_info;
if (chunk_vk->c.map_ptr) + { VK_CALL(vkUnmapMemory(device_vk->vk_device, chunk_vk->vk_memory)); + adapter_adjust_mapped_memory(device_vk->d.adapter, -WINED3D_ALLOCATOR_CHUNK_SIZE); + } VK_CALL(vkFreeMemory(device_vk->vk_device, chunk_vk->vk_memory, NULL)); TRACE("Freed memory 0x%s.\n", wine_dbgstr_longlong(chunk_vk->vk_memory)); wined3d_allocator_chunk_cleanup(&chunk_vk->c); @@ -805,10 +808,15 @@ static void *wined3d_bo_vk_map(struct wined3d_bo_vk *bo, struct wined3d_context_ return NULL; } } - else if ((vr = VK_CALL(vkMapMemory(device_vk->vk_device, bo->vk_memory, 0, VK_WHOLE_SIZE, 0, &bo->b.map_ptr))) < 0) + else { - ERR("Failed to map memory, vr %s.\n", wined3d_debug_vkresult(vr)); - return NULL; + if ((vr = VK_CALL(vkMapMemory(device_vk->vk_device, bo->vk_memory, 0, VK_WHOLE_SIZE, 0, &bo->b.map_ptr))) < 0) + { + ERR("Failed to map memory, vr %s.\n", wined3d_debug_vkresult(vr)); + return NULL; + } + + adapter_adjust_mapped_memory(device_vk->d.adapter, bo->size); }
return bo->b.map_ptr; @@ -816,12 +824,16 @@ static void *wined3d_bo_vk_map(struct wined3d_bo_vk *bo, struct wined3d_context_
static void wined3d_bo_vk_unmap(struct wined3d_bo_vk *bo, struct wined3d_context_vk *context_vk) { + struct wined3d_device_vk *device_vk = wined3d_device_vk(context_vk->c.device); const struct wined3d_vk_info *vk_info; - struct wined3d_device_vk *device_vk; struct wined3d_bo_slab_vk *slab;
- if (wined3d_map_persistent()) + /* This may race with the client thread, but it's not a hard limit anyway. */ + if (device_vk->d.adapter->mapped_size <= MAX_PERSISTENT_MAPPED_BYTES) + { + TRACE("Not unmapping BO %p.\n", bo); return; + }
bo->b.map_ptr = NULL;
@@ -838,8 +850,8 @@ static void wined3d_bo_vk_unmap(struct wined3d_bo_vk *bo, struct wined3d_context }
vk_info = context_vk->vk_info; - device_vk = wined3d_device_vk(context_vk->c.device); VK_CALL(vkUnmapMemory(device_vk->vk_device, bo->vk_memory)); + adapter_adjust_mapped_memory(device_vk->d.adapter, -bo->size); }
static void wined3d_bo_slab_vk_lock(struct wined3d_bo_slab_vk *slab_vk, struct wined3d_context_vk *context_vk) diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 9153b7fd95f..98458c0d44b 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -2803,6 +2803,8 @@ static void *wined3d_allocator_chunk_gl_map(struct wined3d_allocator_chunk_gl *c ERR("Failed to map chunk memory.\n"); return NULL; } + + adapter_adjust_mapped_memory(context_gl->c.device->adapter, WINED3D_ALLOCATOR_CHUNK_SIZE); }
++chunk_gl->c.map_count; @@ -2817,12 +2819,14 @@ static void wined3d_allocator_chunk_gl_unmap(struct wined3d_allocator_chunk_gl *
TRACE("chunk_gl %p, context_gl %p.\n", chunk_gl, context_gl);
- if (!--chunk_gl->c.map_count && !wined3d_map_persistent()) - { - wined3d_context_gl_bind_bo(context_gl, GL_PIXEL_UNPACK_BUFFER, chunk_gl->gl_buffer); - GL_EXTCALL(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER)); - chunk_gl->c.map_ptr = NULL; - } + if (--chunk_gl->c.map_count) + return; + + wined3d_context_gl_bind_bo(context_gl, GL_PIXEL_UNPACK_BUFFER, chunk_gl->gl_buffer); + GL_EXTCALL(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER)); + chunk_gl->c.map_ptr = NULL; + + adapter_adjust_mapped_memory(context_gl->c.device->adapter, -WINED3D_ALLOCATOR_CHUNK_SIZE); }
static void *wined3d_bo_gl_map(struct wined3d_bo_gl *bo, struct wined3d_context_gl *context_gl, uint32_t flags) @@ -2891,18 +2895,11 @@ map: * resources are mapped. On the other hand, we don't want to use the * access flags used to create the bo for non-persistent maps, because * that may imply dropping GL_MAP_UNSYNCHRONIZED_BIT. */ - if (wined3d_map_persistent()) - { - gl_flags = bo->flags & ~GL_CLIENT_STORAGE_BIT; - if (!(gl_flags & GL_MAP_READ_BIT)) - gl_flags |= GL_MAP_UNSYNCHRONIZED_BIT; - if (gl_flags & GL_MAP_WRITE_BIT) - gl_flags |= GL_MAP_FLUSH_EXPLICIT_BIT; - } - else - { - gl_flags = wined3d_resource_gl_map_flags(bo, flags); - } + gl_flags = bo->flags & ~GL_CLIENT_STORAGE_BIT; + if (!(gl_flags & GL_MAP_READ_BIT)) + gl_flags |= GL_MAP_UNSYNCHRONIZED_BIT; + if (gl_flags & GL_MAP_WRITE_BIT) + gl_flags |= GL_MAP_FLUSH_EXPLICIT_BIT; gl_flags |= GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
bo->b.map_ptr = GL_EXTCALL(glMapBufferRange(bo->binding, 0, bo->size, gl_flags)); @@ -2916,6 +2913,9 @@ map: bo->b.map_ptr = GL_EXTCALL(glMapBuffer(bo->binding, wined3d_resource_gl_legacy_map_flags(flags))); }
+ if (bo->b.map_ptr) + adapter_adjust_mapped_memory(device_gl->d.adapter, -bo->size); + wined3d_context_gl_bind_bo(context_gl, bo->binding, 0); checkGLcall("Map buffer object");
@@ -2926,20 +2926,30 @@ static void wined3d_bo_gl_unmap(struct wined3d_bo_gl *bo, struct wined3d_context { const struct wined3d_gl_info *gl_info = context_gl->gl_info;
- if (wined3d_map_persistent()) + if (context_gl->c.device->adapter->mapped_size <= MAX_PERSISTENT_MAPPED_BYTES) + { + TRACE("Not unmapping BO %p.\n", bo); return; + } + + if (bo->memory) + { + struct wined3d_allocator_chunk_gl *chunk_gl = wined3d_allocator_chunk_gl(bo->memory->chunk); + + wined3d_allocator_chunk_gl_unmap(chunk_gl, context_gl); + if (!chunk_gl->c.map_ptr) + bo->b.map_ptr = NULL; + return; + }
bo->b.map_ptr = NULL;
wined3d_context_gl_bind_bo(context_gl, bo->binding, bo->id); - - if (bo->memory) - wined3d_allocator_chunk_gl_unmap(wined3d_allocator_chunk_gl(bo->memory->chunk), context_gl); - else - GL_EXTCALL(glUnmapBuffer(bo->binding)); - + GL_EXTCALL(glUnmapBuffer(bo->binding)); wined3d_context_gl_bind_bo(context_gl, bo->binding, 0); checkGLcall("Unmap buffer object"); + + adapter_adjust_mapped_memory(context_gl->c.device->adapter, -bo->size); }
void *wined3d_context_gl_map_bo_address(struct wined3d_context_gl *context_gl, @@ -3104,6 +3114,7 @@ void wined3d_context_gl_destroy_bo(struct wined3d_context_gl *context_gl, struct { wined3d_context_gl_bind_bo(context_gl, bo->binding, bo->id); GL_EXTCALL(glUnmapBuffer(bo->binding)); + adapter_adjust_mapped_memory(context_gl->c.device->adapter, -bo->size); }
TRACE("Destroying GL buffer %u.\n", bo->id); diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index f3ec5c03c85..74e23bbb5e1 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -269,12 +269,17 @@ void *wined3d_allocator_chunk_vk_map(struct wined3d_allocator_chunk_vk *chunk_vk
wined3d_allocator_chunk_vk_lock(chunk_vk);
- if (!chunk_vk->c.map_ptr && (vr = VK_CALL(vkMapMemory(device_vk->vk_device, - chunk_vk->vk_memory, 0, VK_WHOLE_SIZE, 0, &chunk_vk->c.map_ptr))) < 0) + if (!chunk_vk->c.map_ptr) { - ERR("Failed to map chunk memory, vr %s.\n", wined3d_debug_vkresult(vr)); - wined3d_allocator_chunk_vk_unlock(chunk_vk); - return NULL; + if ((vr = VK_CALL(vkMapMemory(device_vk->vk_device, + chunk_vk->vk_memory, 0, VK_WHOLE_SIZE, 0, &chunk_vk->c.map_ptr))) < 0) + { + ERR("Failed to map chunk memory, vr %s.\n", wined3d_debug_vkresult(vr)); + wined3d_allocator_chunk_vk_unlock(chunk_vk); + return NULL; + } + + adapter_adjust_mapped_memory(device_vk->d.adapter, WINED3D_ALLOCATOR_CHUNK_SIZE); }
++chunk_vk->c.map_count; @@ -305,6 +310,8 @@ void wined3d_allocator_chunk_vk_unmap(struct wined3d_allocator_chunk_vk *chunk_v chunk_vk->c.map_ptr = NULL;
wined3d_allocator_chunk_vk_unlock(chunk_vk); + + adapter_adjust_mapped_memory(device_vk->d.adapter, -WINED3D_ALLOCATOR_CHUNK_SIZE); }
VkDeviceMemory wined3d_context_vk_allocate_vram_chunk_memory(struct wined3d_context_vk *context_vk, @@ -977,7 +984,10 @@ void wined3d_context_vk_destroy_bo(struct wined3d_context_vk *context_vk, const }
if (bo->b.map_ptr) + { VK_CALL(vkUnmapMemory(device_vk->vk_device, bo->vk_memory)); + adapter_adjust_mapped_memory(device_vk->d.adapter, -bo->size); + } wined3d_context_vk_destroy_vk_memory(context_vk, bo->vk_memory, bo->command_buffer_id); }
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 1b34d2eceaa..36ca1be5b79 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -157,6 +157,15 @@ UINT64 adapter_adjust_memory(struct wined3d_adapter *adapter, INT64 amount) return adapter->vram_bytes_used; }
+ssize_t adapter_adjust_mapped_memory(struct wined3d_adapter *adapter, ssize_t size) +{ + /* Note that this needs to be thread-safe; the Vulkan adapter may map from + * client threads. */ + ssize_t ret = InterlockedExchangeAddSizeT(&adapter->mapped_size, size) + size; + TRACE("Adjusted mapped adapter memory by %zd to %zd.\n", size, ret); + return ret; +} + void wined3d_adapter_cleanup(struct wined3d_adapter *adapter) { unsigned int output_idx; diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index ef1062ec9b4..adf1a15926f 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -3461,6 +3461,12 @@ struct wined3d_output
HRESULT wined3d_output_get_gamma_ramp(struct wined3d_output *output, struct wined3d_gamma_ramp *ramp) DECLSPEC_HIDDEN;
+#ifdef _WIN64 +#define MAX_PERSISTENT_MAPPED_BYTES SSIZE_MAX +#else +#define MAX_PERSISTENT_MAPPED_BYTES (128 * 1024 * 1024) +#endif + /* The adapter structure */ struct wined3d_adapter { @@ -3479,6 +3485,8 @@ struct wined3d_adapter void *formats; size_t format_size;
+ ssize_t mapped_size; + const struct wined3d_vertex_pipe_ops *vertex_pipe; const struct wined3d_fragment_pipe_ops *fragment_pipe; const struct wined3d_state_entry_template *misc_state_template; @@ -3558,6 +3566,7 @@ BOOL wined3d_adapter_gl_init_format_info(struct wined3d_adapter *adapter, BOOL wined3d_adapter_no3d_init_format_info(struct wined3d_adapter *adapter) DECLSPEC_HIDDEN; BOOL wined3d_adapter_vk_init_format_info(struct wined3d_adapter_vk *adapter_vk, const struct wined3d_vk_info *vk_info) DECLSPEC_HIDDEN; +ssize_t adapter_adjust_mapped_memory(struct wined3d_adapter *adapter, ssize_t size) DECLSPEC_HIDDEN; UINT64 adapter_adjust_memory(struct wined3d_adapter *adapter, INT64 amount) DECLSPEC_HIDDEN;
BOOL wined3d_caps_gl_ctx_test_viewport_subpixel_bits(struct wined3d_caps_gl_ctx *ctx) DECLSPEC_HIDDEN;
On Fri, 11 Feb 2022 at 02:03, Zebediah Figura zfigura@codeweavers.com wrote:
dlls/wined3d/adapter_vk.c | 24 +++++++++---- dlls/wined3d/context_gl.c | 61 ++++++++++++++++++++-------------- dlls/wined3d/context_vk.c | 20 ++++++++--- dlls/wined3d/directx.c | 9 +++++ dlls/wined3d/wined3d_private.h | 9 +++++ 5 files changed, 87 insertions(+), 36 deletions(-)
The subject line is probably missing a "MiB".
@@ -2916,6 +2913,9 @@ map: bo->b.map_ptr = GL_EXTCALL(glMapBuffer(bo->binding, wined3d_resource_gl_legacy_map_flags(flags))); }
- if (bo->b.map_ptr)
adapter_adjust_mapped_memory(device_gl->d.adapter, -bo->size);
Shouldn't we be increasing the mapped memory count here?
From: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/wined3d/context_gl.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 98458c0d44b..aae7fbdddba 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -3128,6 +3128,7 @@ static bool use_buffer_chunk_suballocation(const struct wined3d_gl_info *gl_info { switch (binding) { + case GL_ARRAY_BUFFER: case GL_ATOMIC_COUNTER_BUFFER: case GL_DRAW_INDIRECT_BUFFER: case GL_PIXEL_UNPACK_BUFFER:
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/wined3d/adapter_vk.c | 8 ++------ dlls/wined3d/wined3d_private.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index c2e71342bb0..f09cd1b0fae 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -521,9 +521,7 @@ static HRESULT adapter_vk_create_device(struct wined3d *wined3d, const struct wi goto fail; }
- InitializeCriticalSection(&device_vk->allocator_cs); - if (device_vk->allocator_cs.DebugInfo != (RTL_CRITICAL_SECTION_DEBUG *)-1) - device_vk->allocator_cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": wined3d_device_vk.allocator_cs"); + wined3d_lock_init(&device_vk->allocator_cs, "wined3d_device_vk.allocator_cs");
*device = &device_vk->d;
@@ -543,9 +541,7 @@ static void adapter_vk_destroy_device(struct wined3d_device *device) wined3d_device_cleanup(&device_vk->d); wined3d_allocator_cleanup(&device_vk->allocator);
- if (device_vk->allocator_cs.DebugInfo != (RTL_CRITICAL_SECTION_DEBUG *)-1) - device_vk->allocator_cs.DebugInfo->Spare[0] = 0; - DeleteCriticalSection(&device_vk->allocator_cs); + wined3d_lock_cleanup(&device_vk->allocator_cs);
VK_CALL(vkDestroyDevice(device_vk->vk_device, NULL)); heap_free(device_vk); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index adf1a15926f..807a002b1f1 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -5659,6 +5659,21 @@ struct wined3d_shader_limits unsigned int packed_input; };
+#define wined3d_lock_init(lock, name) wined3d_lock_init_(lock, __FILE__ ": " name) +static inline void wined3d_lock_init_(CRITICAL_SECTION *lock, const char *name) +{ + InitializeCriticalSection(lock); + if (lock->DebugInfo != (RTL_CRITICAL_SECTION_DEBUG *)-1) + lock->DebugInfo->Spare[0] = (DWORD_PTR)name; +} + +static inline void wined3d_lock_cleanup(CRITICAL_SECTION *lock) +{ + if (lock->DebugInfo != (RTL_CRITICAL_SECTION_DEBUG *)-1) + lock->DebugInfo->Spare[0] = 0; + DeleteCriticalSection(lock); +} + #ifdef __GNUC__ #define PRINTF_ATTR(fmt,args) __attribute__((format (printf,fmt,args))) #else
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Currently this has no effect. Depending on whether wined3d_map_persistent() returns true, either the client thread doesn't access the map pointer outside of d3d map requests, or the BO is never unmapped. However, we'd like to be able to let NOOVERWRITE maps be accelerated while still being able to unmap arbitrary BOs at arbitrary times from the CS thread.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- dlls/wined3d/adapter_vk.c | 11 ++++++++++ dlls/wined3d/context_gl.c | 12 +++++++++++ dlls/wined3d/context_vk.c | 2 ++ dlls/wined3d/cs.c | 38 ++++++++++++++++++++++++++++------ dlls/wined3d/device.c | 4 ++++ dlls/wined3d/wined3d_private.h | 15 ++++++++++++++ 6 files changed, 76 insertions(+), 6 deletions(-)
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index f09cd1b0fae..a47eb1758ac 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -831,7 +831,18 @@ static void wined3d_bo_vk_unmap(struct wined3d_bo_vk *bo, struct wined3d_context return; }
+ wined3d_device_bo_map_lock(context_vk->c.device); + /* The mapping is still in use by the client (viz. for an accelerated + * NOOVERWRITE map). The client will trigger another unmap request when the + * d3d application requests to unmap the BO. */ + if (bo->b.client_map_count) + { + wined3d_device_bo_map_unlock(context_vk->c.device); + TRACE("BO %p is still in use by a client thread; not unmapping.\n", bo); + return; + } bo->b.map_ptr = NULL; + wined3d_device_bo_map_unlock(context_vk->c.device);
if ((slab = bo->slab)) { diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index aae7fbdddba..0a35f193f0f 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -2942,7 +2942,18 @@ static void wined3d_bo_gl_unmap(struct wined3d_bo_gl *bo, struct wined3d_context return; }
+ wined3d_device_bo_map_lock(context_gl->c.device); + /* The mapping is still in use by the client (viz. for an accelerated + * NOOVERWRITE map). The client will trigger another unmap request when the + * d3d application requests to unmap the BO. */ + if (bo->b.client_map_count) + { + wined3d_device_bo_map_unlock(context_gl->c.device); + TRACE("BO %p is still in use by a client thread; not unmapping.\n", bo); + return; + } bo->b.map_ptr = NULL; + wined3d_device_bo_map_unlock(context_gl->c.device);
wined3d_context_gl_bind_bo(context_gl, bo->binding, bo->id); GL_EXTCALL(glUnmapBuffer(bo->binding)); @@ -3209,6 +3220,7 @@ bool wined3d_context_gl_create_bo(struct wined3d_context_gl *context_gl, GLsizei bo->b.buffer_offset = buffer_offset; bo->b.memory_offset = bo->b.buffer_offset; bo->b.map_ptr = NULL; + bo->b.client_map_count = 0;
return true; } diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index 74e23bbb5e1..23d94fd32d2 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -463,6 +463,7 @@ static bool wined3d_context_vk_create_slab_bo(struct wined3d_context_vk *context *bo = slab->bo; bo->memory = NULL; bo->slab = slab; + bo->b.client_map_count = 0; bo->b.map_ptr = NULL; bo->b.buffer_offset = idx * object_size; bo->b.memory_offset = slab->bo.b.memory_offset + bo->b.buffer_offset; @@ -541,6 +542,7 @@ BOOL wined3d_context_vk_create_bo(struct wined3d_context_vk *context_vk, VkDevic return FALSE; }
+ bo->b.client_map_count = 0; bo->b.map_ptr = NULL; bo->b.buffer_offset = 0; bo->size = size; diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index ed5b289b8d8..5d3a2cc9076 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -22,6 +22,7 @@ #include "wined3d_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(d3d); +WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); WINE_DECLARE_DEBUG_CHANNEL(d3d_sync); WINE_DECLARE_DEBUG_CHANNEL(fps);
@@ -3080,7 +3081,7 @@ static bool wined3d_cs_map_upload_bo(struct wined3d_device_context *context, str struct wined3d_client_resource *client = &resource->client; struct wined3d_device *device = context->device; struct wined3d_bo_address addr; - const struct wined3d_bo *bo; + struct wined3d_bo *bo; uint8_t *map_ptr;
if (flags & WINED3D_MAP_DISCARD) @@ -3096,13 +3097,29 @@ static bool wined3d_cs_map_upload_bo(struct wined3d_device_context *context, str addr = client->addr; }
- bo = addr.buffer_object; - map_ptr = bo ? bo->map_ptr : NULL; + map_ptr = NULL; + if ((bo = addr.buffer_object)) + { + wined3d_device_bo_map_lock(device); + if ((map_ptr = bo->map_ptr)) + ++bo->client_map_count; + wined3d_device_bo_map_unlock(device); + + if (!map_ptr) + { + /* adapter_alloc_bo() should have given us a mapped BO if we are + * discarding. */ + assert(flags & WINED3D_MAP_NOOVERWRITE); + WARN_(d3d_perf)("Not accelerating a NOOVERWRITE map because the BO is not mapped.\n"); + return false; + } + } map_ptr += (uintptr_t)addr.addr;
if (!map_ptr) { - TRACE("Sub-resource is not mapped.\n"); + assert(flags & WINED3D_MAP_NOOVERWRITE); + WARN_(d3d_perf)("Not accelerating a NOOVERWRITE map because the sub-resource has no valid address.\n"); return false; }
@@ -3138,14 +3155,23 @@ static bool wined3d_bo_address_is_null(struct wined3d_const_bo_address *addr) }
static bool wined3d_cs_unmap_upload_bo(struct wined3d_device_context *context, struct wined3d_resource *resource, - unsigned int sub_resource_idx, struct wined3d_box *box, struct upload_bo *bo) + unsigned int sub_resource_idx, struct wined3d_box *box, struct upload_bo *upload_bo) { struct wined3d_client_resource *client = &resource->client; + struct wined3d_device *device = context->device; + struct wined3d_bo *bo;
if (wined3d_bo_address_is_null(&client->mapped_upload.addr)) return false;
- *bo = client->mapped_upload; + if ((bo = client->mapped_upload.addr.buffer_object)) + { + wined3d_device_bo_map_lock(device); + --bo->client_map_count; + wined3d_device_bo_map_unlock(device); + } + + *upload_bo = client->mapped_upload; *box = client->mapped_box; memset(&client->mapped_upload, 0, sizeof(client->mapped_upload)); memset(&client->mapped_box, 0, sizeof(client->mapped_box)); diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 09be992dd2f..4dac9220f0f 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -246,6 +246,8 @@ void wined3d_device_cleanup(struct wined3d_device *device) wine_rb_destroy(&device->depth_stencil_states, device_leftover_depth_stencil_state, NULL); wine_rb_destroy(&device->so_descs, device_free_so_desc, NULL);
+ wined3d_lock_cleanup(&device->bo_map_lock); + wined3d_decref(device->wined3d); device->wined3d = NULL; } @@ -5971,6 +5973,8 @@ HRESULT wined3d_device_init(struct wined3d_device *device, struct wined3d *wined goto err; }
+ wined3d_lock_init(&device->bo_map_lock, "wined3d_device.bo_map_lock"); + return WINED3D_OK;
err: diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 807a002b1f1..253986db18d 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -1605,10 +1605,13 @@ do { \
struct wined3d_bo { + /* client_map_count and map_ptr are accessed from both the client and CS + * threads, and protected by wined3d_device.bo_map_lock. */ struct list users; void *map_ptr; size_t buffer_offset; size_t memory_offset; + unsigned int client_map_count; bool coherent; };
@@ -3937,6 +3940,8 @@ struct wined3d_device /* Context management */ struct wined3d_context **contexts; UINT context_count; + + CRITICAL_SECTION bo_map_lock; };
void wined3d_device_cleanup(struct wined3d_device *device) DECLSPEC_HIDDEN; @@ -3958,6 +3963,16 @@ HRESULT wined3d_device_set_implicit_swapchain(struct wined3d_device *device, struct wined3d_swapchain *swapchain) DECLSPEC_HIDDEN; void wined3d_device_uninit_3d(struct wined3d_device *device) DECLSPEC_HIDDEN;
+static inline void wined3d_device_bo_map_lock(struct wined3d_device *device) +{ + EnterCriticalSection(&device->bo_map_lock); +} + +static inline void wined3d_device_bo_map_unlock(struct wined3d_device *device) +{ + LeaveCriticalSection(&device->bo_map_lock); +} + struct wined3d_device_no3d { struct wined3d_device d;
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- This is a bit unfortunate, because, but for this patch, we could get rid of wined3d_map_persistent() entirely. On the other hand, this could potentially go away if we change to a different scheme for determining when to unmap buffers.
dlls/wined3d/cs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 5d3a2cc9076..f92c584500b 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -3131,7 +3131,10 @@ static bool wined3d_cs_map_upload_bo(struct wined3d_device_context *context, str if (bo) { map_ptr += bo->memory_offset; - if (!bo->coherent) + /* If we are not mapping all buffers persistently, use + * UPDATE_SUB_RESOURCE as a means of telling the CS thread to try + * to unmap the resource, so that we can free VA space. */ + if (!bo->coherent || !wined3d_map_persistent()) client->mapped_upload.flags |= UPLOAD_BO_UPLOAD_ON_UNMAP; } map_desc->data = resource_offset_map_pointer(resource, sub_resource_idx, map_ptr, box);
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- These haven't been my favourite patches to write exactly. In some respects they make the code nicer, in others I'm left racking my brains making sure that this actually works.
The idea here is to avoid reasoning about whether a buffer will remain mapped from the client thread, and in fact to avoid making implicit assumptions about how long a buffer will be mapped in general, sort of in line with [1]. We want to make the conditions for how long a buffer remains mapped more complex than "always" or "never".
One consequence of this is that we have to refcount mapped BOs. Previously we only mapped BOs from the CS thread, and never nested those mappings. That's not true anymore. This is fine in itself, but where it gets awkward is that we sometimes really do need BOs to be unmapped, in particular for GL draws if the BO is mapped without MAP_PERSISTENT_BIT (i.e. when ARB_buffer_storage isn't available). Thus we have to introduce another implicit agreement, and one that's not exactly easy to reason about: don't ever increase the map count from the client thread if we can't map persistently. There are multiple ways to do this in practice, some more restrictive than others, but the one I elected to depend on in this patch set (although it isn't actually there yet, because wined3d_adapter_gl_alloc_bo() isn't implemented) is "just never accelerate maps if we don't have ARB_buffer_storage". It's not my favourite solution, but I couldn't find one that was less mentally taxing.
[1] https://www.winehq.org/pipermail/wine-devel/2022-January/206325.html
dlls/wined3d/buffer.c | 3 +-- dlls/wined3d/cs.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c index 50e3e9fe2b4..902ab720396 100644 --- a/dlls/wined3d/buffer.c +++ b/dlls/wined3d/buffer.c @@ -1002,8 +1002,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc * but it's safe because the client thread will wait for the * map to return, thus completely serializing this call with * other client code. */ - if (wined3d_map_persistent()) - buffer->resource.client.addr = addr; + buffer->resource.client.addr = addr;
if (((DWORD_PTR)buffer->map_ptr) & (RESOURCE_ALIGNMENT - 1)) { diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index f92c584500b..fefa4fde524 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -3089,8 +3089,7 @@ static bool wined3d_cs_map_upload_bo(struct wined3d_device_context *context, str if (!device->adapter->adapter_ops->adapter_alloc_bo(device, resource, sub_resource_idx, &addr)) return false;
- if (wined3d_map_persistent()) - client->addr = addr; + client->addr = addr; } else {