Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- This is a second version of [1].
I've increased the cap to 128 MB, which is enough for at least one and ideally two chunk buffers. If necessary it can probably be raised further in the future. I haven't done much testing personally—I'm trying to springboard off of others' testing, and also to get the code structure correct and upstream first and tune numbers later.
This still uses the same crude scheme for determining what buffers to keep mapped. An LRU list, as proposed in a reply to [1], would be an obviously better scheme, although that perhaps is less true with the kind of chunk allocation that we do in practice. I haven't implemented that now because it's not nearly as trivial, and it's not clear enough that it would offer a performance improvement. Unlike [1], though, it should be easier to change the scheme with this patch, and with the subsequent patches for 32-bit map acceleration.
[1] https://www.winehq.org/pipermail/wine-devel/2022-January/206187.html
dlls/wined3d/adapter_vk.c | 24 +++++++++---- dlls/wined3d/context_gl.c | 61 ++++++++++++++++++++-------------- dlls/wined3d/context_vk.c | 20 ++++++++--- dlls/wined3d/directx.c | 9 +++++ dlls/wined3d/wined3d_private.h | 9 +++++ 5 files changed, 87 insertions(+), 36 deletions(-)
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index 2dcc2008f72..c2e71342bb0 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -381,7 +381,10 @@ static void wined3d_allocator_vk_destroy_chunk(struct wined3d_allocator_chunk *c vk_info = &device_vk->vk_info;
if (chunk_vk->c.map_ptr) + { VK_CALL(vkUnmapMemory(device_vk->vk_device, chunk_vk->vk_memory)); + adapter_adjust_mapped_memory(device_vk->d.adapter, -WINED3D_ALLOCATOR_CHUNK_SIZE); + } VK_CALL(vkFreeMemory(device_vk->vk_device, chunk_vk->vk_memory, NULL)); TRACE("Freed memory 0x%s.\n", wine_dbgstr_longlong(chunk_vk->vk_memory)); wined3d_allocator_chunk_cleanup(&chunk_vk->c); @@ -805,10 +808,15 @@ static void *wined3d_bo_vk_map(struct wined3d_bo_vk *bo, struct wined3d_context_ return NULL; } } - else if ((vr = VK_CALL(vkMapMemory(device_vk->vk_device, bo->vk_memory, 0, VK_WHOLE_SIZE, 0, &bo->b.map_ptr))) < 0) + else { - ERR("Failed to map memory, vr %s.\n", wined3d_debug_vkresult(vr)); - return NULL; + if ((vr = VK_CALL(vkMapMemory(device_vk->vk_device, bo->vk_memory, 0, VK_WHOLE_SIZE, 0, &bo->b.map_ptr))) < 0) + { + ERR("Failed to map memory, vr %s.\n", wined3d_debug_vkresult(vr)); + return NULL; + } + + adapter_adjust_mapped_memory(device_vk->d.adapter, bo->size); }
return bo->b.map_ptr; @@ -816,12 +824,16 @@ static void *wined3d_bo_vk_map(struct wined3d_bo_vk *bo, struct wined3d_context_
static void wined3d_bo_vk_unmap(struct wined3d_bo_vk *bo, struct wined3d_context_vk *context_vk) { + struct wined3d_device_vk *device_vk = wined3d_device_vk(context_vk->c.device); const struct wined3d_vk_info *vk_info; - struct wined3d_device_vk *device_vk; struct wined3d_bo_slab_vk *slab;
- if (wined3d_map_persistent()) + /* This may race with the client thread, but it's not a hard limit anyway. */ + if (device_vk->d.adapter->mapped_size <= MAX_PERSISTENT_MAPPED_BYTES) + { + TRACE("Not unmapping BO %p.\n", bo); return; + }
bo->b.map_ptr = NULL;
@@ -838,8 +850,8 @@ static void wined3d_bo_vk_unmap(struct wined3d_bo_vk *bo, struct wined3d_context }
vk_info = context_vk->vk_info; - device_vk = wined3d_device_vk(context_vk->c.device); VK_CALL(vkUnmapMemory(device_vk->vk_device, bo->vk_memory)); + adapter_adjust_mapped_memory(device_vk->d.adapter, -bo->size); }
static void wined3d_bo_slab_vk_lock(struct wined3d_bo_slab_vk *slab_vk, struct wined3d_context_vk *context_vk) diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 9153b7fd95f..98458c0d44b 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -2803,6 +2803,8 @@ static void *wined3d_allocator_chunk_gl_map(struct wined3d_allocator_chunk_gl *c ERR("Failed to map chunk memory.\n"); return NULL; } + + adapter_adjust_mapped_memory(context_gl->c.device->adapter, WINED3D_ALLOCATOR_CHUNK_SIZE); }
++chunk_gl->c.map_count; @@ -2817,12 +2819,14 @@ static void wined3d_allocator_chunk_gl_unmap(struct wined3d_allocator_chunk_gl *
TRACE("chunk_gl %p, context_gl %p.\n", chunk_gl, context_gl);
- if (!--chunk_gl->c.map_count && !wined3d_map_persistent()) - { - wined3d_context_gl_bind_bo(context_gl, GL_PIXEL_UNPACK_BUFFER, chunk_gl->gl_buffer); - GL_EXTCALL(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER)); - chunk_gl->c.map_ptr = NULL; - } + if (--chunk_gl->c.map_count) + return; + + wined3d_context_gl_bind_bo(context_gl, GL_PIXEL_UNPACK_BUFFER, chunk_gl->gl_buffer); + GL_EXTCALL(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER)); + chunk_gl->c.map_ptr = NULL; + + adapter_adjust_mapped_memory(context_gl->c.device->adapter, -WINED3D_ALLOCATOR_CHUNK_SIZE); }
static void *wined3d_bo_gl_map(struct wined3d_bo_gl *bo, struct wined3d_context_gl *context_gl, uint32_t flags) @@ -2891,18 +2895,11 @@ map: * resources are mapped. On the other hand, we don't want to use the * access flags used to create the bo for non-persistent maps, because * that may imply dropping GL_MAP_UNSYNCHRONIZED_BIT. */ - if (wined3d_map_persistent()) - { - gl_flags = bo->flags & ~GL_CLIENT_STORAGE_BIT; - if (!(gl_flags & GL_MAP_READ_BIT)) - gl_flags |= GL_MAP_UNSYNCHRONIZED_BIT; - if (gl_flags & GL_MAP_WRITE_BIT) - gl_flags |= GL_MAP_FLUSH_EXPLICIT_BIT; - } - else - { - gl_flags = wined3d_resource_gl_map_flags(bo, flags); - } + gl_flags = bo->flags & ~GL_CLIENT_STORAGE_BIT; + if (!(gl_flags & GL_MAP_READ_BIT)) + gl_flags |= GL_MAP_UNSYNCHRONIZED_BIT; + if (gl_flags & GL_MAP_WRITE_BIT) + gl_flags |= GL_MAP_FLUSH_EXPLICIT_BIT; gl_flags |= GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
bo->b.map_ptr = GL_EXTCALL(glMapBufferRange(bo->binding, 0, bo->size, gl_flags)); @@ -2916,6 +2913,9 @@ map: bo->b.map_ptr = GL_EXTCALL(glMapBuffer(bo->binding, wined3d_resource_gl_legacy_map_flags(flags))); }
+ if (bo->b.map_ptr) + adapter_adjust_mapped_memory(device_gl->d.adapter, bo->size); + wined3d_context_gl_bind_bo(context_gl, bo->binding, 0); checkGLcall("Map buffer object");
@@ -2926,20 +2926,30 @@ static void wined3d_bo_gl_unmap(struct wined3d_bo_gl *bo, struct wined3d_context { const struct wined3d_gl_info *gl_info = context_gl->gl_info;
- if (wined3d_map_persistent()) + if (context_gl->c.device->adapter->mapped_size <= MAX_PERSISTENT_MAPPED_BYTES) + { + TRACE("Not unmapping BO %p.\n", bo); return; + } + + if (bo->memory) + { + struct wined3d_allocator_chunk_gl *chunk_gl = wined3d_allocator_chunk_gl(bo->memory->chunk); + + wined3d_allocator_chunk_gl_unmap(chunk_gl, context_gl); + if (!chunk_gl->c.map_ptr) + bo->b.map_ptr = NULL; + return; + }
bo->b.map_ptr = NULL;
wined3d_context_gl_bind_bo(context_gl, bo->binding, bo->id); - - if (bo->memory) - wined3d_allocator_chunk_gl_unmap(wined3d_allocator_chunk_gl(bo->memory->chunk), context_gl); - else - GL_EXTCALL(glUnmapBuffer(bo->binding)); - + GL_EXTCALL(glUnmapBuffer(bo->binding)); wined3d_context_gl_bind_bo(context_gl, bo->binding, 0); checkGLcall("Unmap buffer object"); + + adapter_adjust_mapped_memory(context_gl->c.device->adapter, -bo->size); }
void *wined3d_context_gl_map_bo_address(struct wined3d_context_gl *context_gl, @@ -3104,6 +3114,7 @@ void wined3d_context_gl_destroy_bo(struct wined3d_context_gl *context_gl, struct { wined3d_context_gl_bind_bo(context_gl, bo->binding, bo->id); GL_EXTCALL(glUnmapBuffer(bo->binding)); + adapter_adjust_mapped_memory(context_gl->c.device->adapter, -bo->size); }
TRACE("Destroying GL buffer %u.\n", bo->id); diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index f3ec5c03c85..74e23bbb5e1 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -269,12 +269,17 @@ void *wined3d_allocator_chunk_vk_map(struct wined3d_allocator_chunk_vk *chunk_vk
wined3d_allocator_chunk_vk_lock(chunk_vk);
- if (!chunk_vk->c.map_ptr && (vr = VK_CALL(vkMapMemory(device_vk->vk_device, - chunk_vk->vk_memory, 0, VK_WHOLE_SIZE, 0, &chunk_vk->c.map_ptr))) < 0) + if (!chunk_vk->c.map_ptr) { - ERR("Failed to map chunk memory, vr %s.\n", wined3d_debug_vkresult(vr)); - wined3d_allocator_chunk_vk_unlock(chunk_vk); - return NULL; + if ((vr = VK_CALL(vkMapMemory(device_vk->vk_device, + chunk_vk->vk_memory, 0, VK_WHOLE_SIZE, 0, &chunk_vk->c.map_ptr))) < 0) + { + ERR("Failed to map chunk memory, vr %s.\n", wined3d_debug_vkresult(vr)); + wined3d_allocator_chunk_vk_unlock(chunk_vk); + return NULL; + } + + adapter_adjust_mapped_memory(device_vk->d.adapter, WINED3D_ALLOCATOR_CHUNK_SIZE); }
++chunk_vk->c.map_count; @@ -305,6 +310,8 @@ void wined3d_allocator_chunk_vk_unmap(struct wined3d_allocator_chunk_vk *chunk_v chunk_vk->c.map_ptr = NULL;
wined3d_allocator_chunk_vk_unlock(chunk_vk); + + adapter_adjust_mapped_memory(device_vk->d.adapter, -WINED3D_ALLOCATOR_CHUNK_SIZE); }
VkDeviceMemory wined3d_context_vk_allocate_vram_chunk_memory(struct wined3d_context_vk *context_vk, @@ -977,7 +984,10 @@ void wined3d_context_vk_destroy_bo(struct wined3d_context_vk *context_vk, const }
if (bo->b.map_ptr) + { VK_CALL(vkUnmapMemory(device_vk->vk_device, bo->vk_memory)); + adapter_adjust_mapped_memory(device_vk->d.adapter, -bo->size); + } wined3d_context_vk_destroy_vk_memory(context_vk, bo->vk_memory, bo->command_buffer_id); }
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 1b34d2eceaa..36ca1be5b79 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -157,6 +157,15 @@ UINT64 adapter_adjust_memory(struct wined3d_adapter *adapter, INT64 amount) return adapter->vram_bytes_used; }
+ssize_t adapter_adjust_mapped_memory(struct wined3d_adapter *adapter, ssize_t size) +{ + /* Note that this needs to be thread-safe; the Vulkan adapter may map from + * client threads. */ + ssize_t ret = InterlockedExchangeAddSizeT(&adapter->mapped_size, size) + size; + TRACE("Adjusted mapped adapter memory by %zd to %zd.\n", size, ret); + return ret; +} + void wined3d_adapter_cleanup(struct wined3d_adapter *adapter) { unsigned int output_idx; diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index ef1062ec9b4..adf1a15926f 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -3461,6 +3461,12 @@ struct wined3d_output
HRESULT wined3d_output_get_gamma_ramp(struct wined3d_output *output, struct wined3d_gamma_ramp *ramp) DECLSPEC_HIDDEN;
+#ifdef _WIN64 +#define MAX_PERSISTENT_MAPPED_BYTES SSIZE_MAX +#else +#define MAX_PERSISTENT_MAPPED_BYTES (128 * 1024 * 1024) +#endif + /* The adapter structure */ struct wined3d_adapter { @@ -3479,6 +3485,8 @@ struct wined3d_adapter void *formats; size_t format_size;
+ ssize_t mapped_size; + const struct wined3d_vertex_pipe_ops *vertex_pipe; const struct wined3d_fragment_pipe_ops *fragment_pipe; const struct wined3d_state_entry_template *misc_state_template; @@ -3558,6 +3566,7 @@ BOOL wined3d_adapter_gl_init_format_info(struct wined3d_adapter *adapter, BOOL wined3d_adapter_no3d_init_format_info(struct wined3d_adapter *adapter) DECLSPEC_HIDDEN; BOOL wined3d_adapter_vk_init_format_info(struct wined3d_adapter_vk *adapter_vk, const struct wined3d_vk_info *vk_info) DECLSPEC_HIDDEN; +ssize_t adapter_adjust_mapped_memory(struct wined3d_adapter *adapter, ssize_t size) DECLSPEC_HIDDEN; UINT64 adapter_adjust_memory(struct wined3d_adapter *adapter, INT64 amount) DECLSPEC_HIDDEN;
BOOL wined3d_caps_gl_ctx_test_viewport_subpixel_bits(struct wined3d_caps_gl_ctx *ctx) DECLSPEC_HIDDEN;