From: Matteo Bruni mbruni@codeweavers.com
--- Helpfully, I haven't taken note of which game this helped but it did in fact fix a game crash... --- dlls/wined3d/context_gl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 7f7324c8b91..9322963775c 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -5675,7 +5675,6 @@ static void wined3d_context_gl_load_numbered_arrays(struct wined3d_context_gl *c for (i = 0; i < gl_info->limits.vertex_attribs; ++i) { const struct wined3d_stream_info_element *element = &stream_info->elements[i]; - const void *offset = get_vertex_attrib_pointer(element, state); const struct wined3d_stream_state *stream; const struct wined3d_format_gl *format_gl;
@@ -5733,6 +5732,7 @@ static void wined3d_context_gl_load_numbered_arrays(struct wined3d_context_gl *c
if (element->stride) { + const void *offset = get_vertex_attrib_pointer(element, state); unsigned int format_attrs = format_gl->f.attrs;
bo = wined3d_bo_gl_id(element->data.buffer_object);
From: Matteo Bruni mbruni@codeweavers.com
--- dlls/wined3d/context_gl.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 9322963775c..7bb2ca910d3 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -2663,8 +2663,6 @@ static void wined3d_context_gl_cleanup_resources(struct wined3d_context_gl *cont wined3d_device_gl_free_memory(device_gl, r->block); if (i != --count) *r = blocks[count]; - else - ++i; } device_gl->retired_block_count = count; }
From: Matteo Bruni mbruni@codeweavers.com
--- dlls/wined3d/device.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 1f9774767ba..34b3a4bd6fe 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -5848,6 +5848,8 @@ HRESULT CDECL wined3d_device_reset(struct wined3d_device *device, device_init_swapchain_state(device, swapchain); if (wined3d_settings.logo) device_load_logo(device, wined3d_settings.logo); + + hr = device->adapter->adapter_ops->adapter_init_3d(device); } else { @@ -5857,12 +5859,8 @@ HRESULT CDECL wined3d_device_reset(struct wined3d_device *device, wined3d_device_context_set_depth_stencil_view(context, view); }
- if (reset_state) - hr = device->adapter->adapter_ops->adapter_init_3d(device); - - /* All done. There is no need to reload resources or shaders, this will happen automatically on the - * first use - */ + /* All done. There is no need to reload resources or shaders, this will + * happen automatically on the first use. */ return hr; }
From: Matteo Bruni mbruni@codeweavers.com
Acquiring a GL context might be problematic during teardown (e.g. the CS thread might end up running this code after wined3d_device_uninit_3d() has already completed and device->swapchains has become NULL). --- dlls/wined3d/context_gl.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-)
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 7bb2ca910d3..b1f217b4e50 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -1391,23 +1391,7 @@ static void wined3d_context_gl_cleanup(struct wined3d_context_gl *context_gl)
if (context_gl->valid) { - /* If we're here because we're switching away from a previously - * destroyed context, acquiring a context in order to submit a fence - * is problematic. In particular, we'd end up back here again in the - * process of switching to the newly acquired context. - * - * If fences aren't supported there should be nothing to wait for - * anyway, so just do nothing in that case. */ - if (context_gl->c.destroyed) - { - gl_info->gl_ops.gl.p_glFinish(); - } - else if (context_gl->c.d3d_info->fences) - { - wined3d_context_gl_submit_command_fence(context_gl); - wined3d_context_gl_wait_command_fence(context_gl, - wined3d_device_gl(context_gl->c.device)->current_fence_id - 1); - } + gl_info->gl_ops.gl.p_glFinish();
if (context_gl->dummy_arbfp_prog) GL_EXTCALL(glDeleteProgramsARB(1, &context_gl->dummy_arbfp_prog));
From: Matteo Bruni mbruni@codeweavers.com
--- Originally written to avoid triggering a Mesa slowpath introduced by Mesa commit e7f3a8d6959c74f63c877dd8776fe519d54f946f. It seems to me like this would be a good idea for more reasons than just that though. --- dlls/wined3d/adapter_gl.c | 2 +- dlls/wined3d/context_gl.c | 26 ++++++++---- dlls/wined3d/device.c | 42 ++++++++++++++++--- dlls/wined3d/swapchain.c | 76 +++++----------------------------- dlls/wined3d/wined3d_private.h | 13 ++---- 5 files changed, 70 insertions(+), 89 deletions(-)
diff --git a/dlls/wined3d/adapter_gl.c b/dlls/wined3d/adapter_gl.c index 60a168b5cef..0c03fcf1739 100644 --- a/dlls/wined3d/adapter_gl.c +++ b/dlls/wined3d/adapter_gl.c @@ -4567,7 +4567,7 @@ static HRESULT adapter_gl_init_3d(struct wined3d_device *device)
wined3d_cs_init_object(device->cs, wined3d_device_gl_create_primary_opengl_context_cs, wined3d_device_gl(device)); wined3d_cs_finish(device->cs, WINED3D_CS_QUEUE_DEFAULT); - if (!wined3d_swapchain_gl(device->swapchains[0])->context_count) + if (!device->blitter) return E_FAIL;
return WINED3D_OK; diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index b1f217b4e50..187d7c2ce59 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -1258,9 +1258,11 @@ success:
static BOOL wined3d_context_gl_set_gl_context(struct wined3d_context_gl *context_gl) { - struct wined3d_swapchain_gl *swapchain_gl = wined3d_swapchain_gl(context_gl->c.swapchain); + struct wined3d_device_gl *device_gl = wined3d_device_gl(context_gl->c.device); BOOL backup = FALSE;
+ TRACE("context_gl %p.\n", context_gl); + if (!wined3d_context_gl_set_pixel_format(context_gl)) { WARN("Failed to set pixel format %d on device context %p.\n", @@ -1275,23 +1277,20 @@ static BOOL wined3d_context_gl_set_gl_context(struct wined3d_context_gl *context context_gl->valid = 0; WARN("Trying fallback to the backup window.\n");
- /* FIXME: If the context is destroyed it's no longer associated with - * a swapchain, so we can't use the swapchain to get a backup dc. To - * make this work windowless contexts would need to be handled by the - * device. */ - if (context_gl->c.destroyed || !swapchain_gl) + if (context_gl->c.destroyed) { FIXME("Unable to get backup dc for destroyed context %p.\n", context_gl); wined3d_context_gl_set_current(NULL); return FALSE; }
- if (!(context_gl->dc = wined3d_swapchain_gl_get_backup_dc(swapchain_gl))) + if (!(context_gl->dc = wined3d_device_gl_get_backup_dc(device_gl))) { wined3d_context_gl_set_current(NULL); return FALSE; }
+ TRACE("Using backup DC %p.\n", context_gl->dc); context_gl->dc_is_private = TRUE; context_gl->dc_has_format = FALSE;
@@ -1369,6 +1368,8 @@ static void wined3d_context_gl_cleanup(struct wined3d_context_gl *context_gl) HDC restore_dc; unsigned int i;
+ TRACE("context_gl %p.\n", context_gl); + restore_ctx = wglGetCurrentContext(); restore_dc = wglGetCurrentDC();
@@ -2016,7 +2017,7 @@ static BOOL wined3d_context_gl_create_wgl_ctx(struct wined3d_context_gl *context context_gl->pixel_format, context_gl->dc);
wined3d_release_dc(context_gl->window, context_gl->dc); - if (!(context_gl->dc = wined3d_swapchain_gl_get_backup_dc(swapchain_gl))) + if (!(context_gl->dc = wined3d_device_gl_get_backup_dc(wined3d_device_gl(device)))) { ERR("Failed to retrieve the backup device context.\n"); return FALSE; @@ -2093,7 +2094,7 @@ HRESULT wined3d_context_gl_init(struct wined3d_context_gl *context_gl, struct wi
if (!context_gl->dc) { - if (!(context_gl->dc = wined3d_swapchain_gl_get_backup_dc(swapchain_gl))) + if (!(context_gl->dc = wined3d_device_gl_get_backup_dc(wined3d_device_gl(device)))) { ERR("Failed to retrieve a device context.\n"); return E_FAIL; @@ -2338,6 +2339,7 @@ void wined3d_context_gl_destroy(struct wined3d_context_gl *context_gl) context_gl->c.destroy_delayed = 1; /* FIXME: Get rid of a pointer to swapchain from wined3d_context. */ context_gl->c.swapchain = NULL; + context_gl->c.device = NULL; return; }
@@ -4467,6 +4469,12 @@ static void wined3d_context_gl_activate(struct wined3d_context_gl *context_gl, struct wined3d_texture *texture, unsigned int sub_resource_idx) { wined3d_context_gl_enter(context_gl); + if (texture && texture->swapchain && texture->swapchain != context_gl->c.swapchain) + { + TRACE("Switching context_gl %p from swapchain %p to swapchain %p.\n", + context_gl, context_gl->c.swapchain, texture->swapchain); + context_gl->c.swapchain = texture->swapchain; + } wined3d_context_gl_update_window(context_gl); wined3d_context_gl_setup_target(context_gl, texture, sub_resource_idx); if (!context_gl->valid) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 34b3a4bd6fe..6d9c56c9ace 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -1187,7 +1187,6 @@ bool wined3d_device_gl_create_bo(struct wined3d_device_gl *device_gl, struct win void wined3d_device_gl_delete_opengl_contexts_cs(void *object) { struct wined3d_device_gl *device_gl = object; - struct wined3d_swapchain_gl *swapchain_gl; struct wined3d_context_gl *context_gl; struct wined3d_context *context; struct wined3d_device *device; @@ -1219,11 +1218,14 @@ void wined3d_device_gl_delete_opengl_contexts_cs(void *object) context_release(context);
while (device->context_count) + wined3d_context_gl_destroy(wined3d_context_gl(device->contexts[0])); + + if (device_gl->backup_dc) { - if ((swapchain_gl = wined3d_swapchain_gl(device->contexts[0]->swapchain))) - wined3d_swapchain_gl_destroy_contexts(swapchain_gl); - else - wined3d_context_gl_destroy(wined3d_context_gl(device->contexts[0])); + TRACE("Destroying backup wined3d window %p, dc %p.\n", device_gl->backup_wnd, device_gl->backup_dc); + + wined3d_release_dc(device_gl->backup_wnd, device_gl->backup_dc); + DestroyWindow(device_gl->backup_wnd); } }
@@ -1252,6 +1254,7 @@ void wined3d_device_gl_create_primary_opengl_context_cs(void *object) { WARN("Failed to initialise allocator.\n"); context_release(context); + wined3d_context_gl_destroy(wined3d_context_gl(device->contexts[0])); return; }
@@ -1261,6 +1264,7 @@ void wined3d_device_gl_create_primary_opengl_context_cs(void *object) ERR("Failed to allocate shader private data, hr %#x.\n", hr); wined3d_allocator_cleanup(&device_gl->allocator); context_release(context); + wined3d_context_gl_destroy(wined3d_context_gl(device->contexts[0])); return; }
@@ -1270,6 +1274,7 @@ void wined3d_device_gl_create_primary_opengl_context_cs(void *object) device->shader_backend->shader_free_private(device, NULL); wined3d_allocator_cleanup(&device_gl->allocator); context_release(context); + wined3d_context_gl_destroy(wined3d_context_gl(device->contexts[0])); return; }
@@ -5913,6 +5918,33 @@ void CDECL wined3d_device_get_gamma_ramp(const struct wined3d_device *device, wined3d_swapchain_get_gamma_ramp(swapchain, ramp); }
+HDC wined3d_device_gl_get_backup_dc(struct wined3d_device_gl *device_gl) +{ + TRACE("device_gl %p.\n", device_gl); + + if (!device_gl->backup_dc) + { + TRACE("Creating the backup window for device %p.\n", device_gl); + + if (!(device_gl->backup_wnd = CreateWindowA(WINED3D_OPENGL_WINDOW_CLASS_NAME, "WineD3D fake window", + WS_OVERLAPPEDWINDOW, 10, 10, 10, 10, NULL, NULL, NULL, NULL))) + { + ERR("Failed to create a window.\n"); + return NULL; + } + + if (!(device_gl->backup_dc = GetDC(device_gl->backup_wnd))) + { + ERR("Failed to get a DC.\n"); + DestroyWindow(device_gl->backup_wnd); + device_gl->backup_wnd = NULL; + return NULL; + } + } + + return device_gl->backup_dc; +} + void device_resource_add(struct wined3d_device *device, struct wined3d_resource *resource) { TRACE("device %p, resource %p.\n", device, resource); diff --git a/dlls/wined3d/swapchain.c b/dlls/wined3d/swapchain.c index 346fd1e6497..53f075785cb 100644 --- a/dlls/wined3d/swapchain.c +++ b/dlls/wined3d/swapchain.c @@ -23,6 +23,7 @@ #include "wined3d_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(d3d); +WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
void wined3d_swapchain_cleanup(struct wined3d_swapchain *swapchain) { @@ -87,7 +88,6 @@ void wined3d_swapchain_cleanup(struct wined3d_swapchain *swapchain)
static void wined3d_swapchain_gl_destroy_object(void *object) { - wined3d_swapchain_gl_destroy_contexts(object); }
void wined3d_swapchain_gl_cleanup(struct wined3d_swapchain_gl *swapchain_gl) @@ -98,14 +98,6 @@ void wined3d_swapchain_gl_cleanup(struct wined3d_swapchain_gl *swapchain_gl)
wined3d_cs_destroy_object(cs, wined3d_swapchain_gl_destroy_object, swapchain_gl); wined3d_cs_finish(cs, WINED3D_CS_QUEUE_DEFAULT); - - if (swapchain_gl->backup_dc) - { - TRACE("Destroying backup wined3d window %p, dc %p.\n", swapchain_gl->backup_wnd, swapchain_gl->backup_dc); - - wined3d_release_dc(swapchain_gl->backup_wnd, swapchain_gl->backup_dc); - DestroyWindow(swapchain_gl->backup_wnd); - } }
static void wined3d_swapchain_vk_destroy_vulkan_swapchain(struct wined3d_swapchain_vk *swapchain_vk) @@ -591,7 +583,6 @@ static bool swapchain_present_is_partial_copy(struct wined3d_swapchain *swapchai static void swapchain_gl_present(struct wined3d_swapchain *swapchain, const RECT *src_rect, const RECT *dst_rect, unsigned int swap_interval, DWORD flags) { - struct wined3d_swapchain_gl *swapchain_gl = wined3d_swapchain_gl(swapchain); struct wined3d_texture *back_buffer = swapchain->back_buffers[0]; const struct wined3d_pixel_format *pixel_format; const struct wined3d_gl_info *gl_info; @@ -610,7 +601,8 @@ static void swapchain_gl_present(struct wined3d_swapchain *swapchain, TRACE("Presenting DC %p.\n", context_gl->dc);
pixel_format = &wined3d_adapter_gl(swapchain->device->adapter)->pixel_formats[context_gl->pixel_format]; - if (context_gl->dc == swapchain_gl->backup_dc || (pixel_format->swap_method != WGL_SWAP_COPY_ARB + if (context_gl->dc == wined3d_device_gl(swapchain->device)->backup_dc + || (pixel_format->swap_method != WGL_SWAP_COPY_ARB && swapchain_present_is_partial_copy(swapchain, dst_rect))) { swapchain_blit_gdi(swapchain, context, src_rect, dst_rect); @@ -626,8 +618,11 @@ static void swapchain_gl_present(struct wined3d_swapchain *swapchain, if (wined3d_settings.offscreen_rendering_mode == ORM_FBO) swapchain_blit(swapchain, context, src_rect, dst_rect);
- if (swapchain_gl->context_count > 1) + if (swapchain->device->context_count > 1) + { + WARN_(d3d_perf)("Multiple contexts, calling glFinish() to enforce ordering.\n"); gl_info->gl_ops.gl.p_glFinish(); + }
/* call wglSwapBuffers through the gl table to avoid confusing the Steam overlay */ gl_info->gl_ops.wgl.p_wglSwapBuffers(context_gl->dc); @@ -1799,74 +1794,25 @@ static struct wined3d_context_gl *wined3d_swapchain_gl_create_context(struct win
context_release(&context_gl->c);
- if (!wined3d_array_reserve((void **)&swapchain_gl->contexts, &swapchain_gl->contexts_size, - swapchain_gl->context_count + 1, sizeof(*swapchain_gl->contexts))) - { - ERR("Failed to allocate new context array memory.\n"); - wined3d_context_gl_destroy(context_gl); - return NULL; - } - swapchain_gl->contexts[swapchain_gl->context_count++] = context_gl; - return context_gl; }
-void wined3d_swapchain_gl_destroy_contexts(struct wined3d_swapchain_gl *swapchain_gl) -{ - unsigned int i; - - TRACE("swapchain_gl %p.\n", swapchain_gl); - - for (i = 0; i < swapchain_gl->context_count; ++i) - { - wined3d_context_gl_destroy(swapchain_gl->contexts[i]); - } - heap_free(swapchain_gl->contexts); - swapchain_gl->contexts_size = 0; - swapchain_gl->context_count = 0; - swapchain_gl->contexts = NULL; -} - struct wined3d_context_gl *wined3d_swapchain_gl_get_context(struct wined3d_swapchain_gl *swapchain_gl) { + struct wined3d_device *device = swapchain_gl->s.device; DWORD tid = GetCurrentThreadId(); unsigned int i;
- for (i = 0; i < swapchain_gl->context_count; ++i) + for (i = 0; i < device->context_count; ++i) { - if (swapchain_gl->contexts[i]->tid == tid) - return swapchain_gl->contexts[i]; + if (wined3d_context_gl(device->contexts[i])->tid == tid) + return wined3d_context_gl(device->contexts[i]); }
/* Create a new context for the thread. */ return wined3d_swapchain_gl_create_context(swapchain_gl); }
-HDC wined3d_swapchain_gl_get_backup_dc(struct wined3d_swapchain_gl *swapchain_gl) -{ - if (!swapchain_gl->backup_dc) - { - TRACE("Creating the backup window for swapchain %p.\n", swapchain_gl); - - if (!(swapchain_gl->backup_wnd = CreateWindowA(WINED3D_OPENGL_WINDOW_CLASS_NAME, "WineD3D fake window", - WS_OVERLAPPEDWINDOW, 10, 10, 10, 10, NULL, NULL, NULL, NULL))) - { - ERR("Failed to create a window.\n"); - return NULL; - } - - if (!(swapchain_gl->backup_dc = GetDC(swapchain_gl->backup_wnd))) - { - ERR("Failed to get a DC.\n"); - DestroyWindow(swapchain_gl->backup_wnd); - swapchain_gl->backup_wnd = NULL; - return NULL; - } - } - - return swapchain_gl->backup_dc; -} - void swapchain_update_draw_bindings(struct wined3d_swapchain *swapchain) { UINT i; diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index b7d3b579ed2..fee803eef8e 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4294,6 +4294,9 @@ struct wined3d_device_gl } *retired_blocks; SIZE_T retired_blocks_size; SIZE_T retired_block_count; + + HWND backup_wnd; + HDC backup_dc; };
static inline struct wined3d_device_gl *wined3d_device_gl(struct wined3d_device *device) @@ -4331,6 +4334,7 @@ bool wined3d_device_gl_create_bo(struct wined3d_device_gl *device_gl, GLenum usage, bool coherent, GLbitfield flags, struct wined3d_bo_gl *bo) DECLSPEC_HIDDEN; void wined3d_device_gl_create_primary_opengl_context_cs(void *object) DECLSPEC_HIDDEN; void wined3d_device_gl_delete_opengl_contexts_cs(void *object) DECLSPEC_HIDDEN; +HDC wined3d_device_gl_get_backup_dc(struct wined3d_device_gl *device_gl) DECLSPEC_HIDDEN; GLbitfield wined3d_device_gl_get_memory_type_flags(unsigned int memory_type_idx) DECLSPEC_HIDDEN;
static inline float wined3d_alpha_ref(const struct wined3d_state *state) @@ -5704,13 +5708,6 @@ HRESULT wined3d_swapchain_no3d_init(struct wined3d_swapchain *swapchain_no3d, struct wined3d_swapchain_gl { struct wined3d_swapchain s; - - struct wined3d_context_gl **contexts; - SIZE_T contexts_size; - SIZE_T context_count; - - HDC backup_dc; - HWND backup_wnd; };
static inline struct wined3d_swapchain_gl *wined3d_swapchain_gl(struct wined3d_swapchain *swapchain) @@ -5719,8 +5716,6 @@ static inline struct wined3d_swapchain_gl *wined3d_swapchain_gl(struct wined3d_s }
void wined3d_swapchain_gl_cleanup(struct wined3d_swapchain_gl *swapchain_gl) DECLSPEC_HIDDEN; -void wined3d_swapchain_gl_destroy_contexts(struct wined3d_swapchain_gl *swapchain_gl) DECLSPEC_HIDDEN; -HDC wined3d_swapchain_gl_get_backup_dc(struct wined3d_swapchain_gl *swapchain_gl) DECLSPEC_HIDDEN; struct wined3d_context_gl *wined3d_swapchain_gl_get_context(struct wined3d_swapchain_gl *swapchain_gl) DECLSPEC_HIDDEN; HRESULT wined3d_swapchain_gl_init(struct wined3d_swapchain_gl *swapchain_gl, struct wined3d_device *device, struct wined3d_swapchain_desc *desc,
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=126505
Your paranoid android.
=== debian11 (32 bit report) ===
d3d8: stateblock: Timeout visual: Timeout
d3d9: d3d9ex: Timeout device: Timeout stateblock: Timeout visual: Timeout
d3dcompiler_43: asm: Timeout blob: Timeout hlsl_d3d11: Timeout hlsl_d3d9: Timeout reflection: Timeout
d3dcompiler_46: asm: Timeout blob: Timeout hlsl_d3d11: Timeout hlsl_d3d9: Timeout reflection: Timeout
d3dcompiler_47: asm: Timeout blob: Timeout hlsl_d3d11: Timeout hlsl_d3d9: Timeout reflection: Timeout
Report validation errors: d3drm: Timeout
=== debian11 (build log) ===
WineRunWineTest.pl:error: The task timed out
From patch 1/5:
Helpfully, I haven't taken note of which game this helped but it did
in fact fix a game crash...
I remember discussing this change before... I think this was for FFXIV. While the change is fine in terms of reducing the scope of "offset" and avoiding a calculation we may end up not needing, it wasn't clear why this would avoid a crash, and that seems somewhat concerning.
From patch 4/5:
Acquiring a GL context might be problematic during teardown (e.g. the CS thread might end up running this code after wined3d_device_uninit_3d() has already completed and device->swapchains has become NULL).
This too is a patch I've seen before. From last time:
On Wed, 9 Feb 2022 at 20:25, Matteo Bruni matteo.mystral@gmail.com wrote:
On Wed, Feb 9, 2022 at 7:44 PM Henri Verbeet hverbeet@gmail.com wrote:
I suppose this is probably fine in practice, but glFinish() always makes me a bit uncomfortable; I'd prefer just checking for the availability of fences, like in 5/5. Taking that one step further, if fences aren't supported, I don't think there's anything we would need to wait for in the first place; we can probably just avoid the glFinish() in that case as well.
Good point, I'll give that a try.
From patch 5/5:
Originally written to avoid triggering a Mesa slowpath introduced by Mesa commit e7f3a8d6959c74f63c877dd8776fe519d54f946f. It seems to me like this would be a good idea for more reasons than just that though.
It may be worth mentioning those reasons then. :)
For reference, from last time:
2020-12-03 18:52:45 @henri Mystral: 197149 caught my eye; do we actually hit that? 2020-12-03 18:56:47 @henri looking at https://cgit.freedesktop.org/mesa/mesa/commit/?id=e7f3a8d6959c74f63c877dd8776fe519d54f946f my guess would be the issue would mostly go away if we did event queries the Vulkan way 2020-12-03 18:57:03 @henri (i.e., wined3d_query_event_vk_issue()/wined3d_query_event_vk_poll()) 2020-12-03 20:31:10 @Mystral I don't know if it matters with the new event queries stuff but it did destroy perf with my branch 2020-12-03 20:32:38 @Mystral IIUC it flushes all the pending commands every time you FenceSync 2020-12-03 20:34:03 @Mystral if there are more than 1 contexts around, which is usually the case with current Wine and d3d11
From patch 1/5:
Helpfully, I haven't taken note of which game this helped but it did in fact fix a game crash...
I remember discussing this change before... I think this was for FFXIV. While the change is fine in terms of reducing the scope of "offset" and avoiding a calculation we may end up not needing, it wasn't clear why this would avoid a crash, and that seems somewhat concerning.
We also discussed it further, it did not seem like a huge mystery that it fixes a crash. get_vertex_attrib_pointer() dereferences a buffer object that could have been freed unless we make sure to only access it when the element is in use AND element->stride is not 0.
Quoting the relevant chat:
``` (11:08:52 PM) Mystral: I think it's because sometimes there are stray, unused buffers in the stream_info structure (11:12:59 PM) Mystral: I haven't checked but I guess element->stride is 0 or something (11:13:05 PM) henri: oh, that probably makes sense, yeah (11:13:43 PM) henri: well, stream_info->use_map in particular I'd think (11:14:03 PM) Mystral: or that (11:14:37 PM) henri: e.g. wined3d_stream_info_from_declaration() only updates used elements ```
From patch 4/5:
Acquiring a GL context might be problematic during teardown (e.g. the CS thread might end up running this code after wined3d_device_uninit_3d() has already completed and device->swapchains has become NULL).
This too is a patch I've seen before. From last time:
On Wed, 9 Feb 2022 at 20:25, Matteo Bruni [matteo.mystral@gmail.com](mailto:matteo.mystral@gmail.com) wrote:
On Wed, Feb 9, 2022 at 7:44 PM Henri Verbeet [hverbeet@gmail.com](mailto:hverbeet@gmail.com) wrote:
I suppose this is probably fine in practice, but glFinish() always makes me a bit uncomfortable; I'd prefer just checking for the availability of fences, like in 5/5. Taking that one step further, if fences aren't supported, I don't think there's anything we would need to wait for in the first place; we can probably just avoid the glFinish() in that case as well.
Good point, I'll give that a try.
Not quite. There was a follow up to that which became commit ab72e336411a17e0108eb0fbc927e2c1ffb98978 and fixed that specific case.
It turns out that there is at least one more case where the same code is a problem, i.e. what I mention in the commit message. To explain it further, in specific circumstances wined3d_context_gl_wait_command_fence() attempts to acquire a different context from the one that's current at that time and, if that happens after wined3d_device_uninit_3d() has completed, that will end up accessing a NULL device->swapchain[0].
I guess we could potentially complicate the code further to address this case but honestly I don't see the point. Especially if we get rid of multiple contexts in the first place (i.e. the next patch).
From patch 5/5:
Originally written to avoid triggering a Mesa slowpath introduced by Mesa commit e7f3a8d6959c74f63c877dd8776fe519d54f946f. It seems to me like this would be a good idea for more reasons than just that though.
It may be worth mentioning those reasons then.
I guess. Avoiding multiple contexts in the most common case (which is, CSMT enabled) avoids a lot of unnecessary complication, like the context reacquire thing for event queries. Also it should fix bug 43773 (see https://bugs.winehq.org/show_bug.cgi?id=43773#c3 in particular). I suspect there are more bugs due to the same root cause (I seem to have a vague recollection of at least another application incurring the same issue but as usual I didn't keep notes, or I can't find them anyway :/)
For reference, from last time:
2020-12-03 18:52:45 @henri Mystral: 197149 caught my eye; do we actually hit that? 2020-12-03 18:56:47 @henri looking at https://cgit.freedesktop.org/mesa/mesa/commit/?id=e7f3a8d6959c74f63c877dd8776fe519d54f946f my guess would be the issue would mostly go away if we did event queries the Vulkan way 2020-12-03 18:57:03 @henri (i.e., wined3d_query_event_vk_issue()/wined3d_query_event_vk_poll()) 2020-12-03 20:31:10 @Mystral I don't know if it matters with the new event queries stuff but it did destroy perf with my branch 2020-12-03 20:32:38 @Mystral IIUC it flushes all the pending commands every time you FenceSync 2020-12-03 20:34:03 @Mystral if there are more than 1 contexts around, which is usually the case with current Wine and d3d11
Yeah, we do hit it and it was bad last time I checked (which was with upstream Wine as of a few months ago IIRC). I don't think I followed up with the event queries rework, as I understand it it wouldn't solve the issue and there seem to be more reason than just that to remove the per-swapchain constraint to our GL contexts.
I can put some of this discussion into the relevant commit messages, if that's useful.
From patch 5/5:
Originally written to avoid triggering a Mesa slowpath introduced by Mesa commit e7f3a8d6959c74f63c877dd8776fe519d54f946f. It seems to me like this would be a good idea for more reasons than just that though.
It may be worth mentioning those reasons then.
I guess. Avoiding multiple contexts in the most common case (which is, CSMT enabled) avoids a lot of unnecessary complication, like the context reacquire thing for event queries. Also it should fix bug 43773 (see https://bugs.winehq.org/show_bug.cgi?id=43773#c3 in particular). I suspect there are more bugs due to the same root cause (I seem to have a vague recollection of at least another application incurring the same issue but as usual I didn't keep notes, or I can't find them anyway :/)
For reference, from last time:
2020-12-03 18:52:45 @henri Mystral: 197149 caught my eye; do we actually hit that? 2020-12-03 18:56:47 @henri looking at https://cgit.freedesktop.org/mesa/mesa/commit/?id=e7f3a8d6959c74f63c877dd8776fe519d54f946f my guess would be the issue would mostly go away if we did event queries the Vulkan way 2020-12-03 18:57:03 @henri (i.e., wined3d_query_event_vk_issue()/wined3d_query_event_vk_poll()) 2020-12-03 20:31:10 @Mystral I don't know if it matters with the new event queries stuff but it did destroy perf with my branch 2020-12-03 20:32:38 @Mystral IIUC it flushes all the pending commands every time you FenceSync 2020-12-03 20:34:03 @Mystral if there are more than 1 contexts around, which is usually the case with current Wine and d3d11
Yeah, we do hit it and it was bad last time I checked (which was with upstream Wine as of a few months ago IIRC). I don't think I followed up with the event queries rework, as I understand it it wouldn't solve the issue and there seem to be more reason than just that to remove the per-swapchain constraint to our GL contexts.
In general, yes, I agree; after we got wined3d_context_gl_update_window() and GL3 windowless context, there no longer seems to be a huge need to tie GL contexts to a swapchain, and it's probably more of a hindrance than a benefit at this point.
I do wonder whether you did much testing (or had it done by someone else) with applications using multiple swapchains for this patch. In particular, MS Office and various WPF applications come to mind.
Finally, there's the question of whether less than a month before the code freeze is the right time to make this kind of fundamental change. I suppose we've already done worse this cycle, so perhaps it's fine.
I can put some of this discussion into the relevant commit messages, if that's useful.
I generally think it is, yes.
I do wonder whether you did much testing (or had it done by someone else) with applications using multiple swapchains for this patch. In particular, MS Office and various WPF applications come to mind.
I wouldn't call it extensive testing but I've been using various Office 2016 applications with some version of this patch over time and I don't recall encountering issues related to it.
An older version of the patch has also been included in a certain CrossOver branch which, as far as I'm aware, sees heavy use of Office applications.
It's probably not as much testing as we'd like but I am sufficiently confident that, if not specifically this particular version of the patch, this change in principle should be fine.
Finally, there's the question of whether less than a month before the code freeze is the right time to make this kind of fundamental change. I suppose we've already done worse this cycle, so perhaps it's fine.
That's a fair point. I'd be inclined to say "what's one more big-ish change anyway :D" but yeah, it's up to you.
That's a fair point. I'd be inclined to say "what's one more big-ish change anyway :D" but yeah, it's up to you.
Nope :D I'm only raising the point; an actual decision would be entirely up to the various maintainers.
I don't think I can review 4/5 without understanding why that code is there in the first place. It was introduced by a7c320d184fb, which didn't provide a reason. I'd assume that there's a Vulkan-like "all GPU commands have to be done before destroying an object" requirement, but a quick (too quick?) reading of ARB_sync (specifically § D.2.1) implies that's specifically not the case here. And that doesn't explain the glFinish() added in 3cc8147594de either. Why do we need that?
Wrt 5/5, I think that reducing the complexity of multiple contexts is a good idea (and worth the risk if it fixes bugs—do we ), but I also don't know if there are any reasons to reject it besides "churn and/or risk of regressions". From private conversation, I got the impression there's no real reason [performance advantage or anything else?] to use multiple contexts, and that it was just done because it was somehow the easiest thing to do at the time?
(Also, is it possible to split out wined3d_swapchain_gl_get_backup_dc() -> wined3d_device_gl_get_backup_dc() into a separate change?)
Zebediah Figura (@zfigura) commented about dlls/wined3d/adapter_gl.c:
wined3d_cs_init_object(device->cs, wined3d_device_gl_create_primary_opengl_context_cs, wined3d_device_gl(device)); wined3d_cs_finish(device->cs, WINED3D_CS_QUEUE_DEFAULT);
- if (!wined3d_swapchain_gl(device->swapchains[0])->context_count)
- if (!device->blitter) return E_FAIL;
Probably for my own education rather than anything else, but why "device->blitter" rather than "device->context_count"?
Zebediah Figura (@zfigura) commented about dlls/wined3d/device.c:
{ WARN("Failed to initialise allocator.\n"); context_release(context);
wined3d_context_gl_destroy(wined3d_context_gl(device->contexts[0]));
This (and the similar cleanup below) seems desirable but also probably deserves its own commit?
Zebediah Figura (@zfigura) commented about dlls/wined3d/swapchain.c:
static void wined3d_swapchain_gl_destroy_object(void *object) {
- wined3d_swapchain_gl_destroy_contexts(object);
}
We don't need the wined3d_cs_destroy_object() anymore then, do we?
Zebediah Figura (@zfigura) commented about dlls/wined3d/swapchain.c:
if (wined3d_settings.offscreen_rendering_mode == ORM_FBO) swapchain_blit(swapchain, context, src_rect, dst_rect);
if (swapchain_gl->context_count > 1)
if (swapchain->device->context_count > 1)
{
WARN_(d3d_perf)("Multiple contexts, calling glFinish() to enforce ordering.\n"); gl_info->gl_ops.gl.p_glFinish();
}
Is there a potential performance problem from this?
I don't think I can review 4/5 without understanding why that code is there in the first place. It was introduced by a7c320d184fb, which didn't provide a reason. I'd assume that there's a Vulkan-like "all GPU commands have to be done before destroying an object" requirement, but a quick (too quick?) reading of ARB_sync (specifically § D.2.1) implies that's specifically not the case here. And that doesn't explain the glFinish() added in 3cc8147594de either. Why do we need that?
It's not about destroying the fences themselves, but about destroying the resources protected by those fences. In particular, wined3d_context_gl_cleanup_resources() is run from wined3d_context_gl_wait_command_fence(). There may also be a requirement to flush any pending (draw) commands on the context; I don't quite remember whether the wglDeleteContext()/wglMakeCurrent() in wined3d_context_gl_cleanup() may do that implicitly, but they might.
As for the glFinish() from commit 3cc8147594de868884d3e57babff8eef058c63a3, the idea was to make sure all pending GL commands have finished like wined3d_context_gl_wait_command_fence() would. Unfortunately we can't just run wined3d_context_gl_cleanup_resources() on a destroyed context, but it should be fine; we'll either get a chance to clean things up when the new context we switch to submits a fence or waits for one, or we're destroying the device and device destruction will take care of it.
Could we get rid of this code entirely? Possibly. We'd end up with slightly different semantics for GL context cleanup and Vulkan context cleanup in terms of what commands are guaranteed to have been completed. We may end up holding on the bo allocations a bit longer than we otherwise would. Perhaps that's fine.
It turns out that there is at least one more case where the same code is a problem, i.e. what I mention in the commit message. To explain it further, in specific circumstances wined3d_context_gl_wait_command_fence() attempts to acquire a different context from the one that's current at that time and, if that happens after wined3d_device_uninit_3d() has completed, that will end up accessing a NULL device->swapchain[0].
Thinking about it a bit more, that sounds slightly suspicious, actually. In particular, wined3d_device_uninit_3d() is supposed to destroy all contexts, so why are we running context_gl_cleanup() after wined3d_device_uninit_3d() has completed? You also seem to imply we somehow have a command fence submitted on a different context than the one it was allocated from?
Wrt 5/5, I think that reducing the complexity of multiple contexts is a good idea (and worth the risk if it fixes bugs—do we ), but I also don't know if there are any reasons to reject it besides "churn and/or risk of regressions". From private conversation, I got the impression there's no real reason [performance advantage or anything else?] to use multiple contexts, and that it was just done because it was somehow the easiest thing to do at the time?
I can't think of any real downside, except for the glFinish() below that you already commented on.
(Also, is it possible to split out wined3d_swapchain_gl_get_backup_dc() -> wined3d_device_gl_get_backup_dc() into a separate change?)
It was a separate, following change initially. Moving the contexts from the swapchain to the device without also moving backup_dc handling seemed a bit awkward to me. It can certainly be separated again though.
Actually, I guess there's the option to do it before the other change instead. Hmm, I might try that one.
On Wed Nov 23 00:22:40 2022 +0000, Zebediah Figura wrote:
Probably for my own education rather than anything else, but why "device->blitter" rather than "device->context_count"?
Hmm, context_count should also work now. It didn't initially, before I introduced the wined3d_context_gl_destroy() calls in wined3d_device_gl_create_primary_opengl_context_cs().
Actually, I'm not sure that it works correctly now either. Will have a look...
On Wed Nov 23 00:22:41 2022 +0000, Zebediah Figura wrote:
This (and the similar cleanup below) seems desirable but also probably deserves its own commit?
Related to the previous one, I'll try to split it off. One problem that I see is that I don't think that currently there is any function to remove an individual GL context from a swapchain (and its 'contexts' array). I guess I could introduce it just to get rid of it right after...
On Wed Nov 23 00:22:41 2022 +0000, Zebediah Figura wrote:
We don't need the wined3d_cs_destroy_object() anymore then, do we?
Hurr, yep, I can get rid of a bunch of things here. I blame one of the rebases...
On Wed Nov 23 00:22:42 2022 +0000, Zebediah Figura wrote:
Is there a potential performance problem from this?
Right, yes. It should only apply to the CSMT disabled case in practice.
Which I guess is an argument for not disabling CSMT even on single-core CPUs.
It turns out that there is at least one more case where the same code is a problem, i.e. what I mention in the commit message. To explain it further, in specific circumstances wined3d_context_gl_wait_command_fence() attempts to acquire a different context from the one that's current at that time and, if that happens after wined3d_device_uninit_3d() has completed, that will end up accessing a NULL device->swapchain[0].
Thinking about it a bit more, that sounds slightly suspicious, actually. In particular, wined3d_device_uninit_3d() is supposed to destroy all contexts, so why are we running context_gl_cleanup() after wined3d_device_uninit_3d() has completed? You also seem to imply we somehow have a command fence submitted on a different context than the one it was allocated from?
You're right, that explanation doesn't seem sound. I guess I'll retract the patch until I find out.
I don't think I can review 4/5 without understanding why that code is there in the first place. It was introduced by a7c320d184fb, which didn't provide a reason. I'd assume that there's a Vulkan-like "all GPU commands have to be done before destroying an object" requirement, but a quick (too quick?) reading of ARB_sync (specifically § D.2.1) implies that's specifically not the case here. And that doesn't explain the glFinish() added in 3cc8147594de either. Why do we need that?
It's not about destroying the fences themselves, but about destroying the resources protected by those fences. In particular, wined3d_context_gl_cleanup_resources() is run from wined3d_context_gl_wait_command_fence(). There may also be a requirement to flush any pending (draw) commands on the context; I don't quite remember whether the wglDeleteContext()/wglMakeCurrent() in wined3d_context_gl_cleanup() may do that implicitly, but they might.
As for the glFinish() from commit 3cc8147594de868884d3e57babff8eef058c63a3, the idea was to make sure all pending GL commands have finished like wined3d_context_gl_wait_command_fence() would. Unfortunately we can't just run wined3d_context_gl_cleanup_resources() on a destroyed context, but it should be fine; we'll either get a chance to clean things up when the new context we switch to submits a fence or waits for one, or we're destroying the device and device destruction will take care of it.
I think I'm still missing something. I assume that the rub is "GL sync is implicit *until* we start using ARB_sync and persistent maps, or similarly APPLE_flush_buffer_range", but that doesn't explain the use of glFinish(), which presumably wouldn't be required in that case. [I also don't understand why a7c320d184fb specifically adds that wait to wined3d_context_gl_cleanup()? Unless it's incidental to that change.]
Could we get rid of this code entirely? Possibly. We'd end up with slightly different semantics for GL context cleanup and Vulkan context cleanup in terms of what commands are guaranteed to have been completed. We may end up holding on the bo allocations a bit longer than we otherwise would. Perhaps that's fine.
I think it makes sense to use similar code, I'm just struggling a little to understand why it's necessary to explicitly synchronize here.
On Wed Nov 23 19:16:18 2022 +0000, Matteo Bruni wrote:
Right, yes. It should only apply to the CSMT disabled case in practice. Which I guess is an argument for not disabling CSMT even on single-core CPUs.
Ah, of course. It makes me a *bit* nervous, but I guess the main reason to disable CSMT these days is CPU usage, not even performance, so this probably is fine.
Not that I know what this glFinish is for in the first place, though... I don't suppose that it can be asynchronous?
(Also, is it possible to split out wined3d_swapchain_gl_get_backup_dc() -> wined3d_device_gl_get_backup_dc() into a separate change?)
It was a separate, following change initially. Moving the contexts from the swapchain to the device without also moving backup_dc handling seemed a bit awkward to me. It can certainly be separated again though.
Actually, I guess there's the option to do it before the other change instead. Hmm, I might try that one.
I was thinking of making it a preceding change, yeah.
It'd be nice for review purposes but honestly I think it'd also be nice for regression purposes.
On Wed Nov 23 19:01:07 2022 +0000, Matteo Bruni wrote:
Hmm, context_count should also work now. It didn't initially, before I introduced the wined3d_context_gl_destroy() calls in wined3d_device_gl_create_primary_opengl_context_cs(). Actually, I'm not sure that it works correctly now either. Will have a look...
Yeah, it's not like it matters to use device->blitter, but we probably should fix that code path if it's broken; I would kind of expect context_count to be in a consistent state...
On Wed Nov 23 19:14:09 2022 +0000, Matteo Bruni wrote:
Related to the previous one, I'll try to split it off. One problem that I see is that I don't think that currently there is any function to remove an individual GL context from a swapchain (and its 'contexts' array). I guess I could introduce it just to get rid of it right after...
Yeah, I'll admit I don't like the cleanup path here, but probably better to just fix the leaks than bikeshed how to make it prettier.
I think I'm still missing something. I assume that the rub is "GL sync is implicit *until* we start using ARB_sync and persistent maps, or similarly APPLE_flush_buffer_range", but that doesn't explain the use of glFinish(), which presumably wouldn't be required in that case. [I also don't understand why a7c320d184fb specifically adds that wait to wined3d_context_gl_cleanup()? Unless it's incidental to that change.]
It's already a while ago now, but the reason a7c320d184fb9d48239caf0f5529b6992477b82b introduced those lines in wined3d_context_gl_cleanup() is probably mostly a consequence of how that series was upstreamed; these patches went through a couple of iterations, and there probably isn't anything specifically in that commit that requires this.
However, it does simplify some things. In particular, notice that in commit a7c320d184fb9d48239caf0f5529b6992477b82b wined3d_context_gl_poll_fences() is called directly from wined3d_context_gl_wait_command_fence(). (As opposed to from wined3d_context_gl_cleanup_resources(), which was introduced in commit 7c844cd3c9c465bdeaf3d98d6da5f2f5e460e650.) Further, notice that wined3d_context_gl_poll_fences() is responsible for calling wined3d_fence_destroy() on completed fences. It would have been possible to avoid waiting on those fences before cleaning them up in a7c320d184fb9d48239caf0f5529b6992477b82b, but there didn't seem to be much of a point given the direction we were going with wined3d_context_gl_cleanup_resources() and the existing design in the Vulkan backend. Thinking about it, that does probably mean we may leak some fences when taking the glFinish() path.
As for the glFinish(), as mentioned before, we could probably get rid of it. The way I approached 3cc8147594de868884d3e57babff8eef058c63a3 was to try to maintain the existing behaviour of the code as much as possible without having to acquire a new context; in retrospect, perhaps that wasn't quite the ideal approach. It does have the benefit of not having to check the code for places that may depend on the existing behaviour, of course.
Bottom line, the original idea here was to make sure nothing was in flight here, fences and allocator blocks in particular, and then clean those objects up. It may certainly be possible to relax that somewhat. The glFinish() always was a bit of an awkward fit; on the one hand it's a fair bit heavier than the fences, while on the other hand it doesn't clean everything up.
On Thu Nov 24 00:42:24 2022 +0000, Zebediah Figura wrote:
Yeah, I'll admit I don't like the cleanup path here, but probably better to just fix the leaks than bikeshed how to make it prettier.
BTW the cleanup issue is already there. I wrote a patch doing what I mentioned above.
On Thu Nov 24 00:42:23 2022 +0000, Zebediah Figura wrote:
Ah, of course. It makes me a *bit* nervous, but I guess the main reason to disable CSMT these days is CPU usage, not even performance, so this probably is fine. Not that I know what this glFinish is for in the first place, though... I don't suppose that it can be asynchronous?
The glFinish() here is to force some kind of ordering if there are multiple threads drawing to the same swapchain. Basically the same idea that was behind StrictDrawOrdering before CSMT was a thing.
Obviously this only flushes the current context, so it doesn't necessarily help anyway. I don't think it's too crazy to just get rid of it entirely.