First commit fixes a deadlock when resizing a window.
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/winewayland.drv/wayland_surface.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/dlls/winewayland.drv/wayland_surface.c b/dlls/winewayland.drv/wayland_surface.c index 39c3976cdfd..5e7b04842f6 100644 --- a/dlls/winewayland.drv/wayland_surface.c +++ b/dlls/winewayland.drv/wayland_surface.c @@ -38,7 +38,7 @@ static void xdg_surface_handle_configure(void *data, struct xdg_surface *xdg_sur uint32_t serial) { struct wayland_surface *surface; - BOOL initial_configure = FALSE; + BOOL should_post = FALSE, initial_configure = FALSE; HWND hwnd = data;
TRACE("serial=%u\n", serial); @@ -52,16 +52,17 @@ static void xdg_surface_handle_configure(void *data, struct xdg_surface *xdg_sur /* If we have a previously requested config, we have already sent a * WM_WAYLAND_CONFIGURE which hasn't been handled yet. In that case, * avoid sending another message to reduce message queue traffic. */ - BOOL should_post = surface->requested.serial == 0; + should_post = surface->requested.serial == 0; initial_configure = surface->current.serial == 0; surface->pending.serial = serial; surface->requested = surface->pending; memset(&surface->pending, 0, sizeof(surface->pending)); - if (should_post) NtUserPostMessage(hwnd, WM_WAYLAND_CONFIGURE, 0, 0); }
pthread_mutex_unlock(&surface->mutex);
+ if (should_post) NtUserPostMessage(hwnd, WM_WAYLAND_CONFIGURE, 0, 0); + /* Flush the window surface in case there is content that we weren't * able to flush before due to the lack of the initial configure. */ if (initial_configure) wayland_window_flush(hwnd);
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/winewayland.drv/window_surface.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/dlls/winewayland.drv/window_surface.c b/dlls/winewayland.drv/window_surface.c index e9eb74b17ca..3b354925a8e 100644 --- a/dlls/winewayland.drv/window_surface.c +++ b/dlls/winewayland.drv/window_surface.c @@ -154,7 +154,7 @@ static struct wayland_shm_buffer *wayland_buffer_queue_get_free_buffer(struct wa if (nbuffers < 3) { shm_buffer = wayland_shm_buffer_create(queue->width, queue->height, - WL_SHM_FORMAT_XRGB8888); + WL_SHM_FORMAT_ARGB8888); if (shm_buffer) { /* Buffer events go to their own queue so that we can dispatch @@ -262,7 +262,7 @@ static void copy_pixel_region(const char *src_pixels, RECT *src_rect, { const char *src; char *dst; - int y, width_bytes, height; + int x, y, width_bytes, height; RECT rc;
TRACE("rect %s\n", wine_dbgstr_rect(rgn_rect)); @@ -279,12 +279,14 @@ static void copy_pixel_region(const char *src_pixels, RECT *src_rect, if (width_bytes == src_stride && width_bytes == dst_stride) { memcpy(dst, src, height * width_bytes); + for (x = 3; x < height * width_bytes; x += bpp) dst[x] = 0xff; continue; }
for (y = 0; y < height; y++) { memcpy(dst, src, width_bytes); + for (x = 3; x < width_bytes; x += bpp) dst[x] = 0xff; src += src_stride; dst += dst_stride; }
From: Rémi Bernon rbernon@codeweavers.com
--- dlls/winewayland.drv/window_surface.c | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+)
diff --git a/dlls/winewayland.drv/window_surface.c b/dlls/winewayland.drv/window_surface.c index 3b354925a8e..961ba511c7a 100644 --- a/dlls/winewayland.drv/window_surface.c +++ b/dlls/winewayland.drv/window_surface.c @@ -317,6 +317,34 @@ static void wayland_shm_buffer_copy(struct wayland_shm_buffer *src, copy_pixel_region(src->map_data, &src_rect, dst->map_data, &dst_rect, region); }
+/********************************************************************** + * wayland_shm_buffer_copy_data + */ +static void wayland_shm_buffer_copy_shape(struct wayland_shm_buffer *buffer, const RECT *dirty, + const BITMAPINFO *shape_info, const void *shape_bits) +{ + RECT dst_rect = {0, 0, buffer->width, buffer->height}; + UINT32 *color, shape_stride, color_stride, x, y; + const BYTE *shape; + RECT rect; + + shape_stride = shape_info->bmiHeader.biSizeImage / abs(shape_info->bmiHeader.biHeight); + color_stride = dst_rect.right - dst_rect.left; + + if (!intersect_rect(&rect, &dst_rect, dirty)) return; + + color = (UINT32 *)buffer->map_data + rect.top * color_stride; + shape = (const BYTE *)shape_bits + rect.top * shape_stride; + + for (y = rect.top; y < rect.bottom; y++, color += color_stride, shape += shape_stride) + { + for (x = rect.left; x < rect.right; x++) + { + if (!(shape[x / 8] & (1 << (7 - (x & 7))))) color[x] = 0; + } + } +} + /*********************************************************************** * wayland_window_surface_flush */ @@ -387,6 +415,7 @@ static BOOL wayland_window_surface_flush(struct window_surface *window_surface, }
wayland_shm_buffer_copy_data(shm_buffer, color_bits, &surface_rect, copy_from_window_region); + if (shape_bits) wayland_shm_buffer_copy_shape(shm_buffer, rect, shape_info, shape_bits);
pthread_mutex_lock(&wws->wayland_surface->mutex); if (wayland_surface_reconfigure(wws->wayland_surface))
Alexandros Frantzis (@afrantzis) commented about dlls/winewayland.drv/wayland_surface.c:
/* If we have a previously requested config, we have already sent a * WM_WAYLAND_CONFIGURE which hasn't been handled yet. In that case, * avoid sending another message to reduce message queue traffic. */
BOOL should_post = surface->requested.serial == 0;
should_post = surface->requested.serial == 0; initial_configure = surface->current.serial == 0; surface->pending.serial = serial; surface->requested = surface->pending; memset(&surface->pending, 0, sizeof(surface->pending));
if (should_post) NtUserPostMessage(hwnd, WM_WAYLAND_CONFIGURE, 0, 0);
}
pthread_mutex_unlock(&surface->mutex);
- if (should_post) NtUserPostMessage(hwnd, WM_WAYLAND_CONFIGURE, 0, 0);
IIRC, the reason the post was inside the lock was to ensure that the target HWND hadn't been reassigned if the window/surface was destroyed by the (different) window thread. At some point in the various MR discussions we established that this is not something we need to worry about in general, so moving it out of the lock looks fine.
I am curious, though, under which circumstances posting that message leads to a deadlock?
Alexandros Frantzis (@afrantzis) commented about dlls/winewayland.drv/window_surface.c:
if (width_bytes == src_stride && width_bytes == dst_stride) { memcpy(dst, src, height * width_bytes);
for (x = 3; x < height * width_bytes; x += bpp) dst[x] = 0xff;
From a few synthetic benchmarks I ran locally, this change has a significant performance impact (1.7x-3x compared to just doing the memcpy, with higher overheads for smaller regions).
Switching to a manual 32-bit pixel copy while setting the alpha is a definite improvement, with the results I am seeing being in the range of 1.2x-2x compared to just the memcpy:
``` width = rc.right - rc.left; for (x = 0; x < height * width; ++x) ((UINT32 *)dst)[x] = ((UINT32 *)src)[x] | 0xff000000; ```
So perhaps it's worth switching to such a loop?
Alexandros Frantzis (@afrantzis) commented about dlls/winewayland.drv/window_surface.c:
if (nbuffers < 3) { shm_buffer = wayland_shm_buffer_create(queue->width, queue->height,
WL_SHM_FORMAT_XRGB8888);
WL_SHM_FORMAT_ARGB8888);
Always using ARGB8888 is not very efficient, both due to the overhead of manual alpha handling (i.e., the copy_pixel_region changes) but also because it disables some potential optimizations on the compositor side. We could help the compositor regain some of these optimizations by using wl_surface opaque region hints.
In the experimental branch I had implemented a dynamic switch between XRGB and ARGB depending on window shape and translucency needs, which allowed more efficient copy pixel handling, so this is also a possible avenue to explore if we find the extra complexity to be worth it.
In any case, I don't mind this change as long as we are aware that there are performance concerns we can improve on in the future.
On Tue Jul 9 11:52:46 2024 +0000, Alexandros Frantzis wrote:
IIRC, the reason the post was inside the lock was to ensure that the target HWND hadn't been reassigned if the window/surface was destroyed by the (different) window thread. At some point in the various MR discussions we established that this is not something we need to worry about in general, so moving it out of the lock looks fine. I am curious, though, under which circumstances posting that message leads to a deadlock?
It's probably a regression from some of my window surface locking changes, but it now deadlocks when window surface flush tries to acquire the wayland surface lock, and a resize event at the same time tries to post a message to that window (which requires the "user" global lock) while holding the wayland surface lock itself.
On Tue Jul 9 11:52:47 2024 +0000, Alexandros Frantzis wrote:
Always using ARGB8888 is not very efficient, both due to the overhead of manual alpha handling (i.e., the copy_pixel_region changes) but also because it disables some potential optimizations on the compositor side. We could help the compositor regain some of these optimizations by using wl_surface opaque region hints. In the experimental branch I had implemented a dynamic switch between XRGB and ARGB depending on window shape and translucency needs, which allowed more efficient copy pixel handling, so this is also a possible avenue to explore if we find the extra complexity to be worth it. In any case, I don't mind this change as long as we are aware that there are performance concerns we can improve on in the future.
Does it affect performance of GL/VK surfaces too or just the performance of basic window surfaces?
On Tue Jul 9 11:53:41 2024 +0000, Alexandros Frantzis wrote:
From a few synthetic benchmarks I ran locally, this change has a significant performance impact (1.7x-3x compared to just doing the memcpy, with higher overheads for smaller regions). Switching to a manual 32-bit pixel copy while setting the alpha is a definite improvement, with the results I am seeing being in the range of 1.2x-2x compared to just the memcpy:
width = rc.right - rc.left; for (x = 0; x < height * width; ++x) ((UINT32 *)dst)[x] = ((UINT32 *)src)[x] | 0xff000000;
So perhaps it's worth switching to such a loop?
Sure, now I'm wondering whether we could/should move alpha setting to win32u as well.
On Tue Jul 9 14:55:46 2024 +0000, Rémi Bernon wrote:
Does it affect performance of GL/VK surfaces too or just the performance of basic window surfaces?
Only basic window surfaces are directly affected by this change. However, there may be second order effects for GL/VK in windowed mode, because the compositor will have to enable blending of the main surface (which in this scenario is behind the GL/VK subsurface).
These particular compositor-side concerns are not hard to remedy, though, with something along the lines of: https://gitlab.winehq.org/afrantzis/wine/-/commit/8195813019ed6fb15c0763ea89...