First part of Proton shared memory series. The full branch can be seen at https://gitlab.winehq.org/rbernon/wine/-/commits/mr/shared-memories.
From: Rémi Bernon rbernon@codeweavers.com
Based on a patch by Huw Davies huw@codeweavers.com. --- server/directory.c | 17 +++++++++++++++++ server/file.h | 6 ++++++ server/mapping.c | 29 +++++++++++++++++++++++++++++ server/protocol.def | 6 ++++++ server/user.h | 2 ++ server/winstation.c | 23 +++++++++++++++++++++++ 6 files changed, 83 insertions(+)
diff --git a/server/directory.c b/server/directory.c index 23d7eb0a2b7..5c8671d6146 100644 --- a/server/directory.c +++ b/server/directory.c @@ -37,6 +37,7 @@ #include "process.h" #include "file.h" #include "unicode.h" +#include "user.h"
#define HASH_SIZE 7 /* default hash size */
@@ -277,6 +278,22 @@ struct object *get_directory_obj( struct process *process, obj_handle_t handle ) return get_handle_obj( process, handle, 0, &directory_ops ); }
+struct object *create_desktop_map_directory( struct winstation *winstation ) +{ + static const WCHAR dir_desktop_mapsW[] = {'_','_','w','i','n','e','_','d','e','s','k','t','o','p','_','m','a','p','p','i','n','g','s'}; + static const struct unicode_str dir_desktop_maps_str = {dir_desktop_mapsW, sizeof(dir_desktop_mapsW)}; + struct object *root; + struct directory *mapping_root, *ret; + const struct unicode_str winsta_name = {winstation->obj.name->name, winstation->obj.name->len}; + + root = winstation->obj.name->parent; + mapping_root = create_directory( root, &dir_desktop_maps_str, OBJ_OPENIF, HASH_SIZE, NULL ); + ret = create_directory( &mapping_root->obj, &winsta_name, OBJ_OPENIF, HASH_SIZE, NULL ); + release_object( &mapping_root->obj ); + + return &ret->obj; +} + /* Global initialization */
static void create_session( unsigned int id ) diff --git a/server/file.h b/server/file.h index 210fcec5e78..d0723f96360 100644 --- a/server/file.h +++ b/server/file.h @@ -156,6 +156,10 @@ extern struct timeout_user *add_timeout_user( timeout_t when, timeout_callback f extern void remove_timeout_user( struct timeout_user *user ); extern const char *get_timeout_str( timeout_t timeout );
+/* directory functions */ + +extern struct object *create_desktop_map_directory( struct winstation *winstation ); + /* file functions */
extern struct file *get_file_obj( struct process *process, obj_handle_t handle, @@ -182,6 +186,8 @@ extern void free_mapped_views( struct process *process ); extern int get_page_size(void); extern struct mapping *create_fd_mapping( struct object *root, const struct unicode_str *name, struct fd *fd, unsigned int attr, const struct security_descriptor *sd ); +extern struct object *create_shared_mapping( struct object *root, const struct unicode_str *name, mem_size_t size, + unsigned int attr, const struct security_descriptor *sd, void **ptr ); extern struct object *create_user_data_mapping( struct object *root, const struct unicode_str *name, unsigned int attr, const struct security_descriptor *sd );
diff --git a/server/mapping.c b/server/mapping.c index e730486ac77..4e06bf0c6f3 100644 --- a/server/mapping.c +++ b/server/mapping.c @@ -29,6 +29,7 @@ #include <sys/stat.h> #include <sys/mman.h> #include <unistd.h> +#include <errno.h>
#include "ntstatus.h" #define WIN32_NO_STATUS @@ -161,6 +162,7 @@ struct mapping pe_image_info_t image; /* image info (for PE image mapping) */ struct ranges *committed; /* list of committed ranges in this mapping */ struct shared_map *shared; /* temp file for shared PE mapping */ + void *shared_ptr; /* mmaped pointer for shared mappings */ };
static void mapping_dump( struct object *obj, int verbose ); @@ -896,6 +898,7 @@ static struct mapping *create_mapping( struct object *root, const struct unicode mapping->fd = NULL; mapping->shared = NULL; mapping->committed = NULL; + mapping->shared_ptr = MAP_FAILED;
if (!(mapping->flags = get_mapping_flags( handle, flags ))) goto error;
@@ -1096,6 +1099,7 @@ static void mapping_destroy( struct object *obj ) if (mapping->fd) release_object( mapping->fd ); if (mapping->committed) release_object( mapping->committed ); if (mapping->shared) release_object( mapping->shared ); + if (mapping->shared_ptr != MAP_FAILED) munmap( mapping->shared_ptr, mapping->size ); }
static enum server_fd_type mapping_get_fd_type( struct fd *fd ) @@ -1109,6 +1113,31 @@ int get_page_size(void) return page_mask + 1; }
+struct object *create_shared_mapping( struct object *root, const struct unicode_str *name, mem_size_t size, + unsigned int attr, const struct security_descriptor *sd, void **ptr ) +{ + static unsigned int access = FILE_READ_DATA | FILE_WRITE_DATA; + struct mapping *mapping; + + if (!(mapping = create_mapping( root, name, attr, size, SEC_COMMIT, 0, access, sd ))) return NULL; + + if (mapping->shared_ptr == MAP_FAILED) + { + int fd = get_unix_fd( mapping->fd ); + + mapping->shared_ptr = mmap( NULL, mapping->size, PROT_WRITE, MAP_SHARED, fd, 0 ); + if (mapping->shared_ptr == MAP_FAILED) + { + fprintf( stderr, "wine: Failed to map shared memory: %u %m\n", errno ); + release_object( &mapping->obj ); + return NULL; + } + } + + *ptr = mapping->shared_ptr; + return &mapping->obj; +} + struct object *create_user_data_mapping( struct object *root, const struct unicode_str *name, unsigned int attr, const struct security_descriptor *sd ) { diff --git a/server/protocol.def b/server/protocol.def index 54059b6b580..1f660a90437 100644 --- a/server/protocol.def +++ b/server/protocol.def @@ -890,6 +890,12 @@ typedef struct lparam_t info; } cursor_pos_t;
+struct desktop_shared_memory +{ + int placeholder; +}; +typedef volatile struct desktop_shared_memory desktop_shm_t; + /****************************************************************/ /* Request declarations */
diff --git a/server/user.h b/server/user.h index b6f47bcac1c..75e6a0a37e6 100644 --- a/server/user.h +++ b/server/user.h @@ -77,6 +77,8 @@ struct desktop unsigned int users; /* processes and threads using this desktop */ struct global_cursor cursor; /* global cursor information */ unsigned char keystate[256]; /* asynchronous key state */ + struct object *shared_mapping; /* desktop shared memory mapping */ + desktop_shm_t *shared; /* desktop shared memory */ };
/* user handles functions */ diff --git a/server/winstation.c b/server/winstation.c index 5903497d61e..783e167113a 100644 --- a/server/winstation.c +++ b/server/winstation.c @@ -217,6 +217,22 @@ struct desktop *get_desktop_obj( struct process *process, obj_handle_t handle, u return (struct desktop *)get_handle_obj( process, handle, access, &desktop_ops ); }
+static volatile void *init_desktop_mapping( struct desktop *desktop, const struct unicode_str *name ) +{ + struct object *dir; + + desktop->shared = NULL; + desktop->shared_mapping = NULL; + + if (!(dir = create_desktop_map_directory( desktop->winstation ))) return NULL; + if ((desktop->shared_mapping = create_shared_mapping( dir, name, sizeof(struct desktop_shared_memory), + OBJ_OPENIF, NULL, (void **)&desktop->shared ))) + memset( (void *)desktop->shared, 0, sizeof(*desktop->shared) ); + release_object( dir ); + + return desktop->shared; +} + /* create a desktop object */ static struct desktop *create_desktop( const struct unicode_str *name, unsigned int attr, unsigned int flags, struct winstation *winstation ) @@ -240,6 +256,11 @@ static struct desktop *create_desktop( const struct unicode_str *name, unsigned memset( desktop->keystate, 0, sizeof(desktop->keystate) ); list_add_tail( &winstation->desktops, &desktop->entry ); list_init( &desktop->hotkeys ); + if (!init_desktop_mapping( desktop, name )) + { + release_object( desktop ); + return NULL; + } } else { @@ -297,6 +318,8 @@ static void desktop_destroy( struct object *obj ) if (desktop->global_hooks) release_object( desktop->global_hooks ); if (desktop->close_timeout) remove_timeout_user( desktop->close_timeout ); list_remove( &desktop->entry ); + if (desktop->shared_mapping) release_object( desktop->shared_mapping ); + desktop->shared_mapping = NULL; release_object( desktop->winstation ); }
From: Rémi Bernon rbernon@codeweavers.com
Based on a patch by Huw Davies huw@codeweavers.com. --- server/queue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/server/queue.c b/server/queue.c index fcc946ff0cb..51e01a471bf 100644 --- a/server/queue.c +++ b/server/queue.c @@ -1873,7 +1873,9 @@ static int queue_mouse_message( struct desktop *desktop, user_handle_t win, cons WM_MOUSEHWHEEL /* 0x1000 = MOUSEEVENTF_HWHEEL */ };
- desktop->cursor.last_change = get_tick_count(); + /* update last desktop cursor change time */ + update_desktop_cursor_pos( desktop, desktop->cursor.win, desktop->cursor.x, desktop->cursor.y ); + flags = input->mouse.flags; time = input->mouse.time; if (!time) time = desktop->cursor.last_change;
From: Rémi Bernon rbernon@codeweavers.com
Based on a patch by Huw Davies huw@codeweavers.com. --- server/protocol.def | 9 ++++++- server/queue.c | 66 ++++++++++++++++++++++----------------------- server/user.h | 3 --- 3 files changed, 41 insertions(+), 37 deletions(-)
diff --git a/server/protocol.def b/server/protocol.def index 1f660a90437..9ed4a4a6f85 100644 --- a/server/protocol.def +++ b/server/protocol.def @@ -890,9 +890,16 @@ typedef struct lparam_t info; } cursor_pos_t;
+struct shared_cursor +{ + int x; /* cursor position */ + int y; + unsigned int last_change; /* time of last position change */ +}; + struct desktop_shared_memory { - int placeholder; + struct shared_cursor cursor; /* global cursor information */ }; typedef volatile struct desktop_shared_memory desktop_shm_t;
diff --git a/server/queue.c b/server/queue.c index 51e01a471bf..9ee01af89b5 100644 --- a/server/queue.c +++ b/server/queue.c @@ -423,8 +423,8 @@ static void queue_cursor_message( struct desktop *desktop, user_handle_t win, un msg->msg = message; msg->wparam = wparam; msg->lparam = lparam; - msg->x = desktop->cursor.x; - msg->y = desktop->cursor.y; + msg->x = desktop->shared->cursor.x; + msg->y = desktop->shared->cursor.y; if (!(msg->win = win) && (input = desktop->foreground_input)) msg->win = input->active; queue_hardware_message( desktop, msg, 1 ); } @@ -450,10 +450,10 @@ static int update_desktop_cursor_pos( struct desktop *desktop, user_handle_t win
x = max( min( x, desktop->cursor.clip.right - 1 ), desktop->cursor.clip.left ); y = max( min( y, desktop->cursor.clip.bottom - 1 ), desktop->cursor.clip.top ); - updated = (desktop->cursor.x != x || desktop->cursor.y != y); - desktop->cursor.x = x; - desktop->cursor.y = y; - desktop->cursor.last_change = get_tick_count(); + updated = (desktop->shared->cursor.x != x || desktop->shared->cursor.y != y); + desktop->shared->cursor.x = x; + desktop->shared->cursor.y = y; + desktop->shared->cursor.last_change = get_tick_count();
if (!win && (input = desktop->foreground_input)) win = input->capture; if (!win || !is_window_visible( win ) || is_window_transparent( win )) @@ -502,8 +502,8 @@ static void get_message_defaults( struct msg_queue *queue, int *x, int *y, unsig { struct desktop *desktop = queue->input->desktop;
- *x = desktop->cursor.x; - *y = desktop->cursor.y; + *x = desktop->shared->cursor.x; + *y = desktop->shared->cursor.y; *time = get_tick_count(); }
@@ -527,9 +527,9 @@ void set_clip_rectangle( struct desktop *desktop, const rectangle_t *rect, unsig else desktop->cursor.clip = top_rect;
/* warp the mouse to be inside the clip rect */ - x = max( min( desktop->cursor.x, desktop->cursor.clip.right - 1 ), desktop->cursor.clip.left ); - y = max( min( desktop->cursor.y, desktop->cursor.clip.bottom - 1 ), desktop->cursor.clip.top ); - if (x != desktop->cursor.x || y != desktop->cursor.y) set_cursor_pos( desktop, x, y ); + x = max( min( desktop->shared->cursor.x, desktop->cursor.clip.right - 1 ), desktop->cursor.clip.left ); + y = max( min( desktop->shared->cursor.y, desktop->cursor.clip.bottom - 1 ), desktop->cursor.clip.top ); + if (x != desktop->shared->cursor.x || y != desktop->shared->cursor.y) set_cursor_pos( desktop, x, y );
/* request clip cursor rectangle reset to the desktop thread */ if (reset) post_desktop_message( desktop, WM_WINE_CLIPCURSOR, flags, FALSE ); @@ -1682,8 +1682,8 @@ static void queue_hardware_message( struct desktop *desktop, struct message *msg if (desktop->keystate[VK_XBUTTON2] & 0x80) msg->wparam |= MK_XBUTTON2; break; } - msg->x = desktop->cursor.x; - msg->y = desktop->cursor.y; + msg->x = desktop->shared->cursor.x; + msg->y = desktop->shared->cursor.y;
if (msg->win && (thread = get_window_thread( msg->win ))) { @@ -1874,11 +1874,11 @@ static int queue_mouse_message( struct desktop *desktop, user_handle_t win, cons };
/* update last desktop cursor change time */ - update_desktop_cursor_pos( desktop, desktop->cursor.win, desktop->cursor.x, desktop->cursor.y ); + update_desktop_cursor_pos( desktop, desktop->cursor.win, desktop->shared->cursor.x, desktop->shared->cursor.y );
flags = input->mouse.flags; time = input->mouse.time; - if (!time) time = desktop->cursor.last_change; + if (!time) time = desktop->shared->cursor.last_change;
if (flags & MOUSEEVENTF_MOVE) { @@ -1887,19 +1887,19 @@ static int queue_mouse_message( struct desktop *desktop, user_handle_t win, cons x = input->mouse.x; y = input->mouse.y; if (flags & ~(MOUSEEVENTF_MOVE | MOUSEEVENTF_ABSOLUTE) && - x == desktop->cursor.x && y == desktop->cursor.y) + x == desktop->shared->cursor.x && y == desktop->shared->cursor.y) flags &= ~MOUSEEVENTF_MOVE; } else { - x = desktop->cursor.x + input->mouse.x; - y = desktop->cursor.y + input->mouse.y; + x = desktop->shared->cursor.x + input->mouse.x; + y = desktop->shared->cursor.y + input->mouse.y; } } else { - x = desktop->cursor.x; - y = desktop->cursor.y; + x = desktop->shared->cursor.x; + y = desktop->shared->cursor.y; }
if ((foreground = get_foreground_thread( desktop, win ))) @@ -1916,8 +1916,8 @@ static int queue_mouse_message( struct desktop *desktop, user_handle_t win, cons msg_data->size = sizeof(*msg_data); msg_data->flags = flags; msg_data->rawinput.type = RIM_TYPEMOUSE; - msg_data->rawinput.mouse.x = x - desktop->cursor.x; - msg_data->rawinput.mouse.y = y - desktop->cursor.y; + msg_data->rawinput.mouse.x = x - desktop->shared->cursor.x; + msg_data->rawinput.mouse.y = y - desktop->shared->cursor.y; msg_data->rawinput.mouse.data = input->mouse.data;
enum_processes( queue_rawinput_message, &raw_msg ); @@ -2144,8 +2144,8 @@ static void queue_custom_hardware_message( struct desktop *desktop, user_handle_ msg->msg = input->hw.msg; msg->wparam = 0; msg->lparam = input->hw.lparam; - msg->x = desktop->cursor.x; - msg->y = desktop->cursor.y; + msg->x = desktop->shared->cursor.x; + msg->y = desktop->shared->cursor.y;
queue_hardware_message( desktop, msg, 1 ); } @@ -2660,8 +2660,8 @@ DECL_HANDLER(send_hardware_message) } }
- reply->prev_x = desktop->cursor.x; - reply->prev_y = desktop->cursor.y; + reply->prev_x = desktop->shared->cursor.x; + reply->prev_y = desktop->shared->cursor.y;
switch (req->input.type) { @@ -2679,8 +2679,8 @@ DECL_HANDLER(send_hardware_message) } if (thread) release_object( thread );
- reply->new_x = desktop->cursor.x; - reply->new_y = desktop->cursor.y; + reply->new_x = desktop->shared->cursor.x; + reply->new_y = desktop->shared->cursor.y; release_object( desktop ); }
@@ -3367,8 +3367,8 @@ DECL_HANDLER(set_cursor)
reply->prev_handle = input->cursor; reply->prev_count = input->cursor_count; - reply->prev_x = desktop->cursor.x; - reply->prev_y = desktop->cursor.y; + reply->prev_x = desktop->shared->cursor.x; + reply->prev_y = desktop->shared->cursor.y;
if (req->flags & SET_CURSOR_HANDLE) { @@ -3394,10 +3394,10 @@ DECL_HANDLER(set_cursor) else update_desktop_cursor_handle( desktop, input->cursor ); }
- reply->new_x = desktop->cursor.x; - reply->new_y = desktop->cursor.y; + reply->new_x = desktop->shared->cursor.x; + reply->new_y = desktop->shared->cursor.y; reply->new_clip = desktop->cursor.clip; - reply->last_change = desktop->cursor.last_change; + reply->last_change = desktop->shared->cursor.last_change; }
/* Get the history of the 64 last cursor positions */ diff --git a/server/user.h b/server/user.h index 75e6a0a37e6..33e25e14000 100644 --- a/server/user.h +++ b/server/user.h @@ -54,10 +54,7 @@ struct winstation
struct global_cursor { - int x; /* cursor position */ - int y; rectangle_t clip; /* cursor clip rectangle */ - unsigned int last_change; /* time of last position change */ user_handle_t win; /* window that contains the cursor */ user_handle_t handle; /* last set cursor handle */ };
From: Rémi Bernon rbernon@codeweavers.com
The client should check that the lower SEQUENCE_MASK_BITS are zero before reading the data and confirm that the number is unchanged when it's finished.
Based on a patch by Huw Davies huw@codeweavers.com. --- server/protocol.def | 5 +++++ server/queue.c | 46 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/server/protocol.def b/server/protocol.def index 9ed4a4a6f85..f4c8972f8e7 100644 --- a/server/protocol.def +++ b/server/protocol.def @@ -899,10 +899,15 @@ struct shared_cursor
struct desktop_shared_memory { + unsigned int seq; /* sequence number - server updating if (seq_no & SEQUENCE_MASK) != 0 */ struct shared_cursor cursor; /* global cursor information */ }; typedef volatile struct desktop_shared_memory desktop_shm_t;
+/* Bits that must be clear for client to read */ +#define SEQUENCE_MASK_BITS 4 +#define SEQUENCE_MASK ((1UL << SEQUENCE_MASK_BITS) - 1) + /****************************************************************/ /* Request declarations */
diff --git a/server/queue.c b/server/queue.c index 9ee01af89b5..4e8b8791e71 100644 --- a/server/queue.c +++ b/server/queue.c @@ -232,6 +232,46 @@ static unsigned int last_input_time; static cursor_pos_t cursor_history[64]; static unsigned int cursor_history_latest;
+#if defined(__i386__) || defined(__x86_64__) + +#define SHARED_WRITE_BEGIN( x ) \ + do { \ + volatile unsigned int __seq = *(x); \ + assert( (__seq & SEQUENCE_MASK) != SEQUENCE_MASK ); \ + *(x) = ++__seq; \ + } while(0) + +#define SHARED_WRITE_END( x ) \ + do { \ + volatile unsigned int __seq = *(x); \ + assert( (__seq & SEQUENCE_MASK) != 0 ); \ + if ((__seq & SEQUENCE_MASK) > 1) __seq--; \ + else __seq += SEQUENCE_MASK; \ + *(x) = __seq; \ + } while(0) + +#else + +#define SHARED_WRITE_BEGIN( x ) \ + do { \ + assert( (*(x) & SEQUENCE_MASK) != SEQUENCE_MASK ); \ + if ((__atomic_add_fetch( x, 1, __ATOMIC_RELAXED ) & SEQUENCE_MASK) == 1) \ + __atomic_thread_fence( __ATOMIC_RELEASE ); \ + } while(0) + +#define SHARED_WRITE_END( x ) \ + do { \ + assert( (*(x) & SEQUENCE_MASK) != 0 ); \ + if ((*(x) & SEQUENCE_MASK) > 1) \ + __atomic_sub_fetch( x, 1, __ATOMIC_RELAXED ); \ + else { \ + __atomic_thread_fence( __ATOMIC_RELEASE ); \ + __atomic_add_fetch( x, SEQUENCE_MASK, __ATOMIC_RELAXED ); \ + } \ + } while(0) + +#endif + static void queue_hardware_message( struct desktop *desktop, struct message *msg, int always_queue ); static void free_message( struct message *msg );
@@ -447,13 +487,17 @@ static int update_desktop_cursor_pos( struct desktop *desktop, user_handle_t win { struct thread_input *input; int updated; + unsigned int time = get_tick_count();
x = max( min( x, desktop->cursor.clip.right - 1 ), desktop->cursor.clip.left ); y = max( min( y, desktop->cursor.clip.bottom - 1 ), desktop->cursor.clip.top ); updated = (desktop->shared->cursor.x != x || desktop->shared->cursor.y != y); + + SHARED_WRITE_BEGIN( &desktop->shared->seq ); desktop->shared->cursor.x = x; desktop->shared->cursor.y = y; - desktop->shared->cursor.last_change = get_tick_count(); + desktop->shared->cursor.last_change = time; + SHARED_WRITE_END( &desktop->shared->seq );
if (!win && (input = desktop->foreground_input)) win = input->capture; if (!win || !is_window_visible( win ) || is_window_transparent( win ))
From: Rémi Bernon rbernon@codeweavers.com
Based on a patch by Huw Davies huw@codeweavers.com. --- dlls/win32u/input.c | 20 +++++------ dlls/win32u/ntuser_private.h | 24 +++++++++++++ dlls/win32u/sysparams.c | 6 ++++ dlls/win32u/winstation.c | 65 ++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 11 deletions(-)
diff --git a/dlls/win32u/input.c b/dlls/win32u/input.c index 1dcc439e560..d2821789244 100644 --- a/dlls/win32u/input.c +++ b/dlls/win32u/input.c @@ -740,25 +740,23 @@ BOOL WINAPI NtUserSetCursorPos( INT x, INT y ) */ BOOL get_cursor_pos( POINT *pt ) { - BOOL ret; + desktop_shm_t *shared = get_desktop_shared_memory(); DWORD last_change; + BOOL ret = TRUE; UINT dpi;
- if (!pt) return FALSE; + if (!pt || !shared) return FALSE;
- SERVER_START_REQ( set_cursor ) + SHARED_READ_BEGIN( &shared->seq ) { - if ((ret = !wine_server_call( req ))) - { - pt->x = reply->new_x; - pt->y = reply->new_y; - last_change = reply->last_change; - } + pt->x = shared->cursor.x; + pt->y = shared->cursor.y; + last_change = shared->cursor.last_change; } - SERVER_END_REQ; + SHARED_READ_END( &shared->seq );
/* query new position from graphics driver if we haven't updated recently */ - if (ret && NtGetTickCount() - last_change > 100) ret = user_driver->pGetCursorPos( pt ); + if (NtGetTickCount() - last_change > 100) ret = user_driver->pGetCursorPos( pt ); if (ret && (dpi = get_thread_dpi())) { HMONITOR monitor = monitor_from_point( *pt, MONITOR_DEFAULTTOPRIMARY, 0 ); diff --git a/dlls/win32u/ntuser_private.h b/dlls/win32u/ntuser_private.h index b39e38db5d6..6bcecc80ef8 100644 --- a/dlls/win32u/ntuser_private.h +++ b/dlls/win32u/ntuser_private.h @@ -132,6 +132,7 @@ struct user_thread_info UINT spy_indent; /* Current spy indent */ BOOL clipping_cursor; /* thread is currently clipping */ DWORD clipping_reset; /* time when clipping was last reset */ + desktop_shm_t *desktop_shm; /* Ptr to server's desktop shared memory */ };
C_ASSERT( sizeof(struct user_thread_info) <= sizeof(((TEB *)0)->Win32ClientInfo) ); @@ -250,6 +251,9 @@ void release_user_handle_ptr( void *ptr ) DECLSPEC_HIDDEN; void *next_process_user_handle_ptr( HANDLE *handle, unsigned int type ) DECLSPEC_HIDDEN; UINT win_set_flags( HWND hwnd, UINT set_mask, UINT clear_mask ) DECLSPEC_HIDDEN;
+/* winstation.c */ +extern desktop_shm_t *get_desktop_shared_memory(void) DECLSPEC_HIDDEN; + static inline UINT win_get_flags( HWND hwnd ) { return win_set_flags( hwnd, 0, 0 ); @@ -259,4 +263,24 @@ WND *get_win_ptr( HWND hwnd ) DECLSPEC_HIDDEN; BOOL is_child( HWND parent, HWND child ) DECLSPEC_HIDDEN; BOOL is_window( HWND hwnd ) DECLSPEC_HIDDEN;
+#if defined(__i386__) || defined(__x86_64__) +#define __SHARED_READ_SEQ( x ) (*(x)) +#define __SHARED_READ_FENCE do {} while(0) +#else +#define __SHARED_READ_SEQ( x ) __atomic_load_n( x, __ATOMIC_RELAXED ) +#define __SHARED_READ_FENCE __atomic_thread_fence( __ATOMIC_ACQUIRE ) +#endif + +#define SHARED_READ_BEGIN( x ) \ + do { \ + unsigned int __seq; \ + do { \ + while ((__seq = __SHARED_READ_SEQ( x )) & SEQUENCE_MASK) NtYieldExecution(); \ + __SHARED_READ_FENCE; + +#define SHARED_READ_END( x ) \ + __SHARED_READ_FENCE; \ + } while (__SHARED_READ_SEQ( x ) != __seq); \ + } while(0) + #endif /* __WINE_NTUSER_PRIVATE_H */ diff --git a/dlls/win32u/sysparams.c b/dlls/win32u/sysparams.c index b941d01e3b7..fb45fbc57b4 100644 --- a/dlls/win32u/sysparams.c +++ b/dlls/win32u/sysparams.c @@ -5910,6 +5910,12 @@ static void thread_detach(void) cleanup_imm_thread(); NtClose( thread_info->server_queue );
+ if (thread_info->desktop_shm) + { + NtUnmapViewOfSection( GetCurrentProcess(), (void *)thread_info->desktop_shm ); + thread_info->desktop_shm = NULL; + } + exiting_thread_id = 0; }
diff --git a/dlls/win32u/winstation.c b/dlls/win32u/winstation.c index 6877136e144..ca5dfa5fbd2 100644 --- a/dlls/win32u/winstation.c +++ b/dlls/win32u/winstation.c @@ -262,6 +262,11 @@ BOOL WINAPI NtUserSetThreadDesktop( HDESK handle ) thread_info->client_info.top_window = 0; thread_info->client_info.msg_window = 0; if (key_state_info) key_state_info->time = 0; + if (thread_info->desktop_shm) + { + NtUnmapViewOfSection( GetCurrentProcess(), (void *)thread_info->desktop_shm ); + thread_info->desktop_shm = NULL; + } } return ret; } @@ -586,6 +591,66 @@ static const WCHAR *get_default_desktop( void *buf, size_t buf_size ) return defaultW; }
+static volatile void *map_shared_memory_section( const WCHAR *name, SIZE_T size, HANDLE root ) +{ + OBJECT_ATTRIBUTES attr; + UNICODE_STRING section_str; + HANDLE handle; + UINT status; + void *ptr; + + RtlInitUnicodeString( §ion_str, name ); + InitializeObjectAttributes( &attr, §ion_str, 0, root, NULL ); + if (!(status = NtOpenSection( &handle, SECTION_ALL_ACCESS, &attr ))) + { + ptr = NULL; + status = NtMapViewOfSection( handle, GetCurrentProcess(), &ptr, 0, 0, NULL, + &size, ViewUnmap, 0, PAGE_READONLY ); + NtClose( handle ); + } + + if (status) + { + WARN( "Failed to map view of section %s, status %#x\n", debugstr_w(name), status ); + return NULL; + } + + return ptr; +} + +desktop_shm_t *get_desktop_shared_memory(void) +{ + static const WCHAR dir_desktop_maps[] = + { + '_','_','w','i','n','e','_','d','e','s','k','t','o','p','_','m','a','p','p','i','n','g','s','\',0 + }; + struct user_thread_info *thread_info = get_user_thread_info(); + HANDLE root, handles[2]; + WCHAR buf[MAX_PATH], *ptr; + DWORD i, needed; + + if (thread_info->desktop_shm) return thread_info->desktop_shm; + + handles[0] = NtUserGetProcessWindowStation(); + handles[1] = NtUserGetThreadDesktop( GetCurrentThreadId() ); + + memcpy( buf, dir_desktop_maps, wcslen(dir_desktop_maps) * sizeof(WCHAR) ); + ptr = buf + wcslen(dir_desktop_maps); + + for (i = 0; i < 2; i++) + { + NtUserGetObjectInformation( handles[i], UOI_NAME, (void *)ptr, sizeof(buf) - (ptr - buf) * sizeof(WCHAR), &needed ); + ptr += needed / sizeof(WCHAR); + if (i == 0) *(ptr - 1) = '\'; + } + + root = get_winstations_dir_handle(); + thread_info->desktop_shm = map_shared_memory_section( buf, sizeof(*thread_info->desktop_shm), root ); + NtClose( root ); + + return thread_info->desktop_shm; +} + /*********************************************************************** * winstation_init *
Jinoh Kang (@iamahuman) commented about dlls/win32u/ntuser_private.h:
BOOL is_child( HWND parent, HWND child ) DECLSPEC_HIDDEN; BOOL is_window( HWND hwnd ) DECLSPEC_HIDDEN;
+#if defined(__i386__) || defined(__x86_64__) +#define __SHARED_READ_SEQ( x ) (*(x)) +#define __SHARED_READ_FENCE do {} while(0)
A compiler barrier is (still) required regardless of whether or not the underlying architecture has strong memory ordering semantics.
```suggestion:-0+0 #define __SHARED_READ_FENCE __asm__ __volatile__( "" ::: "memory" ); ```
Jinoh Kang (@iamahuman) commented about server/queue.c:
static cursor_pos_t cursor_history[64]; static unsigned int cursor_history_latest;
+#if defined(__i386__) || defined(__x86_64__)
+#define SHARED_WRITE_BEGIN( x ) \
- do { \
volatile unsigned int __seq = *(x); \
I don't think `volatile` on an unrelated variable can suppress any undesired optimization, or act as a barrier at all[^vol], unlike MSVC. MSVC doesn't necessarily treat `volatile` accesses as an acquire/release barrier either if `/volatile:iso` is specified[^msvc].
[^vol]: [Why the "volatile" type class should not be used](https://www.kernel.org/doc/html/latest/process/volatile-considered-harmful.h...) [^msvc]: [volatile (C++) | Microsoft Learn](https://learn.microsoft.com/en-us/cpp/cpp/volatile-cpp?view=msvc-170)
Jinoh Kang (@iamahuman) commented about server/queue.c:
static cursor_pos_t cursor_history[64]; static unsigned int cursor_history_latest;
+#if defined(__i386__) || defined(__x86_64__)
+#define SHARED_WRITE_BEGIN( x ) \
- do { \
volatile unsigned int __seq = *(x); \
assert( (__seq & SEQUENCE_MASK) != SEQUENCE_MASK ); \
*(x) = ++__seq; \
- } while(0)
+#define SHARED_WRITE_END( x ) \
- do { \
volatile unsigned int __seq = *(x); \
Ditto.
Jinoh Kang (@iamahuman) commented about server/queue.c:
+#define SHARED_WRITE_BEGIN( x ) \
- do { \
assert( (*(x) & SEQUENCE_MASK) != SEQUENCE_MASK ); \
if ((__atomic_add_fetch( x, 1, __ATOMIC_RELAXED ) & SEQUENCE_MASK) == 1) \
__atomic_thread_fence( __ATOMIC_RELEASE ); \
- } while(0)
+#define SHARED_WRITE_END( x ) \
- do { \
assert( (*(x) & SEQUENCE_MASK) != 0 ); \
if ((*(x) & SEQUENCE_MASK) > 1) \
__atomic_sub_fetch( x, 1, __ATOMIC_RELAXED ); \
else { \
__atomic_thread_fence( __ATOMIC_RELEASE ); \
__atomic_add_fetch( x, SEQUENCE_MASK, __ATOMIC_RELAXED ); \
Can we just coalesce these two calls into one for better optimization opportunities (e.g., use of STLR with RCpc semantics on ARM64, instead of `DMB ISH`)?
```suggestion:-1+0 __atomic_add_fetch( x, SEQUENCE_MASK, __ATOMIC_RELEASE ); \ ```
On Mon Jun 19 13:24:15 2023 +0000, Jinoh Kang wrote:
Can we just coalesce these two calls into one for better optimization opportunities (e.g., use of STLR with RCpc semantics on ARM64, instead of `STR; DMB ISH`)?
__atomic_add_fetch( x, SEQUENCE_MASK, __ATOMIC_RELEASE ); \
I believe this is to pair with `__SHARED_READ_FENCE` and `__SHARED_READ_SEQ` respectively. I don't know how valid it is to merge both.
On Mon Jun 19 13:22:09 2023 +0000, Jinoh Kang wrote:
I don't think `volatile` on an unrelated variable can suppress any undesired optimization, or act as a barrier at all[^vol], unlike MSVC. MSVC doesn't necessarily treat `volatile` accesses as an acquire/release barrier either if `/volatile:iso` is specified[^msvc]. [^vol]: [Why the "volatile" type class should not be used](https://www.kernel.org/doc/html/latest/process/volatile-considered-harmful.h...) [^msvc]: [volatile (C++) | Microsoft Learn](https://learn.microsoft.com/en-us/cpp/cpp/volatile-cpp?view=msvc-170)
It is not a barrier with normal load/store operations, but I believe it is with other volatile accesses. Everything that is written between `SHARED_WRITE_BEGIN` / `SHARED_WRITE_END` is volatile, so as I understand it, ordering should be guaranteed.
Fwiw I wouldn't mind using __atomic builtins everywhere, I'm not sure to see how it is better to have to argue about volatile semantics every time, to keep compatibility with some unknown compiler that would not support them. GCC >= 4.7 has them, which imho is old enough, and I don't think we will ever build wineserver with MSVC, but if we do it would not be too difficult to adjust then.
On Tue Jun 20 08:32:45 2023 +0000, Rémi Bernon wrote:
It is not a barrier with normal load/store operations, but I believe it is with other volatile accesses. Everything that is written between `SHARED_WRITE_BEGIN` / `SHARED_WRITE_END` is volatile, so as I understand it, ordering should be guaranteed. Fwiw I wouldn't mind using __atomic builtins everywhere, I'm not sure to see how it is better to have to argue about volatile semantics every time, to keep compatibility with some unknown compiler that would not support them. GCC >= 4.7 has them, which imho is old enough, and I don't think we will ever build wineserver with MSVC, but if we do it would not be too difficult to adjust then.
In that case, wouldn't it suffice for `*(x)` to be volatile?
On Mon Jun 19 13:25:54 2023 +0000, Jinoh Kang wrote:
A compiler barrier is (still) required regardless of whether or not the underlying architecture has strong memory ordering semantics.
#define __SHARED_READ_FENCE __asm__ __volatile__( "" ::: "memory" )
I think that it's actually not needed for the same reasons as described below: everything that is accessed in between is volatile.
On Tue Jun 20 11:10:02 2023 +0000, Jinoh Kang wrote:
In that case, wouldn't it suffice for `*(x)` to be volatile?
Maybe, but I think having the volatile variable makes it consistent with the "everything done inside these blocks is volatile" assertion.
Fwiw I wouldn't mind using __atomic builtins everywhere, I'm not sure to see how it is better to have to argue about volatile semantics every time, to keep compatibility with some unknown compiler that would not support them.
I'd think it's better to have atomic helpers around like ReadAcquire in winnt.h? Anyway I'm not sure if volatile accesses are never reordered with atomic operations.
GCC >= 4.7 has them, which imho is old enough,
Atomic builtins are buggy prior to GCC 7.x (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81316 affects x86).
and I don't think we will ever build wineserver with MSVC, but if we do it would not be too difficult to adjust then.
MSVC was just an example; some people expect GCC volatile to act like MSVC.
Huw Davies (@huw) commented about server/protocol.def:
struct desktop_shared_memory {
- unsigned int seq; /* sequence number - server updating if (seq_no & SEQUENCE_MASK) != 0 */ struct shared_cursor cursor; /* global cursor information */
}; typedef volatile struct desktop_shared_memory desktop_shm_t;
+/* Bits that must be clear for client to read */ +#define SEQUENCE_MASK_BITS 4 +#define SEQUENCE_MASK ((1UL << SEQUENCE_MASK_BITS) - 1)
One thing that I was unhappy with when working on this was the need to allow nested updates. I'm not saying that it should be avoided as that seemed tricky, but I just wanted to flag it here.
On Wed Jun 28 06:31:09 2023 +0000, Huw Davies wrote:
One thing that I was unhappy with when working on this was the need to allow nested updates. I'm not saying that it should be avoided as that seemed tricky, but I just wanted to flag it here.
Hmm, do we need it though? It looked like a feature but I'm not sure I remember where it's used.
On Wed Jun 28 07:04:43 2023 +0000, Rémi Bernon wrote:
Hmm, do we need it though? It looked like a feature but I'm not sure I remember where it's used.
From a quick look it doesn't seem to be used very much, maybe I can get rid of it entirely if you were unhappy with it.
What do you think about doing something like https://gitlab.winehq.org/rbernon/wine/-/commit/f264bf99b135f72496f291dcfcff..., to statically assert that the SHM is only written within SHARED_WRITE_BEGIN / SHARED_WRITE_END, and make sure the passed sequence member matches?
I'm also considering doing the same on the client side, to better match SERVER_START_REQ / SERVER_END_REQ pattern, and avoid having to pass the sequence member to SHARED_READ_END (and the potential typos), although it's not as interesting there.
On Wed Jun 28 10:13:03 2023 +0000, Rémi Bernon wrote:
What do you think about doing something like https://gitlab.winehq.org/rbernon/wine/-/commit/f264bf99b135f72496f291dcfcff..., to statically assert that the SHM is only written within SHARED_WRITE_BEGIN / SHARED_WRITE_END, and make sure the passed sequence member matches? I'm also considering doing the same on the client side, to better match SERVER_START_REQ / SERVER_END_REQ pattern, and avoid having to pass the sequence member to SHARED_READ_END (and the potential typos), although it's not as interesting there.
Do we really need SHARED_READ_BEGIN / SHARED_READ_END? I think that hiding a loop behind them is not great. It should be possible to achieve that with a helper function, so that client code would look like:
``` unsigned int lock = 0; while (server_shm_lock( &lock, &shared->seq )) { /* read shared */ } ```
On Wed Jun 28 09:14:11 2023 +0000, Rémi Bernon wrote:
From a quick look it doesn't seem to be used very much, maybe I can get rid of it entirely if you were unhappy with it.
If it's possible to avoid having nested updates that strikes me as being better. The trick of course is to ensure that the exposed server state is valid at all times.
On Wed Jun 28 10:13:03 2023 +0000, Jacek Caban wrote:
Do we really need SHARED_READ_BEGIN / SHARED_READ_END? I think that hiding a loop behind them is not great. It should be possible to achieve that with a helper function, so that client code would look like:
unsigned int lock = 0; while (server_shm_lock( &lock, &shared->seq )) { /* read shared */ }
Sure. I'd deliberately made it look like a server call, but we don't have to do that of course.
I looked at something somewhat similar for shared user handle table. Back then, I hoped to use it also for user32/win32u separation. It ended up being too much of a side quest, but I hope to revisit that at some point. My old branch is here: https://gitlab.winehq.org/jacek/wine/-/tree/user-handles
There is not much overlap between that work and this MR (or Proton patches in general), but I'm mentioning it because it could be good to reconsider separate mappings per thread. I think that we will eventually want a global per-session mapping for handle table anyway. With a global shared mapping, mapped once in a process, we could just pass desktop data offset in requests affecting active desktop to keep client thread info updated.
On Wed Jun 28 10:24:39 2023 +0000, Huw Davies wrote:
Sure. I'd deliberately made it look like a server call, but we don't have to do that of course.
I personally like that it looks like server requests, it makes wineserver communication stand out compared to other function calls.
On Wed Jun 28 11:57:05 2023 +0000, Jacek Caban wrote:
I looked at something somewhat similar for shared user handle table. Back then, I hoped to use it also for user32/win32u separation. It ended up being too much of a side quest, but I hope to revisit that at some point. My old branch is here: https://gitlab.winehq.org/jacek/wine/-/tree/user-handles There is not much overlap between that work and this MR (or Proton patches in general), but I'm mentioning it because it could be good to reconsider separate mappings per thread. I think that we will eventually want a global per-session mapping for handle table anyway. With a global shared mapping, mapped once in a process, we could just pass desktop data offset in requests affecting active desktop to keep client thread info updated.
Having a global mapping would make it more complicated to support an arbitrary number of threads. Unless we want to use a fixed limit, which doesn't sound ideal, we'd need to support resizing the mapping somehow.
On Wed Jun 28 11:57:05 2023 +0000, Rémi Bernon wrote:
Having a global mapping would make it more complicated to support an arbitrary number of threads. Unless we want to use a fixed limit, which doesn't sound ideal, we'd need to support resizing the mapping somehow.
Number of threads does not seem relevant, it would be a number of desktops.
On Wed Jun 28 13:11:30 2023 +0000, Jacek Caban wrote:
Number of threads does not seem relevant, it would be a number of desktops.
This MR introduces shared memory for desktops, but the rest of the series also does the same with thread message queue and thread input. Thread input is the most interesting part as it avoids wineserver roundtrips for many functions like active / focus / foreground window, etc...
On Wed Jun 28 13:16:09 2023 +0000, Rémi Bernon wrote:
This MR introduces shared memory for desktops, but the rest of the series also does the same with thread message queue and thread input. Thread input is the most interesting part as it avoids wineserver roundtrips for many functions like active / focus / foreground window, etc...
Those use separated mappings, so it's not exactly relevant here.
On Wed Jun 28 13:31:00 2023 +0000, Jacek Caban wrote:
Those use separated mappings, so it's not exactly relevant here.
Yes, isn't that what you meant by "separate mappings per thread"? Or maybe you just meant avoid mapping the desktop shared memory multiple times on the client side?
I believe this is to pair with `__SHARED_READ_FENCE` and `__SHARED_READ_SEQ` respectively. I don't know how valid it is to merge both.
From [LLVM Atomic Instructions and Concurrency Guide][llvm-atomics]:
A `fence` provides Acquire and/or Release ordering which is not part of another operation; it is normally used along with Monotonic memory operations. A Monotonic load followed by an Acquire fence is roughly equivalent to an Acquire load, and a Monotonic store following a Release fence is roughly equivalent to a Release store. SequentiallyConsistent fences behave as both an Acquire and a Release fence, and offer some additional complicated guarantees, see the C++11 standard for details.
Since we intend the fence to impose ordering constraints only between the shared read/write section and the atomic operation on the sequence variable, it is safe to coalesce the atomic operation and the fence.
[llvm-atomics]: https://www.llvm.org/docs/Atomics.html
On Thu Jun 22 07:31:50 2023 +0000, Rémi Bernon wrote:
I think that it's actually not needed for the same reasons as described below: everything that is accessed in between is volatile.
Right, it should be fine as long as there is no intermediate non-volatile memory access (not necessarily an explicit variable) that interacts with the shared data / seqlock.
Maybe, but I think having the volatile variable makes it consistent with the "everything done inside these blocks is volatile" assertion.
If by everything done you mean computation, I don't think it holds since
```c *(x) = ++__seq; ```
just expands to:
```c unsigned int tmp = __seq + 1; __seq = tmp; *(x) = tmp; ```
where `tmp` is not volatile.
Furthermore it compiles to unnecessary MOV instructions (1 before the increment, 2 after the increment) around the increment, which cannot be optimized at all due to `volatile`; without volatile, the increment can be as simple as `INC R_seq; MOV [x], R_seq`.
If by everything done you mean access to any location that other threads may access (i.e., the pointer being dereferenced may alias with another pointer owned by a concurrently executing subroutine), then `__seq` (which is merely an automatic variable that does not escape this subroutine) does not meet this condition.
On Wed Jun 28 11:53:28 2023 +0000, Rémi Bernon wrote:
I personally like that it looks like server requests, it makes wineserver communication stand out compared to other function calls.
Yes, not to mention that it introduces an unnecessary read fence (which is a bit costly on ARM platforms) at the start of the loop. The read fence cannot be reordered after the `__SHARED_READ_SEQ( x ) != __seq` comparison, so it's difficult to make it conditional without introducing even more complexity or overhead.
On Wed Jun 28 14:25:09 2023 +0000, Rémi Bernon wrote:
Yes, isn't that what you meant by "separate mappings per thread"? Or maybe you just meant avoid mapping the desktop shared memory multiple times on the client side?
1. [user-handles] is tracking per-*session* state (`gSharedInfo`). It appears to me that a *process* is associated with at most one session at any time. Also, each *process* has only one mapping backed by the session shared memory.
2. This MR is tracking per-*desktop* state (`desktop_shm_t`). It appears to me that a *thread* is associated with at most one desktop at any time. Also, each *thread* has only one mapping backed by the desktop shared memory. - Transitively, a process can have multiple mappings backed by some desktop shared memory, since a process can have multiple threads. - If two or more threads have the same desktop, then each of those threads will have a *different* mapping backed by the *same* desktop shared memory. I believe *this* part is what is believed to be wasteful.
[user-handles]: https://gitlab.winehq.org/jacek/wine/-/tree/user-handles
On Wed Jun 28 16:39:03 2023 +0000, Jinoh Kang wrote:
- [user-handles] is tracking per-*session* state (`gSharedInfo`). It
appears to me that a *process* is associated with at most one session at any time. Also, each *process* has only one mapping backed by the session shared memory. 2. This MR is tracking per-*desktop* state (`desktop_shm_t`). It appears to me that a *thread* is associated with at most one desktop at any time. Also, each *thread* has only one mapping backed by the desktop shared memory.
- Transitively, a process can have multiple mappings backed by some
desktop shared memory, since a process can have multiple threads.
- If two or more threads have the same desktop, then each of those
threads will have a *different* mapping backed by the *same* desktop shared memory. I believe *this* part is what is believed to be wasteful. [user-handles]: https://gitlab.winehq.org/jacek/wine/-/tree/user-handles
Instead of letting each thread own a unique desktop-state mapping, perhaps we should instead make the virtual memory mapping ref-counted, and maintain a mapping from a desktop's unique ID to the refcounted mapping (which is backed by the same section).
The trick of course is to ensure that the exposed server state is valid at all times.
If "valid at all times" is interpreted literally (i.e. at any give time), this is not possible.
- A `desktop_shm_t` block is supposed to be arbitrarily large. - We cannot perform atomic operations on an arbitrarily large block of memory (exceeding e.g., 128 bits).
Therefore, we cannot perform atomic operations on a `desktop_shm_t` block.
Even if we get rid of nested updates, we need at least 1 bit of "dirty" state in `SEQUENCE_MASK`.
Even if we get rid of nested updates, we need at least 1 bit of "dirty" state in `SEQUENCE_MASK`.
Well yes, I meant we'd be clear to read if `seq` was even, but I guess I failed to articulate that.
I personally like that it looks like server requests, it makes wineserver communication stand out compared to other function calls.
I guess it depends on how far are we planning to go with it, but I can see an argument to use it from PE code and abstract wineserver involvement as "kernel" mapping (wineserver being an implementation detail of "kernel"). There is a very similar mechanism on Windows that's used by user32 instead of syscalls. It should be possible to replace some calls for which currently use interface like NtUserCall*Param with PE-side implementation using such mapping (`GetCursorPos` possibly being one of them, but this particular one would be more tricky and unlikely to worth it).
Anyway, we may worry about that if we decide to expose the mapping to PE.
Yes, isn't that what you meant by "separate mappings per thread"? Or maybe you just meant avoid mapping the desktop shared memory multiple times on the client side?
I meant to avoid mapping the desktop multiple times. There are different solutions, I suggested something that seems similar to Windows kernel memory mapping. We could use something like `alloc_session_shared` from https://gitlab.winehq.org/jacek/wine/-/commit/fc899509484cb381ceb9dba1a788cc... (without the handle argument).
Yes, isn't that what you meant by "separate mappings per thread"? Or maybe you just meant avoid mapping the desktop shared memory multiple times on the client side?
I meant to avoid mapping the desktop multiple times. There are different solutions, I suggested something that seems similar to Windows kernel memory mapping. We could use something like `alloc_session_shared` from https://gitlab.winehq.org/jacek/wine/-/commit/fc899509484cb381ceb9dba1a788cc... (without the handle argument).
Yes, but a process can have multiple desktops, and that "would make it more complicated to support an arbitrary number of threads."
On Thu Jun 29 12:45:44 2023 +0000, Jinoh Kang wrote:
Yes, isn't that what you meant by "separate mappings per thread"? Or
maybe you just meant avoid mapping the desktop shared memory multiple times on the client side?
I meant to avoid mapping the desktop multiple times. There are
different solutions, I suggested something that seems similar to Windows kernel memory mapping. We could use something like `alloc_session_shared` from https://gitlab.winehq.org/jacek/wine/-/commit/fc899509484cb381ceb9dba1a788cc... (without the handle argument). Yes, but a process can have multiple desktops, and that "would make it more complicated to support an arbitrary number of threads."
(To clarify, by "complicated approach" I was referring to the refcounting mechanism above. Whether it really counts as "complicated," I don't know.)
We could use something like `alloc_session_shared` from https://gitlab.winehq.org/jacek/wine/-/commit/fc899509484cb381ceb9dba1a788cc... (without the handle argument).
This solution would be directly applicable if HDESK was a USER handle, but it is actually a KERNEL handle. This means:
1. We can't use `handle_to_entry( user_handle_t handle )`. The closest thing available for KERNEL handles is `get_cached_fd`, but it deals with file descriptors.
2. We can't extend `fd_cache_entry` to include `shared_offset`, since `fd_cache_entry` will exceed 8 bytes and we'll need to use `InterlockedCompareExchange128` to update it. `cmpxchg16` is not available on 32-bit. Besides, introducing all this complexity just for desktop handles doesn't seem worth it.
3. We can of course create our own `HDESK -> shared_{offset|address}` map, but this means dealing with handle aliases--two different HDESKs can point to the same desktop object. Not to mention that KERNEL handles can be freely duplicated and closed via other kernel32 functions as well as from other processes.
On Thu Jun 29 13:07:22 2023 +0000, Jinoh Kang wrote:
We could use something like `alloc_session_shared` from
https://gitlab.winehq.org/jacek/wine/-/commit/fc899509484cb381ceb9dba1a788cc... (without the handle argument). This solution would be directly applicable if HDESK was a USER handle, but it is actually a KERNEL handle. This means:
- We can't use `handle_to_entry( user_handle_t handle )`. The closest
thing available for KERNEL handles is `get_cached_fd`, but it deals with file descriptors. 2. We can't extend `fd_cache_entry` to include `shared_offset`, since `fd_cache_entry` will exceed 8 bytes and we'll need to use `InterlockedCompareExchange128` to update it. `cmpxchg16` is not available on 32-bit. Besides, introducing all this complexity just for desktop handles doesn't seem worth it. 3. We can of course create our own `HDESK -> shared_{offset|address}` map, but this means dealing with handle aliases--two different HDESKs can point to the same desktop object. Not to mention that KERNEL handles can be freely duplicated and closed via other kernel32 functions as well as from other processes.
We could just pass a data offset as part of server requests that affect or query current desktop.
And yes, there is an argument to use a different mapping for more-or-less thread-specific data, but desktops are global objects.
We could just pass a data offset as part of server requests that affect or query current desktop.
This implies the existence of some shared memory that encompasses such desktops, where each desktop can be located at some "data offset."
This leads to four[^5th] sub-problems, which IMHO have minimal impact in my original proposal of "ref-counted mappings:"
1. What's the scope of the shared memory?
1. **The scope of the shared memory is Winstation-wide:** This sounds like the most intuitive choice to me, since a desktop has exactly one parent Winstation at any time. Handling `SetProcessWindowStation` will need to get a little more complicated, though.
2. **The scope of the shared memory is wineprefix-wide:** This will make the shared memory management more straightforward, but may end up bloating the shared memory for *all* processes whenever some sandbox-based application tries to create a new station.
2. How many desktops can fit in the initial size of the shared memory?
1. **Just one:** This is the least bloated choice, since most applications (including the `wineboot` dialog) needs just one desktop.
2. **More than one:** This will allow us to delay the 3rd and 4th problem when handling multiple desktops, but will lead to more bloat.
3. Can the shared memory be resized after creation?
1. **The size is fixed:** This means that we cannot use the desktop shared memory when the number of desktops exceed the certain threshold. The CreateDesktop[^createdesktop] documentation states that the "number of desktops that can be created is limited by the size of the system desktop heap," although I'm not sure if the heap is Winstation-wide or system-wide.
2. **The size is variable:** This allows us to delay the 4th problem until when we run out of the host memory.
4. What if we run out of shared memory space?
1. **Fail desktop creation:** Simplest solution, but may lead to regression in existing apps (e.g., Win32 sandboxed apps) if the shared memory is fixed and limited in size (e.g., just one desktop).
2. **Don't fail desktop creation:** This way, we'll have to retain the current desktop-related querying server calls for fallback, which may be additional maintenance burden.
Ref-counted mappings avoid these problems entirely, by making each mapping local to the specific desktop object.
And yes, there is an argument to use a different mapping for more-or-less thread-specific data,
Meanwhile, ref-counted mappings do not rely on thread-specific mappings; multiple threads can share the same mapping (the refcount merely has to be incremented).
but desktops are global objects.
I believe that the ambiguity of the word _global_ was the source of confusion. "Global" can mean two things:
1. The object being described is associated with some *broader* scope that subsumes the *immediate* scope in context. This matches the dictionary definition. For example, a C global variable is associated with the *program* scope, which subsumes the *scope of a translation unit* it is defined in.
2. The object is part of some ambient, implicit environment. For example, [ambient authority] is used for Unix/NT APIs that need to check for access permissions, and thread-local variables can be used as implicit parameters to functions.
I guess that the meaning was not easily disambiguated since the notion of "broader" and "immediate" scope were not clearly agreed (or thought to be agreed) upon, and the latter does not imply the former. I could be wrong, though, so please ignore it if it was not the case.
[^5th]: There's actually the 5th problem, which is a bit theoretical: a process that is given access to the entire shared memory can read the state of a desktop that the process has no access to. Wine is not a security boundary, so let's ignore it. [^createdesktop]: [CreateDesktopA function (winuser.h) - Win32 apps | Microsoft Learn](https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-creat...)
[ambient authority]: https://en.wikipedia.org/wiki/Ambient_authority
On Wed Jun 28 18:01:29 2023 +0000, Huw Davies wrote:
Even if we get rid of nested updates, we need at least 1 bit of
"dirty" state in `SEQUENCE_MASK`. Well yes, I meant we'd be clear to read if `seq` was even, but I guess I failed to articulate that.
Thanks. Even if we need nested updates for *some* shared states, we can just track the nesting level out-of-band (i.e., out of the shared memory region) in some metadata block for the shared memory.
Jinoh Kang (@iamahuman) commented about server/mapping.c:
pe_image_info_t image; /* image info (for PE image mapping) */ struct ranges *committed; /* list of committed ranges in this mapping */ struct shared_map *shared; /* temp file for shared PE mapping */
- void *shared_ptr; /* mmaped pointer for shared mappings */
Is there a reason why the section (mapping) object should be responsible for managing the server-side VMA's lifecycle? How about just returning mmap()-ed pointer for the section object on demand, and let the user free it?
For example, the desktop object doesn't really need reference to the section object itself; it merely has to access the actual shared memory.[^note]
[^note]: At least this was the impression I got from https://gitlab.winehq.org/rbernon/wine/-/commits/mr/shared-memories. Maybe there's a use case for resizing the mapping, but I suppose that requires creating a new replacement anyway, and the object can be referenced by name in that case.
On Sat Jul 1 14:15:57 2023 +0000, Jinoh Kang wrote:
Is there a reason why the section (mapping) object should be responsible for managing the server-side VMA's lifecycle? How about just returning mmap()-ed pointer for the section object on demand, and let the user free it? For example, the desktop object doesn't really need reference to the section object itself; it merely has to access the actual shared memory.[^note] [^note]: At least this was the impression I got from https://gitlab.winehq.org/rbernon/wine/-/commits/mr/shared-memories. Maybe there's a use case for resizing the mapping, but I suppose that requires creating a new replacement anyway, and the object can be referenced by name in that case.
I don't see any particular reason not to do it like that.
We had some scenarios in Proton where the mapping was transient, while the desktop was destroyed. A new desktop object was created quickly after, re-using the same mapping. The relevant commit message in Proton is:
``` server: Reuse shared mapping's mmaped pointer and handle mmap failures.
Desktop and its shared mapping can be still alive and may be reused after desktop is unlinked via close_desktop_timeout() but before all references and handles are cleared.
On the re-use path the mmap(PROT_WRITE) is called on a write-sealed fd which will fail returning MAP_FAILED. It would then be propagated up and dereferenced causing the server to crash.
This fixes the crash + makes sure that we can recreate the desktop successfully if needed.
This also correctly unmap the shared memory pointers whenever a mapping is destroyed, fixing memory leaks when many threads are created. ```
There's also some possible patches to seal the mapping to make sure only wineserver can write to them. This requires the memory to be mmap writable only once (and future wineserver maps to re-use it).
On Thu Aug 3 12:57:37 2023 +0000, Rémi Bernon wrote:
I don't see any particular reason not to do it like that. We had some scenarios in Proton where the mapping was transient, while the desktop was destroyed. A new desktop object was created quickly after, re-using the same mapping. The relevant commit message in Proton is:
server: Reuse shared mapping's mmaped pointer and handle mmap failures. Desktop and its shared mapping can be still alive and may be reused after desktop is unlinked via close_desktop_timeout() but before all references and handles are cleared. On the re-use path the mmap(PROT_WRITE) is called on a write-sealed fd which will fail returning MAP_FAILED. It would then be propagated up and dereferenced causing the server to crash. This fixes the crash + makes sure that we can recreate the desktop successfully if needed. This also correctly unmap the shared memory pointers whenever a mapping is destroyed, fixing memory leaks when many threads are created.
There's also some possible patches to seal the mapping to make sure only wineserver can write to them. This requires the memory to be mmap writable only once (and future wineserver maps to re-use it).
I'm not sure I understand the "reuse" part. A desktop object be considered alive as long as it has nonzero reference or handle count. In this case, shouldn't the mapping be kept alive as well?
If I understood correctly, Proton is basically "resurrecting" the mapping if a desktop object is brought back to the namespace. I'm not sure if trying to release the mapping while the desktop is still technically alive in the first place is a good idea. It comes to me as a bit awkward that we don't bind the lifetime of the mapping to the lifetime of the desktop object itself.
On Thu Aug 3 15:38:10 2023 +0000, Jinoh Kang wrote:
I'm not sure I understood the "reuse" part. A desktop object be considered alive as long as it has nonzero reference or handle count. In this case, shouldn't the mapping be kept alive as well? If I understood correctly, Proton is basically "resurrecting" the mapping if a desktop object is brought back to the namespace. I'm not sure if trying to release the mapping while the desktop is still technically alive in the first place is a good idea. It comes to me as a bit awkward that we don't bind the lifetime of the mapping to the lifetime of the desktop object itself.
The desktop is unlinked in `close_desktop_timeout`, when it is about to be closed and its name is released. A new desktop with the same name can be created, before the previous one is fully destroyed, ending up re-using the same mapping because it is created with the desktop name.
ending up re-using the same mapping because it is created with the desktop name.
Then I think we should unlink the mapping object (but keep it alive), and create a *new* mapping object with the same.
Perhaps this was obvious at first, but there was some complication that I failed to notice. Do you mind elaborating if this is the case?