Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/unix/virtual.c | 49 ++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 984af2d4a21..3028910ce5c 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -943,6 +943,23 @@ static BYTE get_page_vprot( const void *addr ) }
+/*********************************************************************** + * get_vprot_range_size + * + * Return the size of the region with equal masked protection byte. + * The function assumes that base and size are page aligned and + * base + size does not wrap around. */ +static SIZE_T get_vprot_range_size( BYTE *base, SIZE_T size, BYTE mask, BYTE *vprot ) +{ + BYTE *addr; + + *vprot = get_page_vprot( base ); + for (addr = base + page_size; addr != base + size; addr += page_size) + if ((*vprot ^ get_page_vprot( addr )) & mask) break; + + return addr - base; +} + /*********************************************************************** * set_page_vprot * @@ -2047,18 +2064,21 @@ done: */ static SIZE_T get_committed_size( struct file_view *view, void *base, BYTE *vprot ) { - SIZE_T i, start; + SIZE_T offset;
- start = ((char *)base - (char *)view->base) >> page_shift; - *vprot = get_page_vprot( base ); + base = ROUND_ADDR( base, page_mask ); + offset = (BYTE *)base - (BYTE *)view->base;
if (view->protect & SEC_RESERVE) { SIZE_T ret = 0; + + *vprot = get_page_vprot( base ); + SERVER_START_REQ( get_mapping_committed_range ) { req->base = wine_server_client_ptr( view->base ); - req->offset = start << page_shift; + req->offset = offset; if (!wine_server_call( req )) { ret = reply->size; @@ -2072,9 +2092,8 @@ static SIZE_T get_committed_size( struct file_view *view, void *base, BYTE *vpro SERVER_END_REQ; return ret; } - for (i = start + 1; i < view->size >> page_shift; i++) - if ((*vprot ^ get_page_vprot( (char *)view->base + (i << page_shift) )) & VPROT_COMMITTED) break; - return (i - start) << page_shift; + + return get_vprot_range_size( base, view->size - offset, VPROT_COMMITTED, vprot ); }
@@ -4098,7 +4117,7 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, SIZE_T len, SIZE_T *res_len ) { struct file_view *view; - char *base, *alloc_base = 0, *alloc_end = working_set_limit; + BYTE *base, *alloc_base = 0, *alloc_end = working_set_limit; struct wine_rb_entry *ptr; sigset_t sigset;
@@ -4145,20 +4164,20 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, while (ptr) { view = WINE_RB_ENTRY_VALUE( ptr, struct file_view, entry ); - if ((char *)view->base > base) + if ((BYTE *)view->base > base) { alloc_end = view->base; ptr = ptr->left; } - else if ((char *)view->base + view->size <= base) + else if ((BYTE *)view->base + view->size <= base) { - alloc_base = (char *)view->base + view->size; + alloc_base = (BYTE *)view->base + view->size; ptr = ptr->right; } else { alloc_base = view->base; - alloc_end = (char *)view->base + view->size; + alloc_end = (BYTE *)view->base + view->size; break; } } @@ -4196,7 +4215,6 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, else { BYTE vprot; - char *ptr; SIZE_T range_size = get_committed_size( view, base, &vprot );
info->State = (vprot & VPROT_COMMITTED) ? MEM_COMMIT : MEM_RESERVE; @@ -4205,9 +4223,8 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, if (view->protect & SEC_IMAGE) info->Type = MEM_IMAGE; else if (view->protect & (SEC_FILE | SEC_RESERVE | SEC_COMMIT)) info->Type = MEM_MAPPED; else info->Type = MEM_PRIVATE; - for (ptr = base; ptr < base + range_size; ptr += page_size) - if ((get_page_vprot( ptr ) ^ vprot) & ~VPROT_WRITEWATCH) break; - info->RegionSize = ptr - base; + + info->RegionSize = get_vprot_range_size( base, range_size, ~VPROT_WRITEWATCH, &vprot ); } server_leave_uninterrupted_section( &virtual_mutex, &sigset );
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- dlls/ntdll/unix/virtual.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 3028910ce5c..14ea3b11143 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -4194,7 +4194,7 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, { /* not in a reserved area at all, pretend it's allocated */ #ifdef __i386__ - if (base >= (char *)address_space_start) + if (base >= (BYTE *)address_space_start) { info->State = MEM_RESERVE; info->Protect = PAGE_NOACCESS; @@ -4215,7 +4215,13 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, else { BYTE vprot; - SIZE_T range_size = get_committed_size( view, base, &vprot ); + SIZE_T range_size; + + if (view->protect & SEC_RESERVE) + range_size = get_committed_size( view, base, &vprot ); + else + range_size = view->size - (base - (BYTE *)view->base); + info->RegionSize = get_vprot_range_size( base, range_size, ~VPROT_WRITEWATCH, &vprot );
info->State = (vprot & VPROT_COMMITTED) ? MEM_COMMIT : MEM_RESERVE; info->Protect = (vprot & VPROT_COMMITTED) ? get_win32_prot( vprot, view->protect ) : 0; @@ -4223,8 +4229,6 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, if (view->protect & SEC_IMAGE) info->Type = MEM_IMAGE; else if (view->protect & (SEC_FILE | SEC_RESERVE | SEC_COMMIT)) info->Type = MEM_MAPPED; else info->Type = MEM_PRIVATE; - - info->RegionSize = get_vprot_range_size( base, range_size, ~VPROT_WRITEWATCH, &vprot ); } server_leave_uninterrupted_section( &virtual_mutex, &sigset );
Signed-off-by: Paul Gofman pgofman@codeweavers.com --- v3: - get rid of the last remainder loop; - get rid of 'count' variable; - define word related constants instead of hard coding them.
This patch greatly reduces the overhead of scanning huge ranges for a change in vprot. It improves the performance of DeathLoop, which reserves a huge memory area and then commits some pages within it, often calling VirtualQuery() for addresses within the allocated area.
dlls/ntdll/unix/virtual.c | 67 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 5 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 14ea3b11143..9c754de0b6f 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -947,17 +947,74 @@ static BYTE get_page_vprot( const void *addr ) * get_vprot_range_size * * Return the size of the region with equal masked protection byte. + * base and size should be page aligned. * The function assumes that base and size are page aligned and * base + size does not wrap around. */ static SIZE_T get_vprot_range_size( BYTE *base, SIZE_T size, BYTE mask, BYTE *vprot ) { - BYTE *addr; +#define BYTES_IN_WORD sizeof(UINT64) + static const UINT_PTR index_align_mask = BYTES_IN_WORD - 1; + static const UINT64 word_from_byte = 0x101010101010101ull; + SIZE_T i, start_idx, end_idx, aligned_start_idx; + UINT64 vprot_word, mask_word, changed_word; + const BYTE *vprot_ptr; +#ifdef _WIN64 + size_t idx_page; +#endif + unsigned int j; + size_t idx; + + TRACE("base %p, size %p, mask %#x.\n", base, (void *)size, mask); + + start_idx = (size_t)base >> page_shift; + end_idx = start_idx + (size >> page_shift); + idx = start_idx; +#ifdef _WIN64 + end_idx = min( end_idx, pages_vprot_size << pages_vprot_shift ); + if (end_idx <= start_idx) + { + *vprot = 0; + return size; + } + idx_page = idx >> pages_vprot_shift; + idx &= pages_vprot_mask; + vprot_ptr = pages_vprot[idx_page]; +#else + vprot_ptr = pages_vprot; +#endif + + aligned_start_idx = (start_idx + index_align_mask) & ~index_align_mask; + if (aligned_start_idx > end_idx) aligned_start_idx = end_idx;
- *vprot = get_page_vprot( base ); - for (addr = base + page_size; addr != base + size; addr += page_size) - if ((*vprot ^ get_page_vprot( addr )) & mask) break; + /* Page count in zero level page table on x64 is at least the multiples of BYTES_IN_WORD + * so we don't have to worry about crossing the boundary on unaligned idx values. */ + *vprot = vprot_ptr[idx]; + + for (i = start_idx; i < aligned_start_idx; ++i) + if ((*vprot ^ vprot_ptr[idx++]) & mask) return (i - start_idx) << page_shift; + + vprot_word = word_from_byte * *vprot; + mask_word = word_from_byte * mask; + for (; i < end_idx; i += BYTES_IN_WORD) + { +#ifdef _WIN64 + if (idx >> pages_vprot_shift) + { + idx = 0; + vprot_ptr = pages_vprot[++idx_page]; + } +#endif + changed_word = (vprot_word ^ *(UINT64 *)(vprot_ptr + idx)) & mask_word; + if (changed_word) + { + for (j = 0; i < end_idx && !((BYTE *)&changed_word)[j]; ++j) ++i; + return (i - start_idx) << page_shift; + } + idx += BYTES_IN_WORD; + }
- return addr - base; + return *vprot & mask ? (end_idx - start_idx) << page_shift : size; +#undef BYTES_IN_WORD }
/***********************************************************************
Paul Gofman pgofman@codeweavers.com writes:
@@ -4098,7 +4117,7 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, SIZE_T len, SIZE_T *res_len ) { struct file_view *view;
- char *base, *alloc_base = 0, *alloc_end = working_set_limit;
+ BYTE *base, *alloc_base = 0, *alloc_end = working_set_limit; struct wine_rb_entry *ptr; sigset_t sigset;
@@ -4145,20 +4164,20 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, while (ptr) { view = WINE_RB_ENTRY_VALUE( ptr, struct file_view, entry );
- if ((char *)view->base > base)
+ if ((BYTE *)view->base > base) { alloc_end = view->base; ptr = ptr->left; }
- else if ((char *)view->base + view->size <= base)
+ else if ((BYTE *)view->base + view->size <= base) {
- alloc_base = (char *)view->base + view->size;
+ alloc_base = (BYTE *)view->base + view->size; ptr = ptr->right; } else { alloc_base = view->base;
- alloc_end = (char *)view->base + view->size;
+ alloc_end = (BYTE *)view->base + view->size; break;
That sort of change doesn't seem necessary.
On 9/20/21 21:59, Alexandre Julliard wrote:
Paul Gofman pgofman@codeweavers.com writes:
@@ -4098,7 +4117,7 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, SIZE_T len, SIZE_T *res_len ) { struct file_view *view;
- char *base, *alloc_base = 0, *alloc_end = working_set_limit;
+ BYTE *base, *alloc_base = 0, *alloc_end = working_set_limit; struct wine_rb_entry *ptr; sigset_t sigset;
@@ -4145,20 +4164,20 @@ static NTSTATUS get_basic_memory_info( HANDLE process, LPCVOID addr, while (ptr) { view = WINE_RB_ENTRY_VALUE( ptr, struct file_view, entry );
- if ((char *)view->base > base)
+ if ((BYTE *)view->base > base) { alloc_end = view->base; ptr = ptr->left; }
- else if ((char *)view->base + view->size <= base)
+ else if ((BYTE *)view->base + view->size <= base) {
- alloc_base = (char *)view->base + view->size;
+ alloc_base = (BYTE *)view->base + view->size; ptr = ptr->right; } else { alloc_base = view->base;
- alloc_end = (char *)view->base + view->size;
+ alloc_end = (BYTE *)view->base + view->size; break;
That sort of change doesn't seem necessary.
Thanks, I will follow the existing path and use 'char *' in the new function then.