This MR adds support for creating file mapping objects backed by large pages on Linux, by making the following changes:
## wineserver
* On Linux, `create_temp_file` will first attempt to use memfds as the backing fd. If it fails, it'll return to the current codepath, creating a temporary file in either the server or config directory. * The created memfd will be sealed against writes, if the caller requesting the appropriate page protection flags. * This removes the requirement that FDs be only created on filesystems/directories that aren't `noexec`. * In the server method `create_mapping` , if large pages have been requested by the caller, hold that the calling thread's token holds `SeLockMemoryPrivilege` . * Additionally, add `SeLockMemoryPrivilege` to the list of privileges enabled for the Administrator.
## `ntdll`
* Add `virtual_get_min_large_page_size` and its exported wrapper `wine_unix_get_min_large_page_size`. * On Linux, the minimum page size is determined by going through `/sys/kernel/mm/hugepages`. If hugepage support was not detected, `STATUS_NOT_SUPPORTED` is returned instead. On other platforms, the older hard-coded value of 2*1024*1024 is returned instead. * `NtCreateSection` will validate certain parameters if large pages are requested. Specifically, it will return STATUS_INVALID_PARAMETER if the requested mapping is not anonymous/unnamed, or the size is not a multiple of the minimum supported page size.
## `kernelbase`
* `GetLargePageMinimum` will use `wine_unix_get_min_large_page_size`.
## `kernel32/tests`
* Add new test test_large_page_file_mapping, which validates privilege enforcements and parameter validation while creating large pages backed file mapping obejcts. The tests are skipped if `GetLargePageMinimum` returns 0.
-- v16: kernel32: Add tests for large page mapping support. ntdll: Validate parameters to NtCreateSection if large pages are requested. server: Use memfd to back anonymous mappings on Linux. kernelbase: Implement GetLargePageMinimum by returning the value of LargePageMinimum in _KUSER_SHARED_DATA. server: Set LargePageMinimum in _KUSER_SHARED_DATA on Linux. server: Require SeLockMemoryPrivilege to create large page mappings.
From: Vibhav Pant vibhavp@gmail.com
Also, add SeLockMemoryPrivilege to the list of admin privileges. --- server/mapping.c | 6 ++++++ server/security.h | 1 + server/token.c | 2 ++ 3 files changed, 9 insertions(+)
diff --git a/server/mapping.c b/server/mapping.c index ff99b45ce51..a6f7d3e951e 100644 --- a/server/mapping.c +++ b/server/mapping.c @@ -1284,6 +1284,12 @@ DECL_HANDLER(create_mapping)
if (!objattr) return;
+ if ((req->flags & SEC_LARGE_PAGES) && !thread_single_check_privilege( current, SeLockMemoryPrivilege )) + { + set_error( STATUS_PRIVILEGE_NOT_HELD ); + return; + } + if ((mapping = create_mapping( root, &name, objattr->attributes, req->size, req->flags, req->file_handle, req->file_access, sd ))) { diff --git a/server/security.h b/server/security.h index f4dff679179..7a135e90928 100644 --- a/server/security.h +++ b/server/security.h @@ -23,6 +23,7 @@
#include <sys/types.h>
+extern const struct luid SeLockMemoryPrivilege; extern const struct luid SeIncreaseQuotaPrivilege; extern const struct luid SeSecurityPrivilege; extern const struct luid SeTakeOwnershipPrivilege; diff --git a/server/token.c b/server/token.c index da7f0bb7ff2..b1fd23af492 100644 --- a/server/token.c +++ b/server/token.c @@ -42,6 +42,7 @@
#define MAX_SUBAUTH_COUNT 1
+const struct luid SeLockMemoryPrivilege = { 4, 0}; const struct luid SeIncreaseQuotaPrivilege = { 5, 0 }; const struct luid SeTcbPrivilege = { 7, 0 }; const struct luid SeSecurityPrivilege = { 8, 0 }; @@ -782,6 +783,7 @@ struct token *token_create_admin( unsigned primary, int impersonation_level, int { SeManageVolumePrivilege, 0 }, { SeImpersonatePrivilege, SE_PRIVILEGE_ENABLED }, { SeCreateGlobalPrivilege, SE_PRIVILEGE_ENABLED }, + { SeLockMemoryPrivilege, SE_PRIVILEGE_ENABLED }, }; /* note: we don't include non-builtin groups here for the user - * telling us these is the job of a client-side program */
From: Vibhav Pant vibhavp@gmail.com
On Linux, get the minimum supported hugepage size from directory entries /sys/kernel/mm/hugepages, and set the LargePageMinimum field in user_shared_data to it, if supported. --- server/mapping.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+)
diff --git a/server/mapping.c b/server/mapping.c index a6f7d3e951e..04cd60a1d01 100644 --- a/server/mapping.c +++ b/server/mapping.c @@ -29,6 +29,14 @@ #include <sys/stat.h> #include <sys/mman.h> #include <unistd.h> +#ifdef HAVE_MEMFD_CREATE +#include <linux/memfd.h> +#endif +#ifdef __linux__ /* For detecting hugepage support. */ +#include <dirent.h> +#include <errno.h> +#include <limits.h> +#endif
#include "ntstatus.h" #define WIN32_NO_STATUS @@ -1256,6 +1264,66 @@ int get_page_size(void) return page_mask + 1; }
+#ifdef __linux__ +static size_t linux_get_min_hugepage_size( void ) +{ + DIR *sysfs_hugepages; + struct dirent *supported_size; + size_t min_size = 0; + size_t total_supported_sizes = 0; + + errno = 0; + sysfs_hugepages = opendir("/sys/kernel/mm/hugepages"); + if (sysfs_hugepages == NULL) + { + return -1; + } + + while(1) + { + long hugepage_size; + + supported_size = readdir( sysfs_hugepages ); + if ( supported_size == NULL ) + { + break; + } + if (strncmp( supported_size->d_name, "hugepages-", 10) != 0) + { + continue; + } + errno = 0; + hugepage_size = strtol( &supported_size->d_name[10], NULL, 10 ); + if (hugepage_size == 0 || hugepage_size == LONG_MAX || hugepage_size == LONG_MIN) + { + if (errno != 0) + { + fprintf( stderr, + "could not parse page size from directory entry '%s': %s\n", + supported_size->d_name, strerror( errno ) ); + } + continue; + } + hugepage_size *= 1024; + min_size = ( total_supported_sizes == 0 ) + ? hugepage_size + : ( hugepage_size < min_size ? hugepage_size : min_size ); + total_supported_sizes++; + } + + closedir(sysfs_hugepages); + return min_size; +} +#endif /* __linux__ */ + +static size_t get_min_large_page_size( void ) +{ +#ifdef __linux__ + return linux_get_min_hugepage_size(); +#endif + return 0; +} + struct object *create_user_data_mapping( struct object *root, const struct unicode_str *name, unsigned int attr, const struct security_descriptor *sd ) { @@ -1267,8 +1335,15 @@ struct object *create_user_data_mapping( struct object *root, const struct unico ptr = mmap( NULL, mapping->size, PROT_WRITE, MAP_SHARED, get_unix_fd( mapping->fd ), 0 ); if (ptr != MAP_FAILED) { + ULONG min_large_page_size; + user_shared_data = ptr; user_shared_data->SystemCall = 1; + min_large_page_size = get_min_large_page_size(); + if (min_large_page_size != 0) + { + user_shared_data->LargePageMinimum = min_large_page_size; + } } return &mapping->obj; }
From: Vibhav Pant vibhavp@gmail.com
--- dlls/kernelbase/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/dlls/kernelbase/memory.c b/dlls/kernelbase/memory.c index 4f4bba9a13b..316aec7d40a 100644 --- a/dlls/kernelbase/memory.c +++ b/dlls/kernelbase/memory.c @@ -41,6 +41,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(heap); WINE_DECLARE_DEBUG_CHANNEL(virtual); WINE_DECLARE_DEBUG_CHANNEL(globalmem);
+static const struct _KUSER_SHARED_DATA *user_shared_data = (struct _KUSER_SHARED_DATA *)0x7ffe0000;
/*********************************************************************** * Virtual memory functions @@ -77,7 +78,7 @@ BOOL WINAPI DECLSPEC_HOTPATCH FlushViewOfFile( const void *base, SIZE_T size ) */ SIZE_T WINAPI GetLargePageMinimum(void) { - return 2 * 1024 * 1024; + return user_shared_data->LargePageMinimum; }
From: Vibhav Pant vibhavp@gmail.com
On Linux, try creating a memfd first to implement create_temp_file. If a large page mapping is requested (SEC_LARGE_PAGES), create a memfd with the MFD_HUGETLB flags. If SEC_COMMIT is passed, allocate sufficient space to back the mapping with posix_fallocate.
Additionally, ensure when SEC_LARGE_PAGES is requested, ensure that SEC_COMMIT is passed as well. --- configure | 12 +++++ configure.ac | 2 + include/config.h.in | 6 +++ server/mapping.c | 118 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 134 insertions(+), 4 deletions(-)
diff --git a/configure b/configure index 101981ad8df..d9d4a51c054 100755 --- a/configure +++ b/configure @@ -7995,6 +7995,12 @@ if test "x$ac_cv_header_linux_major_h" = xyes then : printf "%s\n" "#define HAVE_LINUX_MAJOR_H 1" >>confdefs.h
+fi +ac_fn_c_check_header_compile "$LINENO" "linux/memfd.h" "ac_cv_header_linux_memfd_h" "$ac_includes_default" +if test "x$ac_cv_header_linux_memfd_h" = xyes +then : + printf "%s\n" "#define HAVE_LINUX_MEMFD_H 1" >>confdefs.h + fi ac_fn_c_check_header_compile "$LINENO" "linux/param.h" "ac_cv_header_linux_param_h" "$ac_includes_default" if test "x$ac_cv_header_linux_param_h" = xyes @@ -20787,6 +20793,12 @@ if test "x$ac_cv_func_mach_continuous_time" = xyes then : printf "%s\n" "#define HAVE_MACH_CONTINUOUS_TIME 1" >>confdefs.h
+fi +ac_fn_c_check_func "$LINENO" "memfd_create" "ac_cv_func_memfd_create" +if test "x$ac_cv_func_memfd_create" = xyes +then : + printf "%s\n" "#define HAVE_MEMFD_CREATE 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "pipe2" "ac_cv_func_pipe2" if test "x$ac_cv_func_pipe2" = xyes diff --git a/configure.ac b/configure.ac index 231ec7f17d6..e63c97900b2 100644 --- a/configure.ac +++ b/configure.ac @@ -384,6 +384,7 @@ AC_CHECK_HEADERS(\ linux/input.h \ linux/ioctl.h \ linux/major.h \ + linux/memfd.h \ linux/param.h \ linux/serial.h \ linux/types.h \ @@ -2079,6 +2080,7 @@ AC_CHECK_FUNCS(\ getrandom \ kqueue \ mach_continuous_time \ + memfd_create \ pipe2 \ port_create \ posix_fadvise \ diff --git a/include/config.h.in b/include/config.h.in index ef783c75a42..923d943de01 100644 --- a/include/config.h.in +++ b/include/config.h.in @@ -180,6 +180,9 @@ /* Define to 1 if you have the <linux/major.h> header file. */ #undef HAVE_LINUX_MAJOR_H
+/* Define to 1 if you have the <linux/memfd.h> header file. */ +#undef HAVE_LINUX_MEMFD_H + /* Define to 1 if you have the <linux/param.h> header file. */ #undef HAVE_LINUX_PARAM_H
@@ -219,6 +222,9 @@ /* Define to 1 if you have the <mach-o/loader.h> header file. */ #undef HAVE_MACH_O_LOADER_H
+/* Define to 1 if you have the 'memfd_create' function. */ +#undef HAVE_MEMFD_CREATE + /* Define to 1 if you have the <mntent.h> header file. */ #undef HAVE_MNTENT_H
diff --git a/server/mapping.c b/server/mapping.c index 04cd60a1d01..1cf018db629 100644 --- a/server/mapping.c +++ b/server/mapping.c @@ -325,13 +325,117 @@ static int check_current_dir_for_exec(void) return (ret != MAP_FAILED); }
+#ifdef HAVE_MEMFD_CREATE +static void make_memfd_name( const char *prefix, ULONG sec_flags, char *dest ) +{ + /* memfd names are restricted to 250 bytes including \0, but are only meant for debugging, with + * duplicate names not causing any side-effects. */ + + if (prefix != NULL) + { + snprintf( dest, 250, "map-%s-%x", prefix, (int)sec_flags ); + } + else + { + snprintf( dest, 250, "map-%x", (int)sec_flags ); + } +} + +static int create_memfd( ULONG file_access, ULONG sec_flags, file_pos_t new_size, int *err ) +{ + int fd; + off_t size = new_size; + char memfd_name[256]; + unsigned int memfd_flags = MFD_ALLOW_SEALING; + unsigned int seal_flags = F_SEAL_SEAL | F_SEAL_WRITE; + +#ifdef HAVE_POSIX_FALLOCATE + if (sizeof(file_pos_t) > sizeof(off_t) && size != new_size) + { + *err = EINVAL; + return -1; + } +#endif +#ifdef MFD_EXEC + memfd_flags |= MFD_EXEC; +#endif + + make_memfd_name( NULL, sec_flags, memfd_name ); + + if (sec_flags & SEC_LARGE_PAGES) + { + memfd_flags |= MFD_HUGETLB; + } + fd = memfd_create( memfd_name, memfd_flags ); + if (fd == -1) + { + if (errno == EINVAL && (sec_flags & SEC_LARGE_PAGES)) + { + /* MFD_HUGETLB & MFD_ALLOW_SEALING is only available in Linux >= 4.16. Lets try creating + * one without HUGETLB */ + fd = memfd_create( memfd_name, memfd_flags & ~MFD_HUGETLB ); + if (fd == -1) + { + *err = errno; + return -1; + } + } + else + { + *err = errno; + return -1; + } + } +#ifdef HAVE_POSIX_FALLOCATE + if (sec_flags & SEC_COMMIT) + { + *err = posix_fallocate( fd, 0, size ); + if (*err != 0) + { + close( fd ); + return -1; + } + } +#endif + if (ftruncate( fd, size ) == -1) + { + *err = errno; + close( fd ); + return -1; + } + if (file_access & FILE_WRITE_DATA) + { + seal_flags &= ~F_SEAL_WRITE; + } + + if (fcntl( fd, F_ADD_SEALS, F_SEAL_SEAL, seal_flags ) == -1) + { + *err = errno; + close( fd ); + return -1; + } + return fd; +} +#endif /* defined(HAVE_MEMFD_CREATE) */ + /* create a temp file for anonymous mappings */ -static int create_temp_file( file_pos_t size ) +static int create_temp_file( ULONG file_access, ULONG sec_flags, file_pos_t size ) { static int temp_dir_fd = -1; char tmpfn[16]; int fd;
+#ifdef HAVE_MEMFD_CREATE + int err = 0; + + fd = create_memfd( file_access, sec_flags, size, &err); + if (fd != -1) + { + return fd; + } + +#endif + if (temp_dir_fd == -1) { temp_dir_fd = server_dir_fd; @@ -616,7 +720,7 @@ static int build_shared_mapping( struct mapping *mapping, int fd,
/* create a temp file for the mapping */
- if ((shared_fd = create_temp_file( total_size )) == -1) return 0; + if ((shared_fd = create_temp_file( FILE_WRITE_DATA, 0, total_size )) == -1) return 0; if (!(file = create_file_for_fd( shared_fd, FILE_GENERIC_READ|FILE_GENERIC_WRITE, 0 ))) return 0;
if (!(buffer = malloc( max_size ))) goto error; @@ -948,7 +1052,12 @@ static struct ranges *create_ranges(void)
static unsigned int get_mapping_flags( obj_handle_t handle, unsigned int flags ) { - switch (flags & (SEC_IMAGE | SEC_RESERVE | SEC_COMMIT | SEC_FILE)) + if (flags & SEC_LARGE_PAGES) + { + if (!(flags & SEC_COMMIT)) + goto error; + } + switch (flags & (SEC_IMAGE | SEC_RESERVE | SEC_COMMIT | SEC_FILE )) { case SEC_IMAGE: if (flags & (SEC_WRITECOMBINE | SEC_LARGE_PAGES)) break; @@ -963,6 +1072,7 @@ static unsigned int get_mapping_flags( obj_handle_t handle, unsigned int flags ) if (handle) return SEC_FILE | (flags & (SEC_NOCACHE | SEC_WRITECOMBINE)); return flags; } +error: set_error( STATUS_INVALID_PARAMETER ); return 0; } @@ -1052,7 +1162,7 @@ static struct mapping *create_mapping( struct object *root, const struct unicode } if ((flags & SEC_RESERVE) && !(mapping->committed = create_ranges())) goto error; mapping->size = (mapping->size + page_mask) & ~((mem_size_t)page_mask); - if ((unix_fd = create_temp_file( mapping->size )) == -1) goto error; + if ((unix_fd = create_temp_file( file_access, flags, mapping->size )) == -1) goto error; if (!(mapping->fd = create_anonymous_fd( &mapping_fd_ops, unix_fd, &mapping->obj, FILE_SYNCHRONOUS_IO_NONALERT ))) goto error; allow_fd_caching( mapping->fd );
From: Vibhav Pant vibhavp@gmail.com
If large pages are requested (SEC_LARGE_PAGES), return STATUS_INVALID_PARAMETER if: * If the backing file object (file) is not NULL. * The maximum size of the section is NULL. * Large pages are not supported by the host platform. * Large pages are supported, but the requested size is not a multiple of the minimum large page size. --- dlls/ntdll/unix/sync.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c index 4b2d7c1ccbc..81931931d9c 100644 --- a/dlls/ntdll/unix/sync.c +++ b/dlls/ntdll/unix/sync.c @@ -2058,6 +2058,19 @@ NTSTATUS WINAPI NtCreateSection( HANDLE *handle, ACCESS_MASK access, const OBJEC
*handle = 0;
+ if (sec_flags & SEC_LARGE_PAGES) + { + SIZE_T min_size = user_shared_data->LargePageMinimum; + if (file != NULL || size == NULL) + { + return STATUS_INVALID_PARAMETER; + } + + if (min_size == 0 || size->QuadPart % min_size != 0) + { + return STATUS_INVALID_PARAMETER; + } + } switch (protect & 0xff) { case PAGE_READONLY:
From: Vibhav Pant vibhavp@gmail.com
--- dlls/kernel32/tests/process.c | 73 +++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+)
diff --git a/dlls/kernel32/tests/process.c b/dlls/kernel32/tests/process.c index 8e168622c17..2521c52a7b1 100644 --- a/dlls/kernel32/tests/process.c +++ b/dlls/kernel32/tests/process.c @@ -82,6 +82,7 @@ static BOOL (WINAPI *pGetNumaProcessorNode)(UCHAR, PUCHAR); static NTSTATUS (WINAPI *pNtQueryInformationProcess)(HANDLE, PROCESSINFOCLASS, PVOID, ULONG, PULONG); static NTSTATUS (WINAPI *pNtQueryInformationThread)(HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG); static NTSTATUS (WINAPI *pNtQuerySystemInformationEx)(SYSTEM_INFORMATION_CLASS, void*, ULONG, void*, ULONG, ULONG*); +static NTSTATUS (WINAPI *pRtlAdjustPrivilege)(ULONG,BOOLEAN,BOOLEAN,PBOOLEAN); static DWORD (WINAPI *pWTSGetActiveConsoleSessionId)(void); static HANDLE (WINAPI *pCreateToolhelp32Snapshot)(DWORD, DWORD); static BOOL (WINAPI *pProcess32First)(HANDLE, PROCESSENTRY32*); @@ -252,6 +253,7 @@ static BOOL init(void) pNtQueryInformationProcess = (void *)GetProcAddress(hntdll, "NtQueryInformationProcess"); pNtQueryInformationThread = (void *)GetProcAddress(hntdll, "NtQueryInformationThread"); pNtQuerySystemInformationEx = (void *)GetProcAddress(hntdll, "NtQuerySystemInformationEx"); + pRtlAdjustPrivilege = (void *)GetProcAddress(hntdll, "RtlAdjustPrivilege");
pGetNativeSystemInfo = (void *) GetProcAddress(hkernel32, "GetNativeSystemInfo"); pGetSystemRegistryQuota = (void *) GetProcAddress(hkernel32, "GetSystemRegistryQuota"); @@ -4347,6 +4349,76 @@ static void test_largepages(void) ok((size == 0) || (size == 2*1024*1024) || (size == 4*1024*1024), "GetLargePageMinimum reports %Id size\n", size); }
+static void test_large_page_file_mapping( void ) +{ + SIZE_T size; + BOOLEAN enabled; + NTSTATUS status; + DWORD err; + HANDLE file; + HANDLE token; + HANDLE process_token; + + if (!pGetLargePageMinimum) + { + win_skip( "No GetLargePageMinimum support.\n" ); + return; + } + if (!pRtlAdjustPrivilege) + { + win_skip( "No RtlAdjustPrivilege support.\n" ); + return; + } + + size = pGetLargePageMinimum(); + if (size == 0) + { + trace( "No large pages support, skipping test.\n" ); + return; + } + ok( OpenProcessToken( GetCurrentProcess(), TOKEN_DUPLICATE | TOKEN_QUERY, &process_token ), + "OpenProcessToken failed (%ld)\n", GetLastError() ); + ok( DuplicateToken( process_token, SecurityImpersonation, &token ), + "DuplicateToken failed (%ld)\n", GetLastError() ); + ok( ImpersonateLoggedOnUser( token ), "ImpersonateLoggedOnUser failed (%ld)\n", + GetLastError() ); + status = pRtlAdjustPrivilege( SE_LOCK_MEMORY_PRIVILEGE, TRUE, TRUE, &enabled ); + if (status != STATUS_SUCCESS) + { + trace( "Couldn't get SE_LOCK_MEMORY_PRIVILEGE (%ld), skipping large page file " + "mapping test.\n", + status ); + } + else + { + file = + CreateFileMappingW( INVALID_HANDLE_VALUE, NULL, + PAGE_READWRITE | SEC_LARGE_PAGES | SEC_COMMIT, 0, size - 1, NULL ); + err = GetLastError(); + ok( file == NULL && err == ERROR_INVALID_PARAMETER, + "CreateFileMappingW should fail with ERROR_INVALID_PARAMETER (got %ld instead)\n", + err ); + if (file != NULL) CloseHandle( file ); + + file = + CreateFileMappingW( INVALID_HANDLE_VALUE, NULL, + PAGE_READWRITE | SEC_LARGE_PAGES | SEC_COMMIT, 0, size * 2, NULL ); + ok( file != NULL, "CreateFileMappingW failed (%ld)\n", GetLastError() ); + if (file != NULL) CloseHandle( file ); + + file = CreateFileMappingW( INVALID_HANDLE_VALUE, NULL, + PAGE_READWRITE | SEC_LARGE_PAGES | SEC_RESERVE, 0, size, NULL ); + err = GetLastError(); + ok( file == NULL && err == ERROR_INVALID_PARAMETER, + "CreateFileMappingW should have failed with ERROR_INVALID_PARAMETER (got %ld " + "instead)\n", + err ); + if (file != NULL) CloseHandle( file ); + } + + ok( RevertToSelf(), "RevertToSelf failed (%ld)\n", GetLastError() ); +} + struct proc_thread_attr { DWORD_PTR attr; @@ -5668,6 +5740,7 @@ START_TEST(process) test_GetLogicalProcessorInformationEx(); test_GetSystemCpuSetInformation(); test_largepages(); + test_large_page_file_mapping(); test_ProcThreadAttributeList(); test_SuspendProcessState(); test_SuspendProcessNewThread();