Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/ntdll/tests/time.c | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+)
diff --git a/dlls/ntdll/tests/time.c b/dlls/ntdll/tests/time.c index 5382a952d8d8..fe71d4458c0f 100644 --- a/dlls/ntdll/tests/time.c +++ b/dlls/ntdll/tests/time.c @@ -18,7 +18,9 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
+#define NONAMELESSUNION #include "ntdll_test.h" +#include "ddk/wdm.h"
#define TICKSPERSEC 10000000 #define TICKSPERMSEC 10000 @@ -29,6 +31,7 @@ static VOID (WINAPI *pRtlTimeFieldsToTime)( PTIME_FIELDS TimeFields, PLARGE_IN static NTSTATUS (WINAPI *pNtQueryPerformanceCounter)( LARGE_INTEGER *counter, LARGE_INTEGER *frequency ); static NTSTATUS (WINAPI *pRtlQueryTimeZoneInformation)( RTL_TIME_ZONE_INFORMATION *); static NTSTATUS (WINAPI *pRtlQueryDynamicTimeZoneInformation)( RTL_DYNAMIC_TIME_ZONE_INFORMATION *); +static BOOL (WINAPI *pRtlQueryUnbiasedInterruptTime)( ULONGLONG *time );
static const int MonthLengths[2][12] = { @@ -153,6 +156,79 @@ static void test_RtlQueryTimeZoneInformation(void) wine_dbgstr_w(tzinfo.DaylightName)); }
+static ULONGLONG read_ksystem_time(volatile KSYSTEM_TIME *time) +{ + ULONGLONG high, low; + do + { + high = time->High1Time; + low = time->LowPart; + } + while (high != time->High2Time); + return high << 32 | low; +} + +static void test_user_shared_data_time(void) +{ + KSHARED_USER_DATA *user_shared_data = (void *)0x7ffe0000; + ULONGLONG t1, t2, t3; + int i = 0; + + i = 0; + do + { + t1 = GetTickCount(); + if (user_shared_data->NtMajorVersion <= 5 && user_shared_data->NtMinorVersion <= 1) + t2 = ((ULONG64)user_shared_data->TickCountLowDeprecated * user_shared_data->TickCountMultiplier) >> 24; + else + t2 = (read_ksystem_time(&user_shared_data->u.TickCount) * user_shared_data->TickCountMultiplier) >> 24; + t3 = GetTickCount(); + } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */ + + ok(t1 <= t2, "USD TickCount / GetTickCount are out of order: %s %s\n", + wine_dbgstr_longlong(t1), wine_dbgstr_longlong(t2)); + todo_wine + ok(t2 <= t3, "USD TickCount / GetTickCount are out of order: %s %s\n", + wine_dbgstr_longlong(t2), wine_dbgstr_longlong(t3)); + + i = 0; + do + { + LARGE_INTEGER system_time; + NtQuerySystemTime(&system_time); + t1 = system_time.QuadPart; + t2 = read_ksystem_time(&user_shared_data->SystemTime); + NtQuerySystemTime(&system_time); + t3 = system_time.QuadPart; + } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */ + + todo_wine + ok(t1 <= t2, "USD SystemTime / NtQuerySystemTime are out of order %s %s\n", + wine_dbgstr_longlong(t1), wine_dbgstr_longlong(t2)); + ok(t2 <= t3, "USD SystemTime / NtQuerySystemTime are out of order %s %s\n", + wine_dbgstr_longlong(t2), wine_dbgstr_longlong(t3)); + + if (!pRtlQueryUnbiasedInterruptTime) + win_skip("skipping RtlQueryUnbiasedInterruptTime tests\n"); + else + { + i = 0; + do + { + pRtlQueryUnbiasedInterruptTime(&t1); + t2 = read_ksystem_time(&user_shared_data->InterruptTime); + pRtlQueryUnbiasedInterruptTime(&t3); + } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */ + + todo_wine + ok(t1 <= t2, "USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order %s %s\n", + wine_dbgstr_longlong(t1), wine_dbgstr_longlong(t2)); + ok(t2 <= t3 || broken(t2 == t3 + 82410089070) /* w864 has some weird offset on testbot */, + "USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order %s %s\n", + wine_dbgstr_longlong(t2), wine_dbgstr_longlong(t3)); + } +} + START_TEST(time) { HMODULE mod = GetModuleHandleA("ntdll.dll"); @@ -163,6 +239,7 @@ START_TEST(time) (void *)GetProcAddress(mod, "RtlQueryTimeZoneInformation"); pRtlQueryDynamicTimeZoneInformation = (void *)GetProcAddress(mod, "RtlQueryDynamicTimeZoneInformation"); + pRtlQueryUnbiasedInterruptTime = (void *)GetProcAddress(mod, "RtlQueryUnbiasedInterruptTime");
if (pRtlTimeToTimeFields && pRtlTimeFieldsToTime) test_pRtlTimeToTimeFields(); @@ -170,4 +247,5 @@ START_TEST(time) win_skip("Required time conversion functions are not available\n"); test_NtQueryPerformanceCounter(); test_RtlQueryTimeZoneInformation(); + test_user_shared_data_time(); }
The USD page is created when the first process (wineboot.exe) completes its creation, using its provided user_shared_data for initialization.
The server maps the page write-only and the clients map it read-only, then the server updates the timestamps every 16 ms.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=29168 Signed-off-by: Rémi Bernon rbernon@codeweavers.com ---
The tests todo_wine cannot be completely removed as the read may have missed a server timestamp update whereas the time functions are still directly calling native clocks to get their values.
v5: A standard mapping object is created and a handle is returned instead of directly using the unix fd.
The section is mapped with mmap instead of standard functions for multiple reasons:
* NtQueryVirtualMemory returns MEM_PRIVATE on Windows, using standard section mapping will return MEM_MAPPED instead.
* NtMapViewOfSection requires the view to be unmapped first with NtFreeVirtualMemory, this could create a race condition if a signal is received between the two calls. On the contrary, mmap with MAP_FIXED doesn't need the memory to be unmapped first.
dlls/ntdll/ntdll_misc.h | 1 + dlls/ntdll/server.c | 20 ++++++++++ dlls/ntdll/tests/time.c | 9 +++-- dlls/ntdll/thread.c | 2 + dlls/ntoskrnl.exe/instr.c | 12 ------ server/file.h | 2 + server/mapping.c | 84 +++++++++++++++++++++++++++++++++++++++ server/process.c | 1 + server/protocol.def | 2 + 9 files changed, 118 insertions(+), 15 deletions(-)
diff --git a/dlls/ntdll/ntdll_misc.h b/dlls/ntdll/ntdll_misc.h index 13771d230a88..ecce05df93c0 100644 --- a/dlls/ntdll/ntdll_misc.h +++ b/dlls/ntdll/ntdll_misc.h @@ -221,6 +221,7 @@ extern void virtual_set_large_address_space(void) DECLSPEC_HIDDEN; extern void virtual_fill_image_information( const pe_image_info_t *pe_info, SECTION_IMAGE_INFORMATION *info ) DECLSPEC_HIDDEN; extern struct _KUSER_SHARED_DATA *user_shared_data DECLSPEC_HIDDEN; +extern size_t user_shared_data_size DECLSPEC_HIDDEN;
/* completion */ extern NTSTATUS NTDLL_AddCompletion( HANDLE hFile, ULONG_PTR CompletionValue, diff --git a/dlls/ntdll/server.c b/dlls/ntdll/server.c index 41e6ea891c56..d81d413c90a4 100644 --- a/dlls/ntdll/server.c +++ b/dlls/ntdll/server.c @@ -91,6 +91,7 @@ #include "wine/server.h" #include "wine/debug.h" #include "ntdll_misc.h" +#include "ddk/wdm.h"
WINE_DEFAULT_DEBUG_CHANNEL(server);
@@ -1863,7 +1864,13 @@ void server_init_process_done(void) IMAGE_NT_HEADERS *nt = RtlImageNtHeader( peb->ImageBaseAddress ); void *entry = (char *)peb->ImageBaseAddress + nt->OptionalHeader.AddressOfEntryPoint; NTSTATUS status; + HANDLE usd_handle; int suspend; + SIZE_T size = user_shared_data_size; + void *addr = user_shared_data; + ULONG old_prot; + + NtProtectVirtualMemory( NtCurrentProcess(), &addr, &size, PAGE_READONLY, &old_prot );
#ifdef __APPLE__ send_server_task_port(); @@ -1886,11 +1893,24 @@ void server_init_process_done(void) #endif req->entry = wine_server_client_ptr( entry ); req->gui = (nt->OptionalHeader.Subsystem != IMAGE_SUBSYSTEM_WINDOWS_CUI); + wine_server_add_data( req, user_shared_data, sizeof(*user_shared_data) ); status = wine_server_call( req ); suspend = reply->suspend; + usd_handle = wine_server_ptr_handle( reply->usd_handle ); } SERVER_END_REQ;
+ if (usd_handle) + { + int res, usd_fd, needs_close; + if ((res = server_get_unix_fd( usd_handle, 0, &usd_fd, &needs_close, NULL, NULL )) || + (user_shared_data != mmap( user_shared_data, user_shared_data_size, + PROT_READ, MAP_SHARED | MAP_FIXED, usd_fd, 0 ))) + fatal_error( "failed to remap the process user shared data\n" ); + if (needs_close) close( usd_fd ); + NtClose( usd_handle ); + } + assert( !status ); signal_start_process( entry, suspend ); } diff --git a/dlls/ntdll/tests/time.c b/dlls/ntdll/tests/time.c index fe71d4458c0f..7f81c20f21ec 100644 --- a/dlls/ntdll/tests/time.c +++ b/dlls/ntdll/tests/time.c @@ -185,9 +185,10 @@ static void test_user_shared_data_time(void) t3 = GetTickCount(); } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */
+ /* FIXME: not always in order, but should be close */ + todo_wine_if(t1 > t2 && t1 - t2 < 50) ok(t1 <= t2, "USD TickCount / GetTickCount are out of order: %s %s\n", wine_dbgstr_longlong(t1), wine_dbgstr_longlong(t2)); - todo_wine ok(t2 <= t3, "USD TickCount / GetTickCount are out of order: %s %s\n", wine_dbgstr_longlong(t2), wine_dbgstr_longlong(t3));
@@ -202,7 +203,8 @@ static void test_user_shared_data_time(void) t3 = system_time.QuadPart; } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */
- todo_wine + /* FIXME: not always in order, but should be close */ + todo_wine_if(t1 > t2 && t1 - t2 < 50 * TICKSPERMSEC) ok(t1 <= t2, "USD SystemTime / NtQuerySystemTime are out of order %s %s\n", wine_dbgstr_longlong(t1), wine_dbgstr_longlong(t2)); ok(t2 <= t3, "USD SystemTime / NtQuerySystemTime are out of order %s %s\n", @@ -220,7 +222,8 @@ static void test_user_shared_data_time(void) pRtlQueryUnbiasedInterruptTime(&t3); } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */
- todo_wine + /* FIXME: not always in order, but should be close */ + todo_wine_if(t1 > t2 && t1 - t2 < 50 * TICKSPERMSEC) ok(t1 <= t2, "USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order %s %s\n", wine_dbgstr_longlong(t1), wine_dbgstr_longlong(t2)); ok(t2 <= t3 || broken(t2 == t3 + 82410089070) /* w864 has some weird offset on testbot */, diff --git a/dlls/ntdll/thread.c b/dlls/ntdll/thread.c index b25f87e43755..6e5428bf0f06 100644 --- a/dlls/ntdll/thread.c +++ b/dlls/ntdll/thread.c @@ -56,6 +56,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(thread); #endif
struct _KUSER_SHARED_DATA *user_shared_data = NULL; +size_t user_shared_data_size = 0; static const WCHAR default_windirW[] = {'C',':','\','w','i','n','d','o','w','s',0};
void (WINAPI *kernel32_start_process)(LPTHREAD_START_ROUTINE,void*) = NULL; @@ -244,6 +245,7 @@ TEB *thread_init(void) exit(1); } user_shared_data = addr; + user_shared_data_size = size; memcpy( user_shared_data->NtSystemRoot, default_windirW, sizeof(default_windirW) );
/* allocate and initialize the PEB and initial TEB */ diff --git a/dlls/ntoskrnl.exe/instr.c b/dlls/ntoskrnl.exe/instr.c index 77803f07d725..0973b3a80a07 100644 --- a/dlls/ntoskrnl.exe/instr.c +++ b/dlls/ntoskrnl.exe/instr.c @@ -593,15 +593,6 @@ static void fake_syscall_function(void) }
-static void update_shared_data(void) -{ - struct _KUSER_SHARED_DATA *shared_data = (struct _KUSER_SHARED_DATA *)wine_user_shared_data; - - shared_data->u.TickCountQuad = GetTickCount64(); - shared_data->u.TickCount.High2Time = shared_data->u.TickCount.High1Time; -} - - /*********************************************************************** * emulate_instruction * @@ -802,7 +793,6 @@ static DWORD emulate_instruction( EXCEPTION_RECORD *rec, CONTEXT *context ) if (offset <= sizeof(KSHARED_USER_DATA) - data_size) { ULONGLONG temp = 0; - update_shared_data(); memcpy( &temp, wine_user_shared_data + offset, data_size ); store_reg_word( context, instr[2], (BYTE *)&temp, long_op, rex ); context->Rip += prefixlen + len + 2; @@ -823,7 +813,6 @@ static DWORD emulate_instruction( EXCEPTION_RECORD *rec, CONTEXT *context )
if (offset <= sizeof(KSHARED_USER_DATA) - data_size) { - update_shared_data(); switch (*instr) { case 0x8a: store_reg_byte( context, instr[1], wine_user_shared_data + offset, rex ); break; @@ -845,7 +834,6 @@ static DWORD emulate_instruction( EXCEPTION_RECORD *rec, CONTEXT *context )
if (offset <= sizeof(KSHARED_USER_DATA) - data_size) { - update_shared_data(); memcpy( &context->Rax, wine_user_shared_data + offset, data_size ); context->Rip += prefixlen + len + 1; return ExceptionContinueExecution; diff --git a/server/file.h b/server/file.h index 7395814dadd2..269c8bed79f6 100644 --- a/server/file.h +++ b/server/file.h @@ -173,6 +173,8 @@ extern struct file *get_mapping_file( struct process *process, client_ptr_t base extern void free_mapped_views( struct process *process ); extern int get_page_size(void);
+extern obj_handle_t get_usd_handle( const void *usd_init, data_size_t usd_size ); + /* device functions */
extern struct object *create_named_pipe_device( struct object *root, const struct unicode_str *name ); diff --git a/server/mapping.c b/server/mapping.c index 6990a1913d76..8cd2260feae1 100644 --- a/server/mapping.c +++ b/server/mapping.c @@ -35,6 +35,7 @@ #define WIN32_NO_STATUS #include "windef.h" #include "winternl.h" +#include "ddk/wdm.h"
#include "file.h" #include "handle.h" @@ -943,6 +944,89 @@ int get_page_size(void) return page_mask + 1; }
+struct object *kusd_mapping; +static KSHARED_USER_DATA *kusd = MAP_FAILED; +static const timeout_t kusd_timeout = 16 * -TICKS_PER_SEC / 1000; + +static void kusd_set_current_time( void *private ) +{ + ULONG system_time_high = current_time >> 32; + ULONG system_time_low = current_time & 0xffffffff; + ULONG interrupt_time_high = monotonic_time >> 32; + ULONG interrupt_time_low = monotonic_time & 0xffffffff; + ULONG tick_count_high = (monotonic_time * 1000 / TICKS_PER_SEC) >> 32; + ULONG tick_count_low = (monotonic_time * 1000 / TICKS_PER_SEC) & 0xffffffff; + KSHARED_USER_DATA *ptr = kusd; + + add_timeout_user( kusd_timeout, kusd_set_current_time, NULL ); + +#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7))) + __atomic_store_n(&ptr->SystemTime.High2Time, system_time_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->SystemTime.High1Time, system_time_high, __ATOMIC_SEQ_CST); + + __atomic_store_n(&ptr->InterruptTime.High2Time, interrupt_time_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->InterruptTime.LowPart, interrupt_time_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->InterruptTime.High1Time, interrupt_time_high, __ATOMIC_SEQ_CST); + + __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low, __ATOMIC_SEQ_CST); +#else + ptr->SystemTime.High2Time = system_time_high; + ptr->SystemTime.LowPart = system_time_low; + ptr->SystemTime.High1Time = system_time_high; + + ptr->InterruptTime.High2Time = interrupt_time_high; + ptr->InterruptTime.LowPart = interrupt_time_low; + ptr->InterruptTime.High1Time = interrupt_time_high; + + ptr->TickCount.High2Time = tick_count_high; + ptr->TickCount.LowPart = tick_count_low; + ptr->TickCount.High1Time = tick_count_high; + ptr->TickCountLowDeprecated = tick_count_low; +#endif +} + +obj_handle_t get_usd_handle( const void *usd_init, data_size_t usd_size ) +{ + /* keep it the same as user_shared_data_size in ntdll */ + size_t size = 0x10000; + struct fd *fd; + + if (sizeof(*kusd) != usd_size) + { + set_error( STATUS_INVALID_PARAMETER ); + return 0; + } + + if (!kusd_mapping) + { + if (!(kusd_mapping = create_mapping( NULL, NULL, 0, size, SEC_COMMIT, 0, 0, NULL ))) + return 0; + make_object_static( kusd_mapping ); + + if (!(fd = mapping_get_fd( kusd_mapping ))) + return 0; + + if ((kusd = mmap( NULL, size, PROT_WRITE, MAP_SHARED, get_unix_fd( fd ), 0 )) != MAP_FAILED) + { + memcpy( kusd, usd_init, usd_size ); + kusd_set_current_time( NULL ); + } + release_object( fd ); + } + + if (kusd == MAP_FAILED) + { + set_error( STATUS_NO_MEMORY ); + return 0; + } + + return alloc_handle( current->process, kusd_mapping, SECTION_QUERY|SECTION_MAP_READ, 0 ); +} + /* create a file mapping */ DECL_HANDLER(create_mapping) { diff --git a/server/process.c b/server/process.c index 211207ed03b6..d5800b05a01c 100644 --- a/server/process.c +++ b/server/process.c @@ -1390,6 +1390,7 @@ DECL_HANDLER(init_process_done) if (req->gui) process->idle_event = create_event( NULL, NULL, 0, 1, 0, NULL ); if (process->debugger) set_process_debug_flag( process, 1 ); reply->suspend = (current->suspend || process->suspend); + reply->usd_handle = get_usd_handle( get_req_data(), get_req_data_size() ); }
/* open a handle to a process */ diff --git a/server/protocol.def b/server/protocol.def index 06a29b153ea0..7d1071cd86e3 100644 --- a/server/protocol.def +++ b/server/protocol.def @@ -854,8 +854,10 @@ struct rawinput_device mod_handle_t module; /* main module base address */ client_ptr_t ldt_copy; /* address of LDT copy (in thread address space) */ client_ptr_t entry; /* process entry point */ + VARARG(usd,bytes); /* USD initialization data */ @REPLY int suspend; /* is process suspended? */ + obj_handle_t usd_handle; /* USD mapping handle */ @END
Rémi Bernon rbernon@codeweavers.com writes:
+#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)))
- __atomic_store_n(&ptr->SystemTime.High2Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.High1Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High2Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.LowPart, interrupt_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High1Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low, __ATOMIC_SEQ_CST);
Is this gcc-specific code really necessary?
+obj_handle_t get_usd_handle( const void *usd_init, data_size_t usd_size ) +{
- /* keep it the same as user_shared_data_size in ntdll */
- size_t size = 0x10000;
- struct fd *fd;
- if (sizeof(*kusd) != usd_size)
- {
set_error( STATUS_INVALID_PARAMETER );
return 0;
- }
- if (!kusd_mapping)
- {
if (!(kusd_mapping = create_mapping( NULL, NULL, 0, size, SEC_COMMIT, 0, 0, NULL )))
return 0;
make_object_static( kusd_mapping );
if (!(fd = mapping_get_fd( kusd_mapping )))
return 0;
if ((kusd = mmap( NULL, size, PROT_WRITE, MAP_SHARED, get_unix_fd( fd ), 0 )) != MAP_FAILED)
{
memcpy( kusd, usd_init, usd_size );
kusd_set_current_time( NULL );
}
release_object( fd );
- }
- if (kusd == MAP_FAILED)
- {
set_error( STATUS_NO_MEMORY );
return 0;
- }
- return alloc_handle( current->process, kusd_mapping, SECTION_QUERY|SECTION_MAP_READ, 0 );
+}
I still think it would be cleaner to use standard APIs for this, something like NtCreateSection("__wine_shared_user_data"), on STATUS_SUCCESS initialize it and tell the server about it, and on STATUS_OBJECT_NAME_EXISTS simply map it.
On 5/7/20 10:45 PM, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
+#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)))
- __atomic_store_n(&ptr->SystemTime.High2Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.High1Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High2Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.LowPart, interrupt_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High1Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low, __ATOMIC_SEQ_CST);
Is this gcc-specific code really necessary?
I'm not sure it is strictly necessary but then it depends what guarantees we want to have. The volatile qualifier only acts as a compiler barrier and will make sure that the stores aren't optimized or reordered by the compiler. However it doesn't enforce anything on the CPU. I believe that X86 has global store ordering guarantees, but a quick search tells me that ARM or POWER don't, so the stores to the various members may be seen out of order from another CPU depending on the architecture.
Then, if it is about the __atomic builtins usage rather than __sync builtins that are sometimes used elsewhere, it's because these builtins provide cleaner semantics, as well as read-only and write-only operations -- whereas __sync only offers read+write operations. The page is mapped write-only in the server, so there was no guarantee that it was possible to read it. It's maybe a bit overkill and PROT_WRITE also somtimes implies PROT_READ, so maybe it should go this way and use __sync builtins instead?
+obj_handle_t get_usd_handle( const void *usd_init, data_size_t usd_size ) +{
- /* keep it the same as user_shared_data_size in ntdll */
- size_t size = 0x10000;
- struct fd *fd;
- if (sizeof(*kusd) != usd_size)
- {
set_error( STATUS_INVALID_PARAMETER );
return 0;
- }
- if (!kusd_mapping)
- {
if (!(kusd_mapping = create_mapping( NULL, NULL, 0, size, SEC_COMMIT, 0, 0, NULL )))
return 0;
make_object_static( kusd_mapping );
if (!(fd = mapping_get_fd( kusd_mapping )))
return 0;
if ((kusd = mmap( NULL, size, PROT_WRITE, MAP_SHARED, get_unix_fd( fd ), 0 )) != MAP_FAILED)
{
memcpy( kusd, usd_init, usd_size );
kusd_set_current_time( NULL );
}
release_object( fd );
- }
- if (kusd == MAP_FAILED)
- {
set_error( STATUS_NO_MEMORY );
return 0;
- }
- return alloc_handle( current->process, kusd_mapping, SECTION_QUERY|SECTION_MAP_READ, 0 );
+}
I still think it would be cleaner to use standard APIs for this, something like NtCreateSection("__wine_shared_user_data"), on STATUS_SUCCESS initialize it and tell the server about it, and on STATUS_OBJECT_NAME_EXISTS simply map it.
Alright.
Rémi Bernon rbernon@codeweavers.com writes:
On 5/7/20 10:45 PM, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
+#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)))
- __atomic_store_n(&ptr->SystemTime.High2Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.High1Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High2Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.LowPart, interrupt_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High1Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low, __ATOMIC_SEQ_CST);
Is this gcc-specific code really necessary?
I'm not sure it is strictly necessary but then it depends what guarantees we want to have. The volatile qualifier only acts as a compiler barrier and will make sure that the stores aren't optimized or reordered by the compiler. However it doesn't enforce anything on the CPU. I believe that X86 has global store ordering guarantees, but a quick search tells me that ARM or POWER don't, so the stores to the various members may be seen out of order from another CPU depending on the architecture.
My concern is that if the code is necessary, it means that the fallback path for older gccs would yield a broken implementation. But if the fallback works correctly on x86 that's probably good enough.
On 08.05.2020 22:12, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
On 5/7/20 10:45 PM, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
+#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)))
- __atomic_store_n(&ptr->SystemTime.High2Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->SystemTime.High1Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High2Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.LowPart, interrupt_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High1Time, interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low, __ATOMIC_SEQ_CST);
Is this gcc-specific code really necessary?
I'm not sure it is strictly necessary but then it depends what guarantees we want to have. The volatile qualifier only acts as a compiler barrier and will make sure that the stores aren't optimized or reordered by the compiler. However it doesn't enforce anything on the CPU. I believe that X86 has global store ordering guarantees, but a quick search tells me that ARM or POWER don't, so the stores to the various members may be seen out of order from another CPU depending on the architecture.
My concern is that if the code is necessary, it means that the fallback path for older gccs would yield a broken implementation. But if the fallback works correctly on x86 that's probably good enough.
The check should probably be adjusted for clang through. It pretends to be GCC 4.2 and supports __atomic_store_n (and is a required compiler on aarch64).
Jacek
On 5/9/20 1:18 AM, Jacek Caban wrote:
On 08.05.2020 22:12, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
On 5/7/20 10:45 PM, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
+#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7))) + __atomic_store_n(&ptr->SystemTime.High2Time, system_time_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->SystemTime.High1Time, system_time_high, __ATOMIC_SEQ_CST);
+ __atomic_store_n(&ptr->InterruptTime.High2Time, interrupt_time_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->InterruptTime.LowPart, interrupt_time_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->InterruptTime.High1Time, interrupt_time_high, __ATOMIC_SEQ_CST);
+ __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low, __ATOMIC_SEQ_CST);
Is this gcc-specific code really necessary?
I'm not sure it is strictly necessary but then it depends what guarantees we want to have. The volatile qualifier only acts as a compiler barrier and will make sure that the stores aren't optimized or reordered by the compiler. However it doesn't enforce anything on the CPU. I believe that X86 has global store ordering guarantees, but a quick search tells me that ARM or POWER don't, so the stores to the various members may be seen out of order from another CPU depending on the architecture.
My concern is that if the code is necessary, it means that the fallback path for older gccs would yield a broken implementation. But if the fallback works correctly on x86 that's probably good enough.
The check should probably be adjusted for clang through. It pretends to be GCC 4.2 and supports __atomic_store_n (and is a required compiler on aarch64).
Jacek
So would it be acceptable to extend the check for clang and simply error in the case where __atomic builtins aren't supported? Or should we keep the fallback but check that it's on X86 only?
There's also a possible intermediate but heavier fallback, usable on clang 3.0 or gcc >= 4.1, with __sync_synchronize, but still GCC-like specific again.
Or, do the complete portable implementation with the asm variants for all the architectures we need. What compilers / architectures are we willing for wineserver?
On 09.05.2020 11:22, Rémi Bernon wrote:
On 5/9/20 1:18 AM, Jacek Caban wrote:
On 08.05.2020 22:12, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
On 5/7/20 10:45 PM, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
+#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7))) + __atomic_store_n(&ptr->SystemTime.High2Time, system_time_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->SystemTime.High1Time, system_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High2Time,
interrupt_time_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.LowPart,
interrupt_time_low, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->InterruptTime.High1Time,
interrupt_time_high, __ATOMIC_SEQ_CST);
+ __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, __ATOMIC_SEQ_CST); + __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, __ATOMIC_SEQ_CST);
- __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low,
__ATOMIC_SEQ_CST);
Is this gcc-specific code really necessary?
I'm not sure it is strictly necessary but then it depends what guarantees we want to have. The volatile qualifier only acts as a compiler barrier and will make sure that the stores aren't optimized or reordered by the compiler. However it doesn't enforce anything on the CPU. I believe that X86 has global store ordering guarantees, but a quick search tells me that ARM or POWER don't, so the stores to the various members may be seen out of order from another CPU depending on the architecture.
My concern is that if the code is necessary, it means that the fallback path for older gccs would yield a broken implementation. But if the fallback works correctly on x86 that's probably good enough.
The check should probably be adjusted for clang through. It pretends to be GCC 4.2 and supports __atomic_store_n (and is a required compiler on aarch64).
Jacek
So would it be acceptable to extend the check for clang and simply error in the case where __atomic builtins aren't supported? Or should we keep the fallback but check that it's on X86 only?
There's also a possible intermediate but heavier fallback, usable on clang 3.0 or gcc >= 4.1, with __sync_synchronize, but still GCC-like specific again.
Or, do the complete portable implementation with the asm variants for all the architectures we need.
The implementation and use of __atomic_store_n is fine, it's just values of __GNUC__ and __GNUC_MINOR__ that we can't trust on clang, so it's only #if that could be improved. In practice, you could probably just add "|| defined(__clang__)" there. There is also__has_builtin(__atomic_store_n) that could be used, but I'm not sure it's worth it. We could also avoid dealing with hardcoded compiler version and use a configure check.
What compilers / architectures are we willing for wineserver?
It should be generally portable (although GCC and Clang are primary targets in practice). In this case, I noticed it because you mentioned ARMs and on aarch64 Clang is currently the only supported compiler because of __builtin_ms_va_list.
Jacek
On 5/9/20 1:11 PM, Jacek Caban wrote:
On 09.05.2020 11:22, Rémi Bernon wrote:
On 5/9/20 1:18 AM, Jacek Caban wrote:
On 08.05.2020 22:12, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
On 5/7/20 10:45 PM, Alexandre Julliard wrote:
Rémi Bernon rbernon@codeweavers.com writes:
> +#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && > (__GNUC_MINOR__ >= 7))) > + __atomic_store_n(&ptr->SystemTime.High2Time, > system_time_high, __ATOMIC_SEQ_CST); > + __atomic_store_n(&ptr->SystemTime.LowPart, system_time_low, > __ATOMIC_SEQ_CST); > + __atomic_store_n(&ptr->SystemTime.High1Time, > system_time_high, __ATOMIC_SEQ_CST); > + > + __atomic_store_n(&ptr->InterruptTime.High2Time, > interrupt_time_high, __ATOMIC_SEQ_CST); > + __atomic_store_n(&ptr->InterruptTime.LowPart, > interrupt_time_low, __ATOMIC_SEQ_CST); > + __atomic_store_n(&ptr->InterruptTime.High1Time, > interrupt_time_high, __ATOMIC_SEQ_CST); > + > + __atomic_store_n(&ptr->TickCount.High2Time, tick_count_high, > __ATOMIC_SEQ_CST); > + __atomic_store_n(&ptr->TickCount.LowPart, tick_count_low, > __ATOMIC_SEQ_CST); > + __atomic_store_n(&ptr->TickCount.High1Time, tick_count_high, > __ATOMIC_SEQ_CST); > + __atomic_store_n(&ptr->TickCountLowDeprecated, tick_count_low, > __ATOMIC_SEQ_CST); Is this gcc-specific code really necessary?
I'm not sure it is strictly necessary but then it depends what guarantees we want to have. The volatile qualifier only acts as a compiler barrier and will make sure that the stores aren't optimized or reordered by the compiler. However it doesn't enforce anything on the CPU. I believe that X86 has global store ordering guarantees, but a quick search tells me that ARM or POWER don't, so the stores to the various members may be seen out of order from another CPU depending on the architecture.
My concern is that if the code is necessary, it means that the fallback path for older gccs would yield a broken implementation. But if the fallback works correctly on x86 that's probably good enough.
The check should probably be adjusted for clang through. It pretends to be GCC 4.2 and supports __atomic_store_n (and is a required compiler on aarch64).
Jacek
So would it be acceptable to extend the check for clang and simply error in the case where __atomic builtins aren't supported? Or should we keep the fallback but check that it's on X86 only?
There's also a possible intermediate but heavier fallback, usable on clang 3.0 or gcc >= 4.1, with __sync_synchronize, but still GCC-like specific again.
Or, do the complete portable implementation with the asm variants for all the architectures we need.
The implementation and use of __atomic_store_n is fine, it's just values of __GNUC__ and __GNUC_MINOR__ that we can't trust on clang, so it's only #if that could be improved. In practice, you could probably just add "|| defined(__clang__)" there. There is also__has_builtin(__atomic_store_n) that could be used, but I'm not sure it's worth it. We could also avoid dealing with hardcoded compiler version and use a configure check.
Yes, I'm planning on adding a __clang__ version check, as some quick test tells me that __atomic builtins are only supported for clang >= 3.1.
But then, as the fallback isn't very reliable and depends on the guarantees that the target architecture provides, it can either simply be completely unsupported and report a compilation error, or we can try to be a bit clever and have it only for X86 (but that may not be a good idea).
Rémi Bernon rbernon@codeweavers.com writes:
The implementation and use of __atomic_store_n is fine, it's just values of __GNUC__ and __GNUC_MINOR__ that we can't trust on clang, so it's only #if that could be improved. In practice, you could probably just add "|| defined(__clang__)" there. There is also__has_builtin(__atomic_store_n) that could be used, but I'm not sure it's worth it. We could also avoid dealing with hardcoded compiler version and use a configure check.
Yes, I'm planning on adding a __clang__ version check, as some quick test tells me that __atomic builtins are only supported for clang >= 3.1.
But then, as the fallback isn't very reliable and depends on the guarantees that the target architecture provides, it can either simply be completely unsupported and report a compilation error, or we can try to be a bit clever and have it only for X86 (but that may not be a good idea).
If the fallback code is good enough for x86, you could make that the default, and then have the __atomic code be the fallback on other architectures. Then you don't need compiler checks, we can assume that we have a recent compiler on the more "exotic" architectures.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/ntdll/thread.c | 2 +- server/mapping.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/thread.c b/dlls/ntdll/thread.c index 6e5428bf0f06..7566dbb8fc7a 100644 --- a/dlls/ntdll/thread.c +++ b/dlls/ntdll/thread.c @@ -236,7 +236,7 @@ TEB *thread_init(void) /* reserve space for shared user data */
addr = (void *)0x7ffe0000; - size = 0x10000; + size = 0x1000; status = NtAllocateVirtualMemory( NtCurrentProcess(), &addr, 0, &size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE ); if (status) diff --git a/server/mapping.c b/server/mapping.c index 8cd2260feae1..bf34fa52f37d 100644 --- a/server/mapping.c +++ b/server/mapping.c @@ -992,7 +992,7 @@ static void kusd_set_current_time( void *private ) obj_handle_t get_usd_handle( const void *usd_init, data_size_t usd_size ) { /* keep it the same as user_shared_data_size in ntdll */ - size_t size = 0x10000; + size_t size = 0x1000; struct fd *fd;
if (sizeof(*kusd) != usd_size)