Signed-off-by: Rémi Bernon rbernon@codeweavers.com ---
This is probably more of an RFC, and I don't expect it to be merged right away anyway. It fixes the high overhead that (Rtl)QueryPerformanceCounter now has with the new syscall frames. There are a few things I'm still not completely confident about:
* The reported Qpc frequency now changes from the default 10MHz. This is also the case on some Windows versions (since they introduced QpcBypass but before they introduced the hypervisor page), and applications shouldn't rely on a fixed frequency, but we can expect that some do.
Since the hypervisor page was introduced, the reported frequency is 10MHz again, and the divider is stored in the new shared page. We could very well implement the same thing, but it would require introducing the new shared memory page.
* The rdtsc(p) frequency calibration done when we cannot get the information from cpuid (on AMD and older Intel CPUs) is still a little imprecise, and I'm not sure how to make it better while keeping the calibration time small. On my computer I can see a ±100KHz error for a nominal 3.6GHz frequency. I believe the Linux kernel also does some estimation there, but it has access to hardware timers that may help it get a better approximation.
programs/wineboot/wineboot.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/programs/wineboot/wineboot.c b/programs/wineboot/wineboot.c index 9427448b612..9e36b3c22dd 100644 --- a/programs/wineboot/wineboot.c +++ b/programs/wineboot/wineboot.c @@ -82,6 +82,8 @@
WINE_DEFAULT_DEBUG_CHANNEL(wineboot);
+#define TICKSPERSEC 10000000 + extern BOOL shutdown_close_windows( BOOL force ); extern BOOL shutdown_all_desktops( BOOL force ); extern void kill_processes( BOOL kill_desktop ); @@ -241,12 +243,28 @@ static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data) TRACE("XSAVE feature 2 %#x, %#x, %#x, %#x.\n", regs[0], regs[1], regs[2], regs[3]); }
+static void initialize_qpc_features(struct _KUSER_SHARED_DATA *data) +{ + data->QpcBypassEnabled = 0; + data->QpcFrequency = TICKSPERSEC; + data->QpcShift = 0; + data->QpcBias = 0; +} + #else
static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data) { }
+static void initialize_qpc_features(struct _KUSER_SHARED_DATA *data) +{ + data->QpcBypassEnabled = 0; + data->QpcFrequency = TICKSPERSEC; + data->QpcShift = 0; + data->QpcBias = 0; +} + #endif
static void create_user_shared_data(void) @@ -336,6 +354,7 @@ static void create_user_shared_data(void) data->ActiveGroupCount = 1;
initialize_xstate_features( data ); + initialize_qpc_features( data );
UnmapViewOfFile( data ); }
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/ntdll/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlls/ntdll/time.c b/dlls/ntdll/time.c index f92443500d8..c0eb1f7f923 100644 --- a/dlls/ntdll/time.c +++ b/dlls/ntdll/time.c @@ -389,7 +389,7 @@ BOOL WINAPI DECLSPEC_HOTPATCH RtlQueryPerformanceCounter( LARGE_INTEGER *counter */ BOOL WINAPI DECLSPEC_HOTPATCH RtlQueryPerformanceFrequency( LARGE_INTEGER *frequency ) { - frequency->QuadPart = TICKSPERSEC; + frequency->QuadPart = user_shared_data->QpcFrequency; return TRUE; }
Signed-off-by: Rémi Bernon rbernon@codeweavers.com
---
 dlls/ntdll/time.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/dlls/ntdll/time.c b/dlls/ntdll/time.c
index c0eb1f7f923..404a07a4402 100644
--- a/dlls/ntdll/time.c
+++ b/dlls/ntdll/time.c
@@ -39,6 +39,7 @@
 #include "wine/exception.h"
 #include "wine/debug.h"
 #include "ntdll_misc.h"
+#include "intrin.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(ntdll);
 
@@ -380,6 +381,32 @@ LONGLONG WINAPI RtlGetSystemTimePrecise( void )
  */
 BOOL WINAPI DECLSPEC_HOTPATCH RtlQueryPerformanceCounter( LARGE_INTEGER *counter )
 {
+#if defined(__i386__) || defined(__x86_64__)
+    /* read the flags once so that every test below sees the same value */
+    unsigned int flags = user_shared_data->u3.QpcBypassEnabled;
+
+    /* fast path: derive the counter from the TSC instead of calling NtQueryPerformanceCounter */
+    if (flags & SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED)
+    {
+        unsigned __int64 tsc;
+        unsigned int aux;
+
+        if (flags & SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_RDTSCP)
+            tsc = __rdtscp(&aux);
+        else
+        {
+            if (flags & SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_MFENCE)
+                __asm__ __volatile__ ( "mfence" : : : "memory" );
+            if (flags & SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_LFENCE)
+                __asm__ __volatile__ ( "lfence" : : : "memory" );
+            tsc = __rdtsc();
+        }
+
+        counter->QuadPart = (tsc + user_shared_data->QpcBias) >> user_shared_data->u3.QpcShift;
+        return TRUE;
+    }
+#endif
+
     NtQueryPerformanceCounter( counter, NULL );
     return TRUE;
 }
Signed-off-by: Rémi Bernon rbernon@codeweavers.com
---
 programs/wineboot/wineboot.c | 143 +++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/programs/wineboot/wineboot.c b/programs/wineboot/wineboot.c
index 9e36b3c22dd..9333116e9af 100644
--- a/programs/wineboot/wineboot.c
+++ b/programs/wineboot/wineboot.c
@@ -245,10 +245,153 @@ static void initialize_xstate_features(struct _KUSER_SHARED_DATA *data)
 
+/* Read the TSC with the serialization method selected in the QpcBypass flags. */
+static ULONGLONG qpc_read_tsc(unsigned int flags)
+{
+    unsigned int aux;
+
+    if (flags & SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_RDTSCP) return __rdtscp(&aux);
+    if (flags & SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_MFENCE) __asm__ __volatile__ ( "mfence" : : : "memory" );
+    if (flags & SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_LFENCE) __asm__ __volatile__ ( "lfence" : : : "memory" );
+    return __rdtsc();
+}
+
+/* Set up the QpcBypass fields of the user shared data. The bypass is only
+ * enabled when RDTSC is available, the TSC is invariant and its frequency can
+ * be read from CPUID or calibrated; otherwise the defaults (bypass disabled,
+ * TICKSPERSEC frequency) are left in place. */
 static void initialize_qpc_features(struct _KUSER_SHARED_DATA *data)
 {
+    int regs[4], cpuid_level, tmp;
+
+    /* leave an already enabled bypass untouched */
+    if (data->QpcBypassEnabled) return;
+
     data->QpcBypassEnabled = 0;
     data->QpcFrequency = TICKSPERSEC;
     data->QpcShift = 0;
     data->QpcBias = 0;
+
+    if (!data->ProcessorFeatures[PF_RDTSC_INSTRUCTION_AVAILABLE])
+    {
+        WARN("No RDTSC support, disabling QpcBypass\n");
+        return;
+    }
+
+    __cpuid(regs, 0x80000000);
+    if ((unsigned int)regs[0] < 0x80000007)
+    {
+        WARN("Unable to check invariant TSC, disabling QpcBypass\n");
+        return;
+    }
+
+    /* check for invariant tsc bit */
+    __cpuid(regs, 0x80000007);
+    if (!(regs[3] & (1 << 8)))
+    {
+        WARN("No invariant TSC, disabling QpcBypass\n");
+        return;
+    }
+    data->QpcBypassEnabled |= SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED;
+
+    /* check for rdtscp support bit */
+    __cpuid(regs, 0x80000001);
+    if ((regs[3] & (1 << 27)))
+        data->QpcBypassEnabled |= SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_RDTSCP;
+    else if (data->ProcessorFeatures[PF_XMMI64_INSTRUCTIONS_AVAILABLE])
+        data->QpcBypassEnabled |= SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_LFENCE;
+    else
+        data->QpcBypassEnabled |= SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_MFENCE;
+
+    /* swap ecx and edx so that regs[1..3] spell out the vendor string */
+    __cpuid(regs, 0);
+    tmp = regs[2];
+    regs[2] = regs[3];
+    regs[3] = tmp;
+    cpuid_level = regs[0];
+
+    /* only available on some intel CPUs */
+    data->QpcFrequency = 0;
+    if (!memcmp(regs + 1, "GenuineIntel", 12) && cpuid_level >= 0x15)
+    {
+        ULONGLONG denom, numer, crystal, tsc_freq = 0;
+
+        __cpuid(regs, 0x15);
+        denom = (unsigned int)regs[0];
+        numer = (unsigned int)regs[1];
+        crystal = (unsigned int)regs[2]; /* ecx is crystal freq in Hz */
+
+        /* 64-bit math: a multi-GHz result does not fit in an int */
+        if (denom && numer && crystal) tsc_freq = crystal * numer / denom;
+        else if (denom && numer && cpuid_level >= 0x16)
+        {
+            __cpuid(regs, 0x16); /* eax is base freq in MHz */
+            tsc_freq = (ULONGLONG)(unsigned int)regs[0] * 1000000;
+        }
+
+        if (!tsc_freq)
+            WARN("Failed to read TSC frequency from CPUID, falling back to calibration.\n");
+        else
+        {
+            data->QpcFrequency = (tsc_freq + (1 << 10) - 1) >> 10;
+            data->QpcShift = 10;
+            data->QpcBias = 0;
+
+            TRACE("TSC frequency read from CPUID, freq %I64d, shift %d, bias %I64d\n",
+                  data->QpcFrequency, data->QpcShift, data->QpcBias);
+        }
+    }
+
+    if (!data->QpcFrequency)
+    {
+        LONGLONG time0, time1, tsc0, tsc1, tsc2, tsc3, freq0 = 0, freq1 = 0, error;
+        unsigned int flags = data->QpcBypassEnabled;
+        BOOL stable = FALSE;
+        UINT retries;
+
+        data->QpcShift = 0;
+        data->QpcBias = 0;
+
+        for (retries = 50; retries && !stable; retries--)
+        {
+            tsc0 = qpc_read_tsc(flags);
+            time0 = RtlGetSystemTimePrecise();
+            tsc1 = qpc_read_tsc(flags);
+            Sleep(1);
+            tsc2 = qpc_read_tsc(flags);
+            time1 = RtlGetSystemTimePrecise();
+            tsc3 = qpc_read_tsc(flags);
+
+            /* skip samples that would divide by zero below */
+            if (time1 <= time0) continue;
+
+            freq0 = (tsc2 - tsc0) * TICKSPERSEC / (time1 - time0);
+            freq1 = (tsc3 - tsc1) * TICKSPERSEC / (time1 - time0);
+            if (freq0 <= 0 || freq1 <= 0) continue;
+
+            /* relative difference between both estimates, in ppm */
+            error = llabs((freq1 - freq0) * 1000000 / min(freq1, freq0));
+            stable = (error <= 100);
+        }
+
+        if (!stable) WARN("TSC frequency calibration failed, unstable TSC?\n");
+        else
+        {
+            data->QpcFrequency = (freq0 + freq1 + (1 << 10) - 1) >> 11;
+            data->QpcShift = 10;
+            data->QpcBias = 0;
+
+            TRACE("TSC frequency calibration complete, freq %I64d, shift %d, bias %I64d\n",
+                  data->QpcFrequency, data->QpcShift, data->QpcBias);
+        }
+    }
+
+    if (!data->QpcFrequency)
+    {
+        WARN("Unable to calibrate TSC frequency, disabling QpcBypass.\n");
+        data->QpcBypassEnabled = 0;
+        data->QpcFrequency = TICKSPERSEC;
+        data->QpcShift = 0;
+        data->QpcBias = 0;
+    }
 }
 
#else