Signed-off-by: Rémi Bernon rbernon@codeweavers.com ---
These are some tests to validate that RtlQueryPerformanceCounter should be able to bypass the NtQueryPerformanceCounter syscall and be optimised with rdtsc(p). The XSTATE save and restore is making the syscall much slower than it was before, and some applications are now burning CPU calling Qpc in a tight loop.
It also looks like there's a new shared page in town since w10v1809, and it's pretty much undocumented. On previous versions the bypass is not always enabled; for instance, it's not enabled on the testbot VMs, but I was able to test it in a local VM with w10v1511, where it only seems to be using the USD values.
dlls/ntdll/tests/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/tests/time.c b/dlls/ntdll/tests/time.c index d756a8c839c..931bf765aa7 100644 --- a/dlls/ntdll/tests/time.c +++ b/dlls/ntdll/tests/time.c @@ -206,9 +206,9 @@ static void test_user_shared_data_time(void) { t1 = GetTickCount(); if (user_shared_data->NtMajorVersion <= 5 && user_shared_data->NtMinorVersion <= 1) - t2 = (*(volatile ULONG*)&user_shared_data->TickCountLowDeprecated * (ULONG64)user_shared_data->TickCountMultiplier) >> 24; + t2 = (DWORD)((*(volatile ULONG*)&user_shared_data->TickCountLowDeprecated * (ULONG64)user_shared_data->TickCountMultiplier) >> 24); else - t2 = (read_ksystem_time(&user_shared_data->u.TickCount) * user_shared_data->TickCountMultiplier) >> 24; + t2 = (DWORD)((read_ksystem_time(&user_shared_data->u.TickCount) * user_shared_data->TickCountMultiplier) >> 24); t3 = GetTickCount(); } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/ntdll/tests/time.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/dlls/ntdll/tests/time.c b/dlls/ntdll/tests/time.c index 931bf765aa7..51d9115b87c 100644 --- a/dlls/ntdll/tests/time.c +++ b/dlls/ntdll/tests/time.c @@ -243,14 +243,13 @@ static void test_user_shared_data_time(void) do { pRtlQueryUnbiasedInterruptTime(&t1); - t2 = read_ksystem_time(&user_shared_data->InterruptTime); + t2 = read_ksystem_time(&user_shared_data->InterruptTime) - user_shared_data->InterruptTimeBias; pRtlQueryUnbiasedInterruptTime(&t3); } while(t3 < t1 && i++ < 1); /* allow for wrap, but only once */
ok(t1 <= t2, "USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order %s %s\n", wine_dbgstr_longlong(t1), wine_dbgstr_longlong(t2)); - ok(t2 <= t3 || broken(t2 == t3 + 82410089070) /* w864 has some weird offset on testbot */, - "USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order %s %s\n", + ok(t2 <= t3, "USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order %s %s\n", wine_dbgstr_longlong(t2), wine_dbgstr_longlong(t3)); } }
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- include/winnt.h | 5 +++++ include/winternl.h | 1 + 2 files changed, 6 insertions(+)
diff --git a/include/winnt.h b/include/winnt.h index 7973960bf21..5cac1baac7e 100644 --- a/include/winnt.h +++ b/include/winnt.h @@ -6729,6 +6729,11 @@ typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX } DUMMYUNIONNAME; } SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
+typedef struct _SYSTEM_HYPERVISOR_SHARED_PAGE_INFORMATION +{ + PVOID HypervisorSharedUserVa; +} SYSTEM_HYPERVISOR_SHARED_PAGE_INFORMATION, *PSYSTEM_HYPERVISOR_SHARED_PAGE_INFORMATION; + /* Threadpool things */ typedef DWORD TP_VERSION,*PTP_VERSION;
diff --git a/include/winternl.h b/include/winternl.h index f0ab223ef2e..14c5f7bc99d 100644 --- a/include/winternl.h +++ b/include/winternl.h @@ -1306,6 +1306,7 @@ typedef enum _SYSTEM_INFORMATION_CLASS { SystemFileCacheInformationEx = 81, SystemDynamicTimeZoneInformation = 102, SystemLogicalProcessorInformationEx = 107, + SystemHypervisorSharedPageInformation = 197, SystemInformationClassMax } SYSTEM_INFORMATION_CLASS, *PSYSTEM_INFORMATION_CLASS;
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- include/ddk/wdm.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/include/ddk/wdm.h b/include/ddk/wdm.h index c0b97e243aa..3b9af7d52b2 100644 --- a/include/ddk/wdm.h +++ b/include/ddk/wdm.h @@ -1292,12 +1292,26 @@ typedef struct _KUSER_SHARED_DATA { volatile ULONGLONG QpcBias; /* 0x3b8 */ ULONG ActiveProcessorCount; /* 0x3c0 */ volatile UCHAR ActiveGroupCount; /* 0x3c4 */ - USHORT QpcData; /* 0x3c6 */ + union { + USHORT QpcData; /* 0x3c6 */ + struct { + UCHAR volatile QpcBypassEnabled; + UCHAR QpcShift; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME3; LARGE_INTEGER TimeZoneBiasEffectiveStart; /* 0x3c8 */ LARGE_INTEGER TimeZoneBiasEffectiveEnd; /* 0x3d0 */ XSTATE_CONFIGURATION XState; /* 0x3d8 */ } KSHARED_USER_DATA, *PKSHARED_USER_DATA;
+#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED 0x01 +#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_HV_PAGE 0x02 +#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_DISABLE_32BIT 0x04 +#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_MFENCE 0x10 +#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_LFENCE 0x20 +#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_A73_ERRATA 0x40 +#define SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_RDTSCP 0x80 + typedef enum _MEMORY_CACHING_TYPE { MmNonCached = 0, MmCached = 1,
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- include/msvcrt/intrin.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/include/msvcrt/intrin.h b/include/msvcrt/intrin.h index 781c6fac823..061866fe63b 100644 --- a/include/msvcrt/intrin.h +++ b/include/msvcrt/intrin.h @@ -20,6 +20,36 @@ static inline void __cpuid(int info[4], int ax) { return __cpuidex(info, ax, 0); } + +#ifdef __i386 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a; + __asm__ ("rdtsc" : "=A" (a)); + return a; +} + +static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a; + __asm__ ("rdtscp" : "=A" (a), "=c" (*aux)); + return a; +} +#elif defined __amd64 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a, d; + __asm__ ("rdtsc" : "=a" (a), "=d" (d)); + return (d << 32) | a; +} + +static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a, d; + __asm__ ("rdtscp" : "=a" (a), "=d" (d), "=c" (*aux)); + return (d << 32) | a; +} +#endif #endif
#ifdef __aarch64__
Hi Rémi,
On 10.03.2021 12:22, Rémi Bernon wrote:
Signed-off-by: Rémi Bernon rbernon@codeweavers.com
include/msvcrt/intrin.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/include/msvcrt/intrin.h b/include/msvcrt/intrin.h index 781c6fac823..061866fe63b 100644 --- a/include/msvcrt/intrin.h +++ b/include/msvcrt/intrin.h @@ -20,6 +20,36 @@ static inline void __cpuid(int info[4], int ax) { return __cpuidex(info, ax, 0); }
+#ifdef __i386 +static inline unsigned __int64 __rdtsc(void) +{
- unsigned __int64 a;
- __asm__ ("rdtsc" : "=A" (a));
- return a;
+}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{
- unsigned __int64 a;
- __asm__ ("rdtscp" : "=A" (a), "=c" (*aux));
- return a;
+} +#elif defined __amd64 +static inline unsigned __int64 __rdtsc(void) +{
- unsigned __int64 a, d;
- __asm__ ("rdtsc" : "=a" (a), "=d" (d));
- return (d << 32) | a;
+}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{
- unsigned __int64 a, d;
- __asm__ ("rdtscp" : "=a" (a), "=d" (d), "=c" (*aux));
- return (d << 32) | a;
+} +#endif #endif
I think you meant __i386__ and __x86_64__ for guards. If guards were right, you'd get a redefinition because __rdtsc is a compiler builtin. It seems to me that all we need for __rdtsc is to provide MSVC-style declaration in winnt.h, like it's done in WinSDK.
Thanks,
Jacek
On 3/10/21 3:57 PM, Jacek Caban wrote:
Hi Rémi,
On 10.03.2021 12:22, Rémi Bernon wrote:
Signed-off-by: Rémi Bernon rbernon@codeweavers.com
include/msvcrt/intrin.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/include/msvcrt/intrin.h b/include/msvcrt/intrin.h index 781c6fac823..061866fe63b 100644 --- a/include/msvcrt/intrin.h +++ b/include/msvcrt/intrin.h @@ -20,6 +20,36 @@ static inline void __cpuid(int info[4], int ax) { return __cpuidex(info, ax, 0); }
+#ifdef __i386 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a; + __asm__ ("rdtsc" : "=A" (a)); + return a; +}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a; + __asm__ ("rdtscp" : "=A" (a), "=c" (*aux)); + return a; +} +#elif defined __amd64 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a, d; + __asm__ ("rdtsc" : "=a" (a), "=d" (d)); + return (d << 32) | a; +}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a, d; + __asm__ ("rdtscp" : "=a" (a), "=d" (d), "=c" (*aux)); + return (d << 32) | a; +} +#endif #endif
I think you meant __i386__ and __x86_64__ for guards. If guards were right, you'd get a redefinition because __rdtsc is a compiler builtin. It seems to me that all we need for __rdtsc is to provide MSVC-style declaration in winnt.h, like it's done in WinSDK.
Thanks,
Jacek
Yeah I'm not sure where I got these guards from, but I think the test failed to compile, so I assumed MinGW missed these intrinsics? I'll double check.
On 3/10/21 4:00 PM, Rémi Bernon wrote:
On 3/10/21 3:57 PM, Jacek Caban wrote:
Hi Rémi,
On 10.03.2021 12:22, Rémi Bernon wrote:
Signed-off-by: Rémi Bernon rbernon@codeweavers.com
include/msvcrt/intrin.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/include/msvcrt/intrin.h b/include/msvcrt/intrin.h index 781c6fac823..061866fe63b 100644 --- a/include/msvcrt/intrin.h +++ b/include/msvcrt/intrin.h @@ -20,6 +20,36 @@ static inline void __cpuid(int info[4], int ax) { return __cpuidex(info, ax, 0); }
+#ifdef __i386 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a; + __asm__ ("rdtsc" : "=A" (a)); + return a; +}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a; + __asm__ ("rdtscp" : "=A" (a), "=c" (*aux)); + return a; +} +#elif defined __amd64 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a, d; + __asm__ ("rdtsc" : "=a" (a), "=d" (d)); + return (d << 32) | a; +}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a, d; + __asm__ ("rdtscp" : "=a" (a), "=d" (d), "=c" (*aux)); + return (d << 32) | a; +} +#endif #endif
I think you meant __i386__ and __x86_64__ for guards. If guards were right, you'd get a redefinition because __rdtsc is a compiler builtin. It seems to me that all we need for __rdtsc is to provide MSVC-style declaration in winnt.h, like it's done in WinSDK.
Thanks,
Jacek
Yeah I'm not sure where I got these guards from, but I think the test failed to compile, so I assumed MinGW missed these intrinsics? I'll double check.
So I can confirm that, although the guards aren't the usual ones, there are no __rdtsc(p) intrinsics on MinGW and the test fails to compile without these (and I don't get redefinition errors with them).
Then I agree that these are probably MSVC intrinsics and may need to be guarded for that case, but they are still needed for GCC (and possibly clang?).
On 3/10/21 4:40 PM, Rémi Bernon wrote:
On 3/10/21 4:00 PM, Rémi Bernon wrote:
On 3/10/21 3:57 PM, Jacek Caban wrote:
Hi Rémi,
On 10.03.2021 12:22, Rémi Bernon wrote:
Signed-off-by: Rémi Bernon rbernon@codeweavers.com
include/msvcrt/intrin.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/include/msvcrt/intrin.h b/include/msvcrt/intrin.h index 781c6fac823..061866fe63b 100644 --- a/include/msvcrt/intrin.h +++ b/include/msvcrt/intrin.h @@ -20,6 +20,36 @@ static inline void __cpuid(int info[4], int ax) { return __cpuidex(info, ax, 0); }
+#ifdef __i386 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a; + __asm__ ("rdtsc" : "=A" (a)); + return a; +}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a; + __asm__ ("rdtscp" : "=A" (a), "=c" (*aux)); + return a; +} +#elif defined __amd64 +static inline unsigned __int64 __rdtsc(void) +{ + unsigned __int64 a, d; + __asm__ ("rdtsc" : "=a" (a), "=d" (d)); + return (d << 32) | a; +}
+static inline unsigned __int64 __rdtscp(unsigned int *aux) +{ + unsigned __int64 a, d; + __asm__ ("rdtscp" : "=a" (a), "=d" (d), "=c" (*aux)); + return (d << 32) | a; +} +#endif #endif
I think you meant __i386__ and __x86_64__ for guards. If guards were right, you'd get a redefinition because __rdtsc is a compiler builtin. It seems to me that all we need for __rdtsc is to provide MSVC-style declaration in winnt.h, like it's done in WinSDK.
Thanks,
Jacek
Yeah I'm not sure where I got these guards from, but I think the test failed to compile, so I assumed MinGW missed these intrinsics? I'll double check.
So I can confirm that although the guard aren't the usual ones there's no __rdtsc(p) intrinsics on MinGW and the test fails to compile without these (and I don't get redefinition errors with them).
Then I agree that these are probably MSVC intrinsics and may need to be guarded for that case, but they are still needed for GCC (and possibly clang?).
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used, and would something like this be acceptable instead?
#ifndef _MSC_VER # ifndef __clang__ /* clang >= 4.0.0 has __rdtsc intrinsic */ # if defined(__i386__) static inline unsigned __int64 __rdtsc(void) { unsigned __int64 a; __asm__ ("rdtsc" : "=A" (a)); return a; } # elif defined(__x86_64__) static inline unsigned __int64 __rdtsc(void) { unsigned __int64 a, d; __asm__ ("rdtsc" : "=a" (a), "=d" (d)); return (d << 32) | a; } # endif # endif /* __clang__ */
# if defined(__i386__) static inline unsigned __int64 __rdtscp(unsigned int *aux) { unsigned __int64 a; __asm__ ("rdtscp" : "=A" (a), "=c" (*aux)); return a; } # elif defined(__x86_64__) static inline unsigned __int64 __rdtscp(unsigned int *aux) { unsigned __int64 a, d; __asm__ ("rdtscp" : "=a" (a), "=d" (d), "=c" (*aux)); return (d << 32) | a; } # endif #endif /* _MSC_VER */
Cheers,
On 10.03.2021 17:13, Rémi Bernon wrote:
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used and something like that would be acceptable instead?
There is __has_builtin(), which would be cleaner in this case, IMHO.
Both compilers ship __rdtscp in the ia32intrin.h header (gcc also ships __rdtsc there, which clang doesn't need since it is a builtin). I was considering something like this inside our intrin.h:
#if defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>
#endif
I believe that this should solve your problem. This raises some compatibility concerns, but I think it may be fine. It's shipped with the compilers themselves for all platforms (not by mingw-w64).
Jacek
On 3/10/21 9:44 PM, Jacek Caban wrote:
On 10.03.2021 17:13, Rémi Bernon wrote:
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used and something like that would be acceptable instead?
There is __has_builtin(), which would be cleaner in this case, IMHO.
Both compilers ship __rdtscp in ia32intrin.h header (gcc also __rdtscp, which clang doesn't need). I was considering something like this inside our intrin.h:
#if defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>
#endif
I believe that this should solve your problem. This rises some compatibility concerns, but I think it may be fine. It's shipped with compilers themselves for all platforms (not by mingw-w64).
Jacek
Ah yes sure, I didn't know these were available in intel intrinsic headers. I guess that would be perfect then.
On 3/10/21 10:20 PM, Rémi Bernon wrote:
On 3/10/21 9:44 PM, Jacek Caban wrote:
On 10.03.2021 17:13, Rémi Bernon wrote:
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used and something like that would be acceptable instead?
There is __has_builtin(), which would be cleaner in this case, IMHO.
Both compilers ship __rdtscp in ia32intrin.h header (gcc also __rdtscp, which clang doesn't need). I was considering something like this inside our intrin.h:
#if defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>
#endif
I believe that this should solve your problem. This rises some compatibility concerns, but I think it may be fine. It's shipped with compilers themselves for all platforms (not by mingw-w64).
Jacek
Ah yes sure, I didn't know these were available in intel intrinsic headers. I guess that would be perfect then.
Except that it has some conflicts with msvcrt _(l)rot[r|l] functions in stdlib.h. Can we remove the definitions from there?
On 11.03.2021 11:47, Rémi Bernon wrote:
On 3/10/21 10:20 PM, Rémi Bernon wrote:
On 3/10/21 9:44 PM, Jacek Caban wrote:
On 10.03.2021 17:13, Rémi Bernon wrote:
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used and something like that would be acceptable instead?
There is __has_builtin(), which would be cleaner in this case, IMHO.
Both compilers ship __rdtscp in ia32intrin.h header (gcc also __rdtscp, which clang doesn't need). I was considering something like this inside our intrin.h:
#if defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>
#endif
I believe that this should solve your problem. This rises some compatibility concerns, but I think it may be fine. It's shipped with compilers themselves for all platforms (not by mingw-w64).
Jacek
Ah yes sure, I didn't know these were available in intel intrinsic headers. I guess that would be perfect then.
Except that it has some conflicts with msvcrt _(l)rot[r|l] functions in stdlib.h. Can we remove the definitions from there?
#undefs, like in the attached patch, seem to be enough.
Jacek
On 3/11/21 9:23 PM, Jacek Caban wrote:
On 11.03.2021 11:47, Rémi Bernon wrote:
On 3/10/21 10:20 PM, Rémi Bernon wrote:
On 3/10/21 9:44 PM, Jacek Caban wrote:
On 10.03.2021 17:13, Rémi Bernon wrote:
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used and something like that would be acceptable instead?
There is __has_builtin(), which would be cleaner in this case, IMHO.
Both compilers ship __rdtscp in ia32intrin.h header (gcc also __rdtscp, which clang doesn't need). I was considering something like this inside our intrin.h:
#if defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>
#endif
I believe that this should solve your problem. This rises some compatibility concerns, but I think it may be fine. It's shipped with compilers themselves for all platforms (not by mingw-w64).
Jacek
Ah yes sure, I didn't know these were available in intel intrinsic headers. I guess that would be perfect then.
Except that it has some conflicts with msvcrt _(l)rot[r|l] functions in stdlib.h. Can we remove the definitions from there?
#undefs, like in the attached patch, seem to be enough.
Jacek
IMHO if these are definitions, it would be more efficient to have #ifndef instead, and use the compiler builtin intrinsic when it is available.
But then, I'm not sure we can reliably assume they are always definitions (it seems to be the case on Clang and GCC, but there's no guarantee it'll always be).
On 3/11/21 9:37 PM, Rémi Bernon wrote:
On 3/11/21 9:23 PM, Jacek Caban wrote:
On 11.03.2021 11:47, Rémi Bernon wrote:
On 3/10/21 10:20 PM, Rémi Bernon wrote:
On 3/10/21 9:44 PM, Jacek Caban wrote:
On 10.03.2021 17:13, Rémi Bernon wrote:
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used and something like that would be acceptable instead?
There is __has_builtin(), which would be cleaner in this case, IMHO.
Both compilers ship __rdtscp in ia32intrin.h header (gcc also __rdtscp, which clang doesn't need). I was considering something like this inside our intrin.h:
#if defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>
#endif
I believe that this should solve your problem. This rises some compatibility concerns, but I think it may be fine. It's shipped with compilers themselves for all platforms (not by mingw-w64).
Jacek
Ah yes sure, I didn't know these were available in intel intrinsic headers. I guess that would be perfect then.
Except that it has some conflicts with msvcrt _(l)rot[r|l] functions in stdlib.h. Can we remove the definitions from there?
#undefs, like in the attached patch, seem to be enough.
Jacek
IMHO if these are definitions, it would be more efficient to have #ifndef instead, and use the compiler builtin intrinsic when it is available.
But then, I'm not sure we can reliably assume they are always definitions (it seems to be the case on Clang and GCC, but there's no guarantee it'll always be).
And it's apparently not a #define on ICC, not sure if we care?
On 11.03.2021 21:37, Rémi Bernon wrote:
On 3/11/21 9:23 PM, Jacek Caban wrote:
On 11.03.2021 11:47, Rémi Bernon wrote:
On 3/10/21 10:20 PM, Rémi Bernon wrote:
On 3/10/21 9:44 PM, Jacek Caban wrote:
On 10.03.2021 17:13, Rémi Bernon wrote:
Apparently clang >= 4.0.0 has __rdtsc (but not __rdtscp). Can we just assume clang >= 4.0.0 is used and something like that would be acceptable instead?
There is __has_builtin(), which would be cleaner in this case, IMHO.
Both compilers ship __rdtscp in ia32intrin.h header (gcc also __rdtscp, which clang doesn't need). I was considering something like this inside our intrin.h:
#if defined(__i386__) || defined(__x86_64__)
# include <x86intrin.h>
#endif
I believe that this should solve your problem. This rises some compatibility concerns, but I think it may be fine. It's shipped with compilers themselves for all platforms (not by mingw-w64).
Jacek
Ah yes sure, I didn't know these were available in intel intrinsic headers. I guess that would be perfect then.
Except that it has some conflicts with msvcrt _(l)rot[r|l] functions in stdlib.h. Can we remove the definitions from there?
#undefs, like in the attached patch, seem to be enough.
Jacek
IMHO if these are definitions, it would be more efficient to have #ifndef instead, and use the compiler builtin intrinsic when it is available.
Sure, that should work too.
But then, I'm not sure we can reliably assume they are always definitions (it seems to be the case on Clang and GCC, but there's no guarantee it'll always be).
I kind of expected compatibility glitches in that area, that's why I wasn't sure if x86intrin.h is the way to go. It doesn't seem like a deal breaker to me yet.
Jacek
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/ntdll/tests/time.c | 110 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+)
diff --git a/dlls/ntdll/tests/time.c b/dlls/ntdll/tests/time.c index 51d9115b87c..d028f12e97c 100644 --- a/dlls/ntdll/tests/time.c +++ b/dlls/ntdll/tests/time.c @@ -19,8 +19,10 @@ */
#define NONAMELESSUNION +#define NONAMELESSSTRUCT #include "ntdll_test.h" #include "ddk/wdm.h" +#include "intrin.h"
#define TICKSPERSEC 10000000 #define TICKSPERMSEC 10000 @@ -35,6 +37,9 @@ static NTSTATUS (WINAPI *pRtlQueryTimeZoneInformation)( RTL_TIME_ZONE_INFORMATIO static NTSTATUS (WINAPI *pRtlQueryDynamicTimeZoneInformation)( RTL_DYNAMIC_TIME_ZONE_INFORMATION *); static BOOL (WINAPI *pRtlQueryUnbiasedInterruptTime)( ULONGLONG *time );
+static BOOL (WINAPI *pRtlQueryPerformanceCounter)(LARGE_INTEGER*); +static BOOL (WINAPI *pRtlQueryPerformanceFrequency)(LARGE_INTEGER*); + static const int MonthLengths[2][12] = { { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, @@ -122,6 +127,108 @@ static void test_NtQueryPerformanceCounter(void) ok(status == STATUS_SUCCESS, "expected STATUS_SUCCESS, got %08x\n", status); }
+struct hypervisor_shared_data +{ + UINT64 unknown; + UINT64 QpcMultiplier; + UINT64 QpcBias; +}; + +static void test_RtlQueryPerformanceCounter(void) +{ + SYSTEM_HYPERVISOR_SHARED_PAGE_INFORMATION si; + struct hypervisor_shared_data *hsd; + KSHARED_USER_DATA *usd = (void *)0x7ffe0000; + LARGE_INTEGER frequency, counter; + unsigned int aux; + NTSTATUS status; + UINT64 tsc0, tsc1, mul_lo, mul_hi, tsc_lo, tsc_hi; + ULONG len; + BOOL ret; + + if (!pRtlQueryPerformanceCounter || !pRtlQueryPerformanceFrequency) + { + win_skip( "RtlQueryPerformanceCounter/Frequency not available, skipping tests\n" ); + return; + } + + if (!(usd->u3.s.QpcBypassEnabled & SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED)) + { + todo_wine win_skip("QpcBypassEnabled is not set, skipping tests\n"); + return; + } + + if ((usd->u3.s.QpcBypassEnabled & SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_HV_PAGE)) + { + ok( usd->u3.s.QpcBypassEnabled == (SHARED_GLOBAL_FLAGS_QPC_BYPASS_ENABLED|SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_HV_PAGE|SHARED_GLOBAL_FLAGS_QPC_BYPASS_USE_RDTSCP), + "unexpected QpcBypassEnabled %x, expected 0x83\n", usd->u3.s.QpcBypassEnabled ); + ok( usd->QpcFrequency == 10000000, "unexpected QpcFrequency %I64d, expected 10000000\n", usd->QpcFrequency ); + ok( !usd->u3.s.QpcShift, "unexpected QpcShift %d, expected 0\n", usd->u3.s.QpcShift ); + ok( usd->QpcInterruptTimeIncrement == ((ULONGLONG)1 << 63), + "unexpected QpcInterruptTimeIncrement %I64x, expected 1<<63\n", usd->QpcInterruptTimeIncrement ); + ok( usd->QpcInterruptTimeIncrementShift == 1, + "unexpected QpcInterruptTimeIncrementShift %d, expected 1\n", usd->QpcInterruptTimeIncrementShift ); + ok( usd->QpcSystemTimeIncrement == ((ULONGLONG)1 << 63), + "unexpected QpcSystemTimeIncrement %I64x, expected 1<<63\n", usd->QpcSystemTimeIncrement ); + ok( usd->QpcSystemTimeIncrementShift == 1, + "unexpected QpcSystemTimeIncrementShift %d, expected 1\n", usd->QpcSystemTimeIncrementShift ); + + memset( &si, 0xcc, sizeof(si) ); + status = 
pNtQuerySystemInformation( SystemHypervisorSharedPageInformation, &si, sizeof(si), &len ); + ok( !status, "NtQuerySystemInformation returned %x\n", status ); + ok( len == sizeof(si), "unexpected SystemHypervisorSharedPageInformation length %u\n", len ); + trace( "SystemHypervisorSharedPageInformation: %p\n", si.HypervisorSharedUserVa ); + hsd = (struct hypervisor_shared_data *)si.HypervisorSharedUserVa; + mul_hi = (hsd->QpcMultiplier >> 32); + mul_lo = (UINT32)hsd->QpcMultiplier; + + tsc0 = __rdtscp(&aux); + ret = pRtlQueryPerformanceCounter( &counter ); + tsc1 = __rdtscp(&aux); + ok( ret, "RtlQueryPerformanceCounter failed\n" ); + + tsc_hi = (tsc0 >> 32); + tsc_lo = (UINT32)tsc0; + tsc0 = (tsc_hi * mul_hi) + (tsc_hi * mul_lo >> 32) + (mul_hi * tsc_lo >> 32); + tsc0 += usd->QpcBias + hsd->QpcBias; + + tsc_hi = (tsc1 >> 32); + tsc_lo = (UINT32)tsc1; + tsc1 = (tsc_hi * mul_hi) + (tsc_hi * mul_lo >> 32) + (mul_hi * tsc_lo >> 32); + tsc1 += usd->QpcBias + hsd->QpcBias; + + ok( tsc0 <= counter.QuadPart, "rdtscp %I64d and RtlQueryPerformanceCounter %I64d are out of order\n", tsc0, counter.QuadPart ); + ok( counter.QuadPart <= tsc1, "RtlQueryPerformanceCounter %I64d and rdtscp %I64d are out of order\n", counter.QuadPart, tsc1 ); + } + else + { + ok( usd->u3.s.QpcShift == 10, "unexpected QpcShift %d, expected 10\n", usd->u3.s.QpcShift ); + ok( usd->QpcInterruptTimeIncrementShift == 2, + "unexpected QpcInterruptTimeIncrementShift %d, expected 2\n", usd->QpcInterruptTimeIncrementShift ); + ok( usd->QpcSystemTimeIncrementShift == 2, + "unexpected QpcSystemTimeIncrementShift %d, expected 2\n", usd->QpcSystemTimeIncrementShift ); + + tsc0 = __rdtsc(); + ret = pRtlQueryPerformanceCounter( &counter ); + tsc1 = __rdtsc(); + ok( ret, "RtlQueryPerformanceCounter failed\n" ); + + tsc0 += usd->QpcBias; + tsc0 >>= usd->u3.s.QpcShift; + tsc1 += usd->QpcBias; + tsc1 >>= usd->u3.s.QpcShift; + + ok( tsc0 <= counter.QuadPart, "rdtscp %I64d and RtlQueryPerformanceCounter %I64d are out of 
order\n", tsc0, counter.QuadPart ); + ok( counter.QuadPart <= tsc1, "RtlQueryPerformanceCounter %I64d and rdtscp %I64d are out of order\n", counter.QuadPart, tsc1 ); + } + + ret = pRtlQueryPerformanceFrequency( &frequency ); + ok( ret, "RtlQueryPerformanceFrequency failed\n" ); + ok( frequency.QuadPart == usd->QpcFrequency, + "RtlQueryPerformanceFrequency returned %I64d, expected USD QpcFrequency %I64d\n", + frequency.QuadPart, usd->QpcFrequency ); +} + static void test_RtlQueryTimeZoneInformation(void) { RTL_DYNAMIC_TIME_ZONE_INFORMATION tzinfo, tzinfo2; @@ -266,6 +373,8 @@ START_TEST(time) pRtlQueryDynamicTimeZoneInformation = (void *)GetProcAddress(mod, "RtlQueryDynamicTimeZoneInformation"); pRtlQueryUnbiasedInterruptTime = (void *)GetProcAddress(mod, "RtlQueryUnbiasedInterruptTime"); + pRtlQueryPerformanceCounter = (void *)GetProcAddress(mod, "RtlQueryPerformanceCounter"); + pRtlQueryPerformanceFrequency = (void *)GetProcAddress(mod, "RtlQueryPerformanceFrequency");
if (pRtlTimeToTimeFields && pRtlTimeFieldsToTime) test_pRtlTimeToTimeFields(); @@ -274,4 +383,5 @@ START_TEST(time) test_NtQueryPerformanceCounter(); test_RtlQueryTimeZoneInformation(); test_user_shared_data_time(); + test_RtlQueryPerformanceCounter(); }
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=86777
Your paranoid android.
=== w1064 (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 588260569 and rdtscp 588260568 are out of order
=== w1064_2qxl (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 692368387 and rdtscp 692368386 are out of order
=== w10pro64_ar (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 643900402 and rdtscp 643900401 are out of order
=== w10pro64_ja (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 590879244 and rdtscp 590879243 are out of order
On 3/10/21 6:05 PM, Marvin wrote:
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=86777
Your paranoid android.
=== w1064 (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 588260569 and rdtscp 588260568 are out of order
=== w1064_2qxl (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 692368387 and rdtscp 692368386 are out of order
=== w10pro64_ar (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 643900402 and rdtscp 643900401 are out of order
=== w10pro64_ja (64 bit report) ===
ntdll: time.c:201: Test failed: RtlQueryPerformanceCounter 590879244 and rdtscp 590879243 are out of order
So close... :'(
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=86772
Your paranoid android.
=== w864 (32 bit report) ===
ntdll: time.c:252: Test failed: USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order 157ad58bccc8f 182216aa0
=== w864 (64 bit report) ===
ntdll: time.c:252: Test failed: USD InterruptTime / RtlQueryUnbiasedInterruptTime are out of order 157ad58649653 181c93464