Below are the results of running "the big keybench" benchmark of Xonotic under "perf". The differences are negligible; still, the patch might make more of a difference on slower machines or under different workloads.
branch-misses:

 vanilla-wine  | patched-wine  | improvement
---------------|---------------|------------
 1,757,439,276 | 1,749,075,739 |  -8,363,537
 1,750,670,918 | 1,746,169,971 |  -4,500,947
 1,746,488,762 | 1,748,569,739 |   2,080,977
 1,753,328,811 | 1,739,515,555 | -13,813,256
Command line example for testing:

    export WINEPREFIX=~/.wineTESTING; perf stat -B -e cache-misses,branch-misses,faults ~/Projects/wine/test-build/loader/wine ./xonotic-x86.exe -benchmark demos/the-big-keybench
Signed-off-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
---
 include/windef.h     | 9 +++++++++
 include/wine/debug.h | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/windef.h b/include/windef.h index f9a570d9cd..6d1c348d47 100644 --- a/include/windef.h +++ b/include/windef.h @@ -35,6 +35,15 @@ extern "C" { #endif
+/* Branch hints for performance critical code */ +#ifdef __GNUC__ +#define __likely(expr) (__builtin_expect(expr, 1)) +#define __unlikely(expr) (__builtin_expect(expr, 0)) +#else +#define __likely(expr) (expr) +#define __unlikely(expr) (expr) +#endif + /* Calling conventions definitions */
#if (defined(__x86_64__) || defined(__powerpc64__) || defined(__aarch64__)) && !defined(_WIN64) diff --git a/include/wine/debug.h b/include/wine/debug.h index e9ec81c623..e13376d6db 100644 --- a/include/wine/debug.h +++ b/include/wine/debug.h @@ -58,7 +58,7 @@ struct __wine_debug_channel };
#ifndef WINE_NO_TRACE_MSGS -# define __WINE_GET_DEBUGGING_TRACE(dbch) ((dbch)->flags & (1 << __WINE_DBCL_TRACE)) +# define __WINE_GET_DEBUGGING_TRACE(dbch) __unlikely((dbch)->flags & (1 << __WINE_DBCL_TRACE)) # define WINE_TRACE __WINE_DPRINTF(_TRACE,__wine_dbch___default) # define WINE_TRACE_(ch) __WINE_DPRINTF(_TRACE,&__wine_dbch_##ch) #else