From bd95150f2ef410be8be1438ad77ede7fe70a4483 Mon Sep 17 00:00:00 2001
From: Jan Sikorski
Date: Tue, 19 Apr 2022 15:53:46 +0200
Subject: [PATCH] msvcrt: Faster memcmp().
To: wine-devel

---
 dlls/msvcrt/string.c | 55 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 3b352ac0bf2..181a161481c 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2675,21 +2675,64 @@ int CDECL I10_OUTPUT(MSVCRT__LDOUBLE ld80, int prec, int flag, struct _I10_OUTPU
 }
 #undef I10_OUTPUT_MAX_PREC
 
-/*********************************************************************
- *                  memcmp (MSVCRT.@)
- */
-int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+static inline int memcmp_bytes(const void *ptr1, const void *ptr2, size_t n) /* byte-wise compare of n bytes as unsigned chars; returns -1, 0 or 1 */
 {
     const unsigned char *p1, *p2;
 
     for (p1 = ptr1, p2 = ptr2; n; n--, p1++, p2++)
     {
-        if (*p1 < *p2) return -1;
-        if (*p1 > *p2) return 1;
+        if (*p1 != *p2) /* the first differing byte decides the result */
+            return *p1 > *p2 ? 1 : -1;
     }
     return 0;
 }
 
+static inline int memcmp_blocks(const void *ptr1, const void *ptr2, size_t size) /* compares 8 bytes at a time; ptr1 is read as a plain uint64_t, so the caller is expected to pass it 8-byte aligned */
+{
+    typedef uint64_t DECLSPEC_ALIGN(1) unaligned_ui64; /* uint64_t tagged DECLSPEC_ALIGN(1); used for the ptr2 side, whose alignment is not adjusted */
+
+    const uint64_t *p1 = ptr1;
+    const unaligned_ui64 *p2 = ptr2;
+    size_t remainder = size & (sizeof(uint64_t) - 1); /* bytes left over after the last full 8-byte block */
+    size_t block_count = size / sizeof(uint64_t); /* number of full 8-byte blocks */
+
+    while (block_count)
+    {
+        if (*p1 != *p2) /* blocks differ: rescan these 8 bytes byte-wise to find the ordering */
+            return memcmp_bytes(p1, p2, sizeof(uint64_t));
+
+        p1++;
+        p2++;
+        block_count--;
+    }
+
+    return memcmp_bytes(p1, p2, remainder); /* all full blocks matched; compare the tail byte-wise */
+}
+
+/*********************************************************************
+ *                  memcmp (MSVCRT.@)
+ * Compares the first n bytes of ptr1 and ptr2 as unsigned chars; returns -1, 0 or 1. */
+int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+{
+    const unsigned char *p1 = ptr1, *p2 = ptr2;
+    size_t align;
+    int result;
+
+    if (n < sizeof(uint64_t)) /* too short for even one 8-byte block */
+        return memcmp_bytes(p1, p2, n);
+
+    align = -(size_t)p1 & (sizeof(uint64_t) - 1); /* bytes (0 to 7) until p1 reaches an 8-byte boundary */
+
+    if ((result = memcmp_bytes(p1, p2, align))) /* compare the unaligned head byte-wise */
+        return result;
+
+    p1 += align;
+    p2 += align;
+    n -= align; /* cannot underflow: n >= 8 here and align <= 7 */
+
+    return memcmp_blocks(p1, p2, n); /* p1 is now 8-byte aligned; p2 may still be misaligned */
+}
+
 #if defined(__i386__) || defined(__x86_64__)
 
 #ifdef __i386__
-- 
2.35.1