Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- v3: Piotr's reviewed variant. I don't know how well compilers will handle unaligned access on architectures that don't natively support it. One thing to perhaps look out for is that the compiler does not resort to faulting with a software fixup.
-- v2: msvcrt: Faster memcmp().
From: Jan Sikorski jsikorski@codeweavers.com
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com Signed-off-by: Alexandre Julliard julliard@winehq.org --- dlls/msvcrt/string.c | 55 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 6 deletions(-)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c index 3b352ac0bf2..181a161481c 100644 --- a/dlls/msvcrt/string.c +++ b/dlls/msvcrt/string.c @@ -2675,21 +2675,64 @@ int CDECL I10_OUTPUT(MSVCRT__LDOUBLE ld80, int prec, int flag, struct _I10_OUTPU } #undef I10_OUTPUT_MAX_PREC
-/********************************************************************* - * memcmp (MSVCRT.@) - */ -int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n) +static inline int memcmp_bytes(const void *ptr1, const void *ptr2, size_t n) { const unsigned char *p1, *p2;
for (p1 = ptr1, p2 = ptr2; n; n--, p1++, p2++) { - if (*p1 < *p2) return -1; - if (*p1 > *p2) return 1; + if (*p1 != *p2) + return *p1 > *p2 ? 1 : -1; } return 0; }
+static inline int memcmp_blocks(const void *ptr1, const void *ptr2, size_t size) +{ + typedef uint64_t DECLSPEC_ALIGN(1) unaligned_ui64; + + const uint64_t *p1 = ptr1; + const unaligned_ui64 *p2 = ptr2; + size_t remainder = size & (sizeof(uint64_t) - 1); + size_t block_count = size / sizeof(uint64_t); + + while (block_count) + { + if (*p1 != *p2) + return memcmp_bytes(p1, p2, sizeof(uint64_t)); + + p1++; + p2++; + block_count--; + } + + return memcmp_bytes(p1, p2, remainder); +} + +/********************************************************************* + * memcmp (MSVCRT.@) + */ +int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n) +{ + const unsigned char *p1 = ptr1, *p2 = ptr2; + size_t align; + int result; + + if (n < sizeof(uint64_t)) + return memcmp_bytes(p1, p2, n); + + align = -(size_t)p1 & (sizeof(uint64_t) - 1); + + if ((result = memcmp_bytes(p1, p2, align))) + return result; + + p1 += align; + p2 += align; + n -= align; + + return memcmp_blocks(p1, p2, n); +} + #if defined(__i386__) || defined(__x86_64__)
#ifdef __i386__
This merge request was accepted by Alexandre Julliard.