Signed-off-by: Jan Sikorski <jsikorski@codeweavers.com>
---
 dlls/msvcrt/string.c | 63 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 4 deletions(-)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 3b352ac0bf2..7a753fbcd21 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2675,10 +2675,7 @@ int CDECL I10_OUTPUT(MSVCRT__LDOUBLE ld80, int prec, int flag, struct _I10_OUTPU
 }
 #undef I10_OUTPUT_MAX_PREC
 
-/*********************************************************************
- *                  memcmp (MSVCRT.@)
- */
-int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+static inline int memcmp_unaligned(const void *ptr1, const void *ptr2, size_t n)
 {
     const unsigned char *p1, *p2;
@@ -2690,6 +2687,64 @@ int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
     return 0;
 }
 
+static int memcmp_aligned(const void *p1, const void *p2, size_t size)
+{
+    const unsigned char *first = p1, *second = p2;
+    const size_t block_size = sizeof(size_t);
+
+    size_t remainder = size & (block_size - 1);
+    size_t block_count = size / block_size;
+
+    while (block_count-- > 0)
+    {
+        size_t value_1 = *(size_t *)first;
+        size_t value_2 = *(size_t *)second;
+
+        if (value_1 != value_2)
+            return memcmp_unaligned(first, second, block_size);
+
+        first += block_size;
+        second += block_size;
+    }
+
+    return memcmp_unaligned(first, second, remainder);
+}
+
+/*********************************************************************
+ *                  memcmp (MSVCRT.@)
+ */
+int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+{
+    const size_t block_size = sizeof(size_t);
+    const unsigned char *p1 = ptr1, *p2 = ptr2;
+    size_t align;
+    int result;
+
+    if (n < block_size)
+        return memcmp_unaligned(p1, p2, n);
+
+    align = -(uintptr_t)p1 & (block_size - 1);
+
+    if ((result = memcmp_unaligned(p1, p2, align)))
+        return result;
+
+    p1 += align;
+    p2 += align;
+    n -= align;
+
+#if defined(__i386__) || defined(__x86_64__)
+    return memcmp_aligned(p1, p2, n);
+#else
+    if (!((uintptr_t)p2 & (block_size - 1)))
+    {
+        result = memcmp_aligned(p1, p2, n);
+        return result;
+    }
+
+    return memcmp_unaligned(p1, p2, n);
+#endif
+}
+
 #if defined(__i386__) || defined(__x86_64__)
 
 #ifdef __i386__
Hi Jan,
On 4/6/22 19:14, Jan Sikorski wrote:
> -/*********************************************************************
> - *                  memcmp (MSVCRT.@)
> - */
> -int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
> +static inline int memcmp_unaligned(const void *ptr1, const void *ptr2, size_t n)
>  {
>      const unsigned char *p1, *p2;
> @@ -2690,6 +2687,64 @@ int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
>      return 0;
>  }
I think it would be good to optimize memcmp_unaligned a little. I'm thinking about something along these lines (untested):

static int memcmp_size_t(size_t s1, size_t s2)
{
    const uint8_t *p1 = (const uint8_t*)&s1, *p2 = (const uint8_t*)&s2;

    while (*p1 == *p2)
    {
        p1++;
        p2++;
    }
    return *p1 > *p2 ? 1 : -1;
}
static int memcmp_unaligned(const char *c1, const char *c2, size_t len)
{
    int sh1 = 8 * ((size_t)c2 % sizeof(size_t));
    int sh2 = 8 * sizeof(size_t) - sh1;
    const size_t *s1 = (const size_t*)c1;
    const size_t *s2 = (const size_t*)(c2 - sh1 / 8);
    size_t x, y, m;

    x = s2[0];
    do
    {
        y = s2[1];
        m = MERGE(x, sh1, y, sh2);
        if (*s1 != m) return memcmp_size_t(*s1, m);
        s1++;
        s2++;
        len--;
        x = y;
    } while (len);
    return 0;
}
Here MERGE is already defined in the string.c file, and len is the length in sizeof(size_t) blocks instead of bytes. It may be even better to switch to uint64_t instead of size_t, like in memset. You can also take a look at glibc's platform-independent implementation (it uses the MERGE trick plus loop unrolling; according to some random benchmark it's on par with your implementation performance-wise on i386/x86_64 and much faster on ARM).
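For reference, MERGE combines two adjacent aligned words into the unaligned word that spans them. Roughly, in its little-endian form (a sketch from memory, not copied from string.c, so double-check the actual definition there):

/* Take the high bytes of w1 and the low bytes of w2 to form the word
 * starting sh1/8 bytes into w1, little-endian form. E.g. with a 4-byte
 * size_t and sh1 == 8, (w1 >> 8) yields bytes 1-3 of w1 in the low
 * positions and (w2 << 24) supplies the following byte. Note sh1 == 0
 * (c2 already aligned) needs special-casing, since then sh2 equals the
 * full word width and shifting by that much is undefined behaviour. */
#define MERGE(w1, sh1, w2, sh2) (((w1) >> (sh1)) | ((w2) << (sh2)))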
Thanks,
Piotr
On 4/13/22 15:50, Piotr Caban wrote:
> [...]
> I think it would be good to optimize memcmp_unaligned a little. I'm thinking about something along these lines (untested):
>
> static int memcmp_size_t(size_t s1, size_t s2)
> {
>     const uint8_t *p1 = (const uint8_t*)&s1, *p2 = (const uint8_t*)&s2;
>
>     while (*p1 == *p2)
>     {
>         p1++;
>         p2++;
>     }
>     return *p1 > *p2 ? 1 : -1;
> }
You can make the small case branchless (and slightly faster) by converting both values to big endian and then comparing them at once.
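Something along these lines (untested sketch, assuming GCC/Clang __builtin_bswap* builtins and a little-endian target):

#include <stdint.h>

/* Byte-swap to big endian so that the numeric comparison of the words
 * matches the lexicographic byte order; no per-byte loop needed.
 * The caller only invokes this when the words differ, so s1 != s2. */
static int memcmp_size_t(size_t s1, size_t s2)
{
#if SIZE_MAX == UINT64_MAX
    s1 = __builtin_bswap64(s1);
    s2 = __builtin_bswap64(s2);
#else
    s1 = __builtin_bswap32(s1);
    s2 = __builtin_bswap32(s2);
#endif
    return s1 > s2 ? 1 : -1;
}

A real patch would need portable fallbacks for non-GCC compilers and big-endian targets (where the swap can simply be skipped).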
> [...]
FWIW, for me and with a basic benchmark, this version is 3x slower than Jan's version on 32-bit x86, and 2x slower on 64-bit.
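The benchmark is nothing fancy; think of something along the lines of this sketch (hypothetical harness, not the exact one used, with arbitrary buffer sizes and iteration counts), built once per memcmp implementation:

#include <stdio.h>
#include <string.h>
#include <time.h>

/* Times repeated memcmp calls over equal buffers, so every call scans
 * the full length; the volatile sink keeps the calls from being
 * optimized away. */
int main(void)
{
    enum { SIZE = 4096, ITERS = 1000000 };
    static unsigned char a[SIZE], b[SIZE];
    volatile int sink = 0;
    clock_t start;
    int i;

    memset(a, 0x5a, SIZE);
    memset(b, 0x5a, SIZE);

    start = clock();
    for (i = 0; i < ITERS; i++)
        sink += memcmp(a, b, SIZE);

    printf("%.3fs (sink=%d)\n", (double)(clock() - start) / CLOCKS_PER_SEC, sink);
    return 0;
}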
On 4/19/22 10:39, Rémi Bernon wrote:
> On 4/13/22 15:50, Piotr Caban wrote:
>> [...]
>
> FWIW, for me and with a basic benchmark, this version is 3x slower than Jan's version on 32-bit x86, and 2x slower on 64-bit.
To make sure we're talking about the same thing: I meant that glibc's (platform-independent) version has similar performance. The code I attached here was only meant to show the general idea while keeping it as simple as possible.