Signed-off-by: Jan Sikorski <jsikorski(a)codeweavers.com>
---
v2: Smarter unaligned implementation for non-x86 architectures.
Use uint64_t blocks instead of size_t.
Some renaming & clean up.
---
dlls/msvcrt/string.c | 99 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 90 insertions(+), 9 deletions(-)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 3b352ac0bf2..7b42604c123 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2675,10 +2675,13 @@ int CDECL I10_OUTPUT(MSVCRT__LDOUBLE ld80, int prec, int flag, struct _I10_OUTPU
}
#undef I10_OUTPUT_MAX_PREC
-/*********************************************************************
- * memcmp (MSVCRT.@)
- */
-int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+#ifdef WORDS_BIGENDIAN /* MERGE: OR together w1 and w2 after shifting them in opposite directions */
+# define MERGE(w1, sh1, w2, sh2) (((w1) << (sh1)) | ((w2) >> (sh2)))
+#else /* same combination with the shift directions swapped */
+# define MERGE(w1, sh1, w2, sh2) (((w1) >> (sh1)) | ((w2) << (sh2)))
+#endif
+
+static inline int memcmp_bytes(const void *ptr1, const void *ptr2, size_t n)
{
const unsigned char *p1, *p2;
@@ -2690,6 +2693,89 @@ int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
return 0;
}
+/* Compare size bytes in uint64_t-sized blocks; the result always comes from memcmp_bytes(). */
+static int memcmp_aligned(const uint64_t *p1, const uint64_t *p2, size_t size)
+{
+    const size_t word_size = sizeof(*p1);
+    size_t words_left = size / word_size;
+    size_t tail_bytes = size & (word_size - 1);
+
+    for (; words_left; words_left--, p1++, p2++)
+    {
+        if (*p1 == *p2)
+            continue;
+
+        /* The blocks differ: the byte-wise helper picks the return value. */
+        return memcmp_bytes(p1, p2, word_size);
+    }
+
+    /* Every whole block matched; hand the leftover bytes to the byte-wise helper. */
+    return memcmp_bytes(p1, p2, tail_bytes);
+}
+
+#if !defined(__i386__) && !defined(__x86_64__)
+/* Compare size bytes of an aligned buffer with one that starts offset bytes past a
+ * uint64_t boundary. offset must be non-zero, or shift_next would equal the word width. */
+static int memcmp_unaligned(const uint64_t *aligned, const unsigned char *unaligned, int offset, size_t size)
+{
+    const size_t block_size = sizeof(*aligned);
+    size_t remainder = size & (block_size - 1);
+    size_t block_count = size / block_size;
+    int shift_prev = 8 * offset;
+    int shift_next = 8 * (block_size - offset);
+    /* Step back to the block boundary just below the unaligned pointer. */
+    const uint64_t *block = (const uint64_t *)(unaligned - offset);
+    uint64_t prev, next, merged;
+
+    for (prev = block[0]; block_count; block_count--, aligned++, block++, prev = next)
+    {
+        /* Splice two neighbouring loads into the block that lines up with *aligned. */
+        next = block[1];
+        merged = MERGE(prev, shift_prev, next, shift_next);
+        if (merged != *aligned)
+            return memcmp_bytes(aligned, &merged, block_size);
+    }
+
+    return memcmp_bytes(aligned, (const char *)block + offset, remainder);
+}
+#endif
+
+/*********************************************************************
+ * memcmp (MSVCRT.@)
+ *
+ * Compares the first n bytes of ptr1 and ptr2: memcmp_bytes() handles short
+ * inputs and the unaligned head of ptr1, 64-bit block loops handle the rest.
+ */
+int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+{
+    const unsigned char *p1 = ptr1, *p2 = ptr2;
+    const size_t block_size = sizeof(uint64_t);
+    size_t align;
+    int result;
+
+    if (n < block_size)
+        return memcmp_bytes(p1, p2, n);
+
+    /* Bytes needed to bring p1 up to the next block boundary. */
+    align = -(uintptr_t)p1 & (block_size - 1);
+    if ((result = memcmp_bytes(p1, p2, align)))
+        return result;
+
+    p1 += align;
+    p2 += align;
+    n -= align;
+
+#if !defined(__i386__) && !defined(__x86_64__)
+    /* p1 is block-aligned now; if p2 is not, its loads have to be realigned. */
+    {
+        const int offset = (uintptr_t)p2 & (block_size - 1);
+        if (offset) return memcmp_unaligned((const uint64_t *)p1, p2, offset, n);
+    }
+#endif
+    /* On x86 this is reached with p2 at any alignment. */
+    return memcmp_aligned((const uint64_t *)p1, (const uint64_t *)p2, n);
+}
+
#if defined(__i386__) || defined(__x86_64__)
#ifdef __i386__
@@ -2946,11 +3032,6 @@ __ASM_GLOBAL_FUNC( sse2_memmove,
/*********************************************************************
* memmove (MSVCRT.@)
*/
-#ifdef WORDS_BIGENDIAN
-# define MERGE(w1, sh1, w2, sh2) ((w1 << sh1) | (w2 >> sh2))
-#else
-# define MERGE(w1, sh1, w2, sh2) ((w1 >> sh1) | (w2 << sh2))
-#endif
void * __cdecl memmove(void *dst, const void *src, size_t n)
{
#ifdef __x86_64__
--
2.32.0