Signed-off-by: Jan Sikorski <jsikorski@codeweavers.com>
---
v2: Smarter unaligned implementation for non-x86 architectures.
    Use uint64_t blocks instead of size_t.
    Some renaming & clean up.
---
 dlls/msvcrt/string.c | 99 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 90 insertions(+), 9 deletions(-)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 3b352ac0bf2..7b42604c123 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2675,10 +2675,13 @@ int CDECL I10_OUTPUT(MSVCRT__LDOUBLE ld80, int prec, int flag, struct _I10_OUTPU
 }
 #undef I10_OUTPUT_MAX_PREC
 
-/*********************************************************************
- *              memcmp (MSVCRT.@)
- */
-int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+#ifdef WORDS_BIGENDIAN
+# define MERGE(w1, sh1, w2, sh2) ((w1 << sh1) | (w2 >> sh2))
+#else
+# define MERGE(w1, sh1, w2, sh2) ((w1 >> sh1) | (w2 << sh2))
+#endif
+
+static inline int memcmp_bytes(const void *ptr1, const void *ptr2, size_t n)
 {
     const unsigned char *p1, *p2;
@@ -2690,6 +2693,89 @@ int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
     return 0;
 }
 
+static int memcmp_aligned(const uint64_t *p1, const uint64_t *p2, size_t size)
+{
+    const size_t block_size = sizeof(*p1);
+
+    size_t remainder = size & (block_size - 1);
+    size_t block_count = size / block_size;
+
+    while (block_count)
+    {
+        if (*p1 != *p2)
+            return memcmp_bytes(p1, p2, block_size);
+
+        p1++;
+        p2++;
+        block_count--;
+    }
+
+    return memcmp_bytes(p1, p2, remainder);
+}
+
+static int memcmp_unaligned(const uint64_t *aligned, const unsigned char *unaligned, int offset, size_t size)
+{
+    const size_t block_size = sizeof(*aligned);
+    size_t remainder = size & (block_size - 1);
+    size_t block_count = size / block_size;
+
+    int shift_prev = 8 * offset;
+    int shift_next = 8 * (block_size - offset);
+
+    const uint64_t *block = (const uint64_t *)(unaligned - offset);
+    uint64_t prev, next, merged;
+
+    prev = block[0];
+    while (block_count)
+    {
+        next = block[1];
+        merged = MERGE(prev, shift_prev, next, shift_next);
+        if (merged != *aligned)
+            return memcmp_bytes(aligned, &merged, block_size);
+
+        aligned++;
+        block++;
+        block_count--;
+
+        prev = next;
+    }
+
+    return memcmp_bytes(aligned, (const char *)block + offset, remainder);
+}
+
+/*********************************************************************
+ *              memcmp (MSVCRT.@)
+ */
+int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+{
+    const unsigned char *p1 = ptr1, *p2 = ptr2;
+    const size_t block_size = 8;
+    size_t align, offset;
+    int result;
+
+    if (n < block_size)
+        return memcmp_bytes(p1, p2, n);
+
+    align = -(uintptr_t)p1 & (block_size - 1);
+
+    if ((result = memcmp_bytes(p1, p2, align)))
+        return result;
+
+    p1 += align;
+    p2 += align;
+    n -= align;
+
+#if defined(__i386__) || defined(__x86_64__)
+    return memcmp_aligned((const uint64_t *)p1, (const uint64_t *)p2, n);
+#endif
+
+    offset = (uintptr_t)p2 & (block_size - 1);
+    if (!offset)
+        return memcmp_aligned((const uint64_t *)p1, (const uint64_t *)p2, n);
+
+    return memcmp_unaligned((const uint64_t *)p1, p2, offset, n);
+}
+
 #if defined(__i386__) || defined(__x86_64__)
 
 #ifdef __i386__
@@ -2946,11 +3032,6 @@ __ASM_GLOBAL_FUNC( sse2_memmove,
 /*********************************************************************
  *              memmove (MSVCRT.@)
  */
-#ifdef WORDS_BIGENDIAN
-# define MERGE(w1, sh1, w2, sh2) ((w1 << sh1) | (w2 >> sh2))
-#else
-# define MERGE(w1, sh1, w2, sh2) ((w1 >> sh1) | (w2 << sh2))
-#endif
 void * __cdecl memmove(void *dst, const void *src, size_t n)
 {
 #ifdef __x86_64__
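The trick in memcmp_unaligned above: once p1 has been aligned, any misaligned 8-byte chunk of p2 straddles two aligned uint64_t blocks, so the loop issues only aligned loads and stitches each chunk together with two shifts and an OR via MERGE. A minimal standalone sketch of the little-endian case (an illustration only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Little-endian variant of the patch's MERGE macro. */
    #define MERGE(w1, sh1, w2, sh2) ((w1 >> sh1) | (w2 << sh2))

    int main(void)
    {
        /* Two aligned 64-bit blocks; the 8 bytes at byte offset 3
           straddle the boundary between them. */
        uint64_t blocks[2], merged, expected;
        unsigned char *bytes = (unsigned char *)blocks;
        int i, offset = 3;   /* 1..7 only; offset 0 takes the aligned path */

        for (i = 0; i < 16; i++) bytes[i] = (unsigned char)i;

        /* Stitch the unaligned word together from two aligned loads. */
        merged = MERGE(blocks[0], 8 * offset, blocks[1], 8 * (8 - offset));

        /* Cross-check against a plain (compiler-generated) unaligned read. */
        memcpy(&expected, bytes + offset, sizeof(expected));
        printf("merged   %016llx\nexpected %016llx\n",
               (unsigned long long)merged, (unsigned long long)expected);
        return 0;
    }

Both lines print 0a09080706050403, i.e. the merge reproduces bytes 3..10 exactly while never loading across an alignment boundary.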
Hi,
Sorry it took so long to review it.
I've done some tests and I don't think it makes sense to use the "merge" approach. I'm attaching a version of your patch that uses, on all platforms, the same approach you used for x86/x86_64. If the patch looks OK, please send it to wine-devel.
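Using the same approach on all platforms amounts to dropping the merge path entirely and loading the possibly misaligned side directly, letting the compiler emit whatever unaligned 64-bit load the target supports. A rough sketch of that idea (hypothetical, not the attached patch; the helper names here are made up):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int cmp_bytes(const unsigned char *p1, const unsigned char *p2, size_t n)
    {
        while (n--)
        {
            if (*p1 != *p2) return *p1 < *p2 ? -1 : 1;
            p1++;
            p2++;
        }
        return 0;
    }

    /* One loop for every platform: load both sides through memcpy so
       the compiler picks the best (possibly unaligned) 64-bit load;
       drop to bytes on mismatch and for the tail. */
    static int cmp_blocks(const void *ptr1, const void *ptr2, size_t n)
    {
        const unsigned char *p1 = ptr1, *p2 = ptr2;
        uint64_t a, b;

        while (n >= sizeof(a))
        {
            memcpy(&a, p1, sizeof(a));
            memcpy(&b, p2, sizeof(b));
            if (a != b) return cmp_bytes(p1, p2, sizeof(a));
            p1 += sizeof(a);
            p2 += sizeof(a);
            n -= sizeof(a);
        }
        return cmp_bytes(p1, p2, n);
    }

    int main(void)
    {
        const char a[] = "0123456789abcdef", b[] = "0123456789abcdeX";
        printf("%d\n", cmp_blocks(a, b, sizeof(a) - 1));  /* prints a positive value */
        return 0;
    }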
Notes about performance:
- it has performance similar to your previous patch on x86/x86_64
- it's faster on ARM compared to what's currently in Wine
- performance on ARM varies a lot depending on hardware capabilities
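The benchmark itself isn't included in this mail; a rough harness along these lines (a sketch only, assuming POSIX clock_gettime) could be used to compare the approaches, sweeping the misalignment since that is exactly where they differ:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    /* Time `iters` memcmp calls over `size` bytes with one side misaligned. */
    static double bench(size_t size, size_t misalign, int iters)
    {
        unsigned char *a = malloc(size + 16), *b = malloc(size + 16);
        struct timespec t0, t1;
        int i;

        memset(a, 0x5a, size + 16);
        memset(b, 0x5a, size + 16);

        clock_gettime(CLOCK_MONOTONIC, &t0);
        for (i = 0; i < iters; i++)
            if (memcmp(a, b + misalign, size))  /* buffers are equal, so this */
                abort();                        /* also keeps the call alive  */
        clock_gettime(CLOCK_MONOTONIC, &t1);

        free(a);
        free(b);
        return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) * 1e-9;
    }

    int main(void)
    {
        size_t misalign;

        for (misalign = 0; misalign < 8; misalign++)
            printf("misalign %zu: %.3fs\n", misalign,
                   bench(1 << 20, misalign, 1000));
        return 0;
    }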
Thanks,
Piotr