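Add an SSE2 implementation of memset_aligned_32 for intermediate sizes: on i386 and x86_64 it handles the aligned fills that do not take the ERMS path (aligned length below 2048 bytes, or ERMS not supported). On i386 it is only used when SSE2 is supported; otherwise the generic memset_aligned_32 fallback is kept.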
Signed-off-by: Rémi Bernon <rbernon@codeweavers.com>
---
 dlls/msvcrt/string.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c index b8a5cc89663..6fb269e4185 100644 --- a/dlls/msvcrt/string.c +++ b/dlls/msvcrt/string.c @@ -2907,6 +2907,27 @@ __ASM_GLOBAL_FUNC( erms_memset_aligned_32, "stosb\n\t" MEMSET_RET )
+void *__cdecl sse2_memset_aligned_32(unsigned char *d, unsigned int c, size_t n, void *ret); +__ASM_GLOBAL_FUNC( sse2_memset_aligned_32, + MEMSET_INIT + "movd " VAL_REG ", %xmm0\n\t" + "pshufd $0, %xmm0, %xmm0\n\t" + "test $0x20, " LEN_REG "\n\t" + "je 1f\n\t" + "sub $0x20, " LEN_REG "\n\t" + "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t" + "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t" + "je 2f\n\t" + "1:\n\t" + "sub $0x40, " LEN_REG "\n\t" + "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t" + "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t" + "movdqa %xmm0, 0x20(" DEST_REG ", " LEN_REG ")\n\t" + "movdqa %xmm0, 0x30(" DEST_REG ", " LEN_REG ")\n\t" + "ja 1b\n\t" + "2:\n\t" + MEMSET_RET ) + #undef MEMSET_INIT #undef MEMSET_RET #undef DEST_REG @@ -2952,6 +2973,11 @@ void *__cdecl memset(void *dst, int c, size_t n) n = (n - a) & ~0x1f; #if defined(__i386__) || defined(__x86_64__) if (n >= 2048 && erms_supported) return erms_memset_aligned_32(d + a, v, n, dst); +#ifdef __i386__ + if (sse2_supported) return sse2_memset_aligned_32(d + a, v, n, dst); +#else + return sse2_memset_aligned_32(d + a, v, n, dst); +#endif #endif return memset_aligned_32(d + a, v, n, dst); }