Signed-off-by: Guillaume Charifi guillaume.charifi@sfr.fr --- dlls/msvcrt/string.c | 63 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c index 4d09405094d..6d1500cb194 100644 --- a/dlls/msvcrt/string.c +++ b/dlls/msvcrt/string.c @@ -2527,6 +2527,7 @@ int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n) __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") \ "popq " SRC_REG "\n\t" \ __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") + #endif
void * __cdecl sse2_memmove(void *dst, const void *src, size_t n); @@ -2732,6 +2733,64 @@ __ASM_GLOBAL_FUNC( sse2_memmove, MEMMOVE_CLEANUP "ret" )
+#undef DEST_REG +#undef SRC_REG +#undef LEN_REG +#undef TMP_REG + +#ifdef __i386__ + +#define DEST_REG "%edi" +#define SRC_REG "%eax" +#define LEN_REG "%ecx" +#define TMP_REG + +#define MEMSET_INIT \ + "pushl " DEST_REG "\n\t" \ + __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \ + "movl 8(%esp), " DEST_REG "\n\t" \ + "movl 12(%esp), " SRC_REG "\n\t" \ + "movl 16(%esp), " LEN_REG "\n\t" + +#define MEMSET_CLEANUP \ + "movl 8(%esp), %eax\n\t" \ + "popl " DEST_REG "\n\t" \ + __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") + +#else + +#define DEST_REG "%rdi" +#define SRC_REG "%rax" +#define LEN_REG "%rcx" +#define TMP_REG "%r9" + +#define MEMSET_INIT \ + "pushq " DEST_REG "\n\t" \ + __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t") \ + "movq %rcx, " DEST_REG "\n\t" \ + "movq %rdx, " SRC_REG "\n\t" \ + "movq %r8, " LEN_REG "\n\t" \ + "movq " DEST_REG ", " TMP_REG "\n\t" + +#define MEMSET_CLEANUP \ + "movq " TMP_REG ", %rax\n\t" \ + "popq " DEST_REG "\n\t" \ + __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") + +#endif + +void * __cdecl ermsb_memset(void *dst, int c, size_t n); +__ASM_GLOBAL_FUNC( ermsb_memset, + MEMSET_INIT + "rep stosb\n\t" + MEMSET_CLEANUP + "ret" ) + +#undef DEST_REG +#undef SRC_REG +#undef LEN_REG +#undef TMP_REG + #endif
/********************************************************************* @@ -2860,9 +2919,13 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n) */ void* __cdecl memset(void *dst, int c, size_t n) { +#if defined(__i386__) || defined(__x86_64__) + return ermsb_memset(dst, c, n); +#else volatile unsigned char *d = dst; /* avoid gcc optimizations */ while (n--) *d++ = c; return dst; +#endif }
/*********************************************************************
Hi Guillaume,
The "rep stosb" can't be used unconditionally since it's very slow on some processors.
Rémi sent a similar patch that adds the checks, see: https://source.winehq.org/patches/data/214107 He also improved the C version of the function: https://source.winehq.org/patches/data/214106
Is the "improved C" version good enough for your use case?
Thanks, Piotr