[PATCH 1/2] msvcrt: Write memory forward in memset.
Write forward instead of going backward, which breaks the Linux kernel's transparent huge pages allocation assumptions. This can be reproduced by calling memset on large, newly allocated memory regions. Signed-off-by: Rémi Bernon <rbernon(a)codeweavers.com> --- dlls/msvcrt/string.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c index 5655fbfe68a..48d44d3b72e 100644 --- a/dlls/msvcrt/string.c +++ b/dlls/msvcrt/string.c @@ -2857,13 +2857,14 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n) static inline void memset_aligned_32(unsigned char *d, uint64_t v, size_t n) { - while (n >= 32) - { - *(uint64_t *)(d + n - 32) = v; - *(uint64_t *)(d + n - 24) = v; - *(uint64_t *)(d + n - 16) = v; - *(uint64_t *)(d + n - 8) = v; - n -= 32; + unsigned char *end = d + n; + while (d < end) + { + *(uint64_t *)(d + 0) = v; + *(uint64_t *)(d + 8) = v; + *(uint64_t *)(d + 16) = v; + *(uint64_t *)(d + 24) = v; + d += 32; } } -- 2.33.0
Although it is less frequently used, ntdll memset is what zeroes out heap allocations, and those don't benefit from the msvcrt memset optimisation. Signed-off-by: Rémi Bernon <rbernon(a)codeweavers.com> --- dlls/ntdll/string.c | 66 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/dlls/ntdll/string.c b/dlls/ntdll/string.c index f1cea6caa83..cd4f3c6a8a2 100644 --- a/dlls/ntdll/string.c +++ b/dlls/ntdll/string.c @@ -25,6 +25,7 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <stdint.h> #include "windef.h" #include "winbase.h" @@ -140,13 +141,72 @@ void * __cdecl memmove( void *dst, const void *src, size_t n ) } +static inline void memset_aligned_32( unsigned char *d, uint64_t v, size_t n ) +{ + unsigned char *end = d + n; + while (d < end) + { + *(uint64_t *)(d + 0) = v; + *(uint64_t *)(d + 8) = v; + *(uint64_t *)(d + 16) = v; + *(uint64_t *)(d + 24) = v; + d += 32; + } +} + /********************************************************************* * memset (NTDLL.@) */ -void * __cdecl memset( void *dst, int c, size_t n ) +void *__cdecl memset( void *dst, int c, size_t n ) { - volatile unsigned char *d = dst; /* avoid gcc optimizations */ - while (n--) *d++ = c; + typedef uint64_t DECLSPEC_ALIGN(1) unaligned_ui64; + typedef uint32_t DECLSPEC_ALIGN(1) unaligned_ui32; + typedef uint16_t DECLSPEC_ALIGN(1) unaligned_ui16; + + uint64_t v = 0x101010101010101ull * (unsigned char)c; + unsigned char *d = (unsigned char *)dst; + size_t a = 0x20 - ((uintptr_t)d & 0x1f); + + if (n >= 16) + { + *(unaligned_ui64 *)(d + 0) = v; + *(unaligned_ui64 *)(d + 8) = v; + *(unaligned_ui64 *)(d + n - 16) = v; + *(unaligned_ui64 *)(d + n - 8) = v; + if (n <= 32) return dst; + *(unaligned_ui64 *)(d + 16) = v; + *(unaligned_ui64 *)(d + 24) = v; + *(unaligned_ui64 *)(d + n - 32) = v; + *(unaligned_ui64 *)(d + n - 24) = v; + if (n <= 64) return dst; + + n = (n - a) & ~0x1f; + memset_aligned_32( d + a, v, n ); + return dst; + } 
+ if (n >= 8) + { + *(unaligned_ui64 *)d = v; + *(unaligned_ui64 *)(d + n - 8) = v; + return dst; + } + if (n >= 4) + { + *(unaligned_ui32 *)d = v; + *(unaligned_ui32 *)(d + n - 4) = v; + return dst; + } + if (n >= 2) + { + *(unaligned_ui16 *)d = v; + *(unaligned_ui16 *)(d + n - 2) = v; + return dst; + } + if (n >= 1) + { + *(uint8_t *)d = v; + return dst; + } return dst; } -- 2.33.0
participants (2)
- Piotr Caban
- Rémi Bernon