On 9/15/21 10:27 PM, Martin Storsjö wrote:
From: Martin Storsjö martin@martin.st
This fixes a regression in memset on ARM since 7b17d7081512db52ef852705445762ac4016c29f.
ARM can do 64-bit writes with the STRD instruction, but that instruction requires a 32-bit aligned address — while these stores are unaligned.
Two consecutive stores to uint32_t* pointers can also be fused into a single STRD, since a uint32_t* is assumed to be properly aligned; therefore, perform these stores through volatile uint32_t* pointers to keep the compiler from fusing them.
Signed-off-by: Martin Storsjö martin@martin.st
 dlls/msvcrt/string.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index f2b1b4a5b11..bf491a91f40 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2878,15 +2878,37 @@ void *__cdecl memset(void *dst, int c, size_t n)
if (n >= 16) {
+#ifdef __arm__
*(volatile uint32_t *)(d + 0) = v;
*(volatile uint32_t *)(d + 4) = v;
*(volatile uint32_t *)(d + 8) = v;
*(volatile uint32_t *)(d + 12) = v;
*(volatile uint32_t *)(d + n - 16) = v;
*(volatile uint32_t *)(d + n - 12) = v;
*(volatile uint32_t *)(d + n - 8) = v;
*(volatile uint32_t *)(d + n - 4) = v;
+#else
*(uint64_t *)(d + 0) = v;
*(uint64_t *)(d + 8) = v;
*(uint64_t *)(d + n - 16) = v;
*(uint64_t *)(d + n - 8) = v;
+#endif
if (n <= 32) return dst;
+#ifdef __arm__
*(volatile uint32_t *)(d + 16) = v;
*(volatile uint32_t *)(d + 20) = v;
*(volatile uint32_t *)(d + 24) = v;
*(volatile uint32_t *)(d + 28) = v;
*(volatile uint32_t *)(d + n - 32) = v;
*(volatile uint32_t *)(d + n - 28) = v;
*(volatile uint32_t *)(d + n - 24) = v;
*(volatile uint32_t *)(d + n - 20) = v;
+#else
*(uint64_t *)(d + 16) = v;
*(uint64_t *)(d + 24) = v;
*(uint64_t *)(d + n - 32) = v;
*(uint64_t *)(d + n - 24) = v;
+#endif
if (n <= 64) return dst;
n = (n - a) & ~0x1f;
@@ -2895,8 +2917,15 @@ void *__cdecl memset(void *dst, int c, size_t n)
}
if (n >= 8)
{
+#ifdef __arm__
*(volatile uint32_t *)d = v;
*(volatile uint32_t *)(d + 4) = v;
*(volatile uint32_t *)(d + n - 4) = v;
*(volatile uint32_t *)(d + n - 8) = v;
+#else
*(uint64_t *)d = v;
*(uint64_t *)(d + n - 8) = v;
+#endif
return dst;
}
if (n >= 4)
I'm confused that it causes trouble here — I had thought it could benefit more than just Intel architectures...
Maybe it could be made not too ugly with some macro to wrap the 64-bit stores, defined differently for ARM?