Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=27594 Signed-off-by: Myah Caron <qsniyg@protonmail.com> --- I've only tested on x86, as I believe x64 is different enough to warrant a separate commit if the same issue is also present there.
Though I suspect the x87 control word might need the same treatment, I couldn't create a test case that demonstrated the need for it, so I've left it untouched. --- dlls/kernel32/tests/thread.c | 36 +++++++++++++++++++++++++----- dlls/msvcrt/math.c | 43 ++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 24 deletions(-)
diff --git a/dlls/kernel32/tests/thread.c b/dlls/kernel32/tests/thread.c index b476e44cfc..9b12b5043d 100644 --- a/dlls/kernel32/tests/thread.c +++ b/dlls/kernel32/tests/thread.c @@ -1829,6 +1829,19 @@ static inline unsigned long get_fpu_cw(void) #endif }
+static inline void set_fpu_cw(unsigned int cw, unsigned int sse) +{ +#if defined(__i386__) || defined(__x86_64__) +#ifdef _MSC_VER + __asm { fldcw [cw] } + __asm { ldmxcsr [sse] } +#else + __asm__ volatile ("fldcw %0" : : "m" (cw)); + __asm__ volatile ("ldmxcsr %0" : : "m" (sse)); +#endif +#endif +} + static DWORD WINAPI fpu_thread(void *param) { struct fpu_thread_ctx *ctx = param; @@ -1870,7 +1883,7 @@ static void test_thread_fpu_cw(void) { static const struct { unsigned int cw; unsigned long fpu_cw; unsigned long fpu_cw_broken; - } expected_cw[6] = + } expected_cw[7] = { #ifdef __i386__ { _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) }, @@ -1878,27 +1891,30 @@ static void test_thread_fpu_cw(void) { _EM_INEXACT | _RC_CHOP | _PC_24, MAKELONG( 0xc60, 0x7000 ), MAKELONG( 0xc60, 0x1f80 ) }, { _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) }, { _EM_INEXACT | _RC_CHOP | _PC_24, MAKELONG( 0xc60, 0x7000 ), MAKELONG( 0xc60, 0x1f80 ) }, - { _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) } + { _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) }, + { _MCW_EM | _PC_24 | _RC_DOWN, MAKELONG( 0x47f, 0x3fa1 ) } #elif defined(__x86_64__) { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }, { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }, { _EM_INEXACT | _RC_CHOP | _PC_64, MAKELONG( 0x27f, 0x7000 ) }, { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }, { _EM_INEXACT | _RC_CHOP | _PC_64, MAKELONG( 0x27f, 0x7000 ) }, - { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) } + { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }, + { 0xdeadbeef, 0xdeadbeef } #elif defined(__aarch64__) { _MCW_EM | _PC_64, 0 }, { _MCW_EM | _PC_64, 0 }, { _EM_INEXACT | _RC_CHOP | _PC_64, 0xc08f00 }, { _MCW_EM | _PC_64, 0 }, { _EM_INEXACT | _RC_CHOP | _PC_64, 0xc08f00 }, - { _MCW_EM | _PC_64, 0 } + { _MCW_EM | _PC_64, 0 }, + { 0xdeadbeef, 0xdeadbeef } #else { 0xdeadbeef, 0xdeadbeef } #endif }; unsigned int initial_cw, cw; - unsigned long fpu_cw; + unsigned long initial_fpu_cw, fpu_cw;
fpu_cw = get_fpu_cw(); initial_cw = _control87( 0, 0 ); @@ -1933,6 +1949,16 @@ static void test_thread_fpu_cw(void) fpu_cw = get_fpu_cw(); ok(cw == expected_cw[5].cw, "expected %#x got %#x\n", expected_cw[5].cw, cw); ok(fpu_cw == expected_cw[5].fpu_cw, "expected %#lx got %#lx\n", expected_cw[5].fpu_cw, fpu_cw); + +#ifdef __i386__ + initial_fpu_cw = get_fpu_cw(); + set_fpu_cw(0x47f, 0x3fa1); + cw = _control87( _RC_DOWN, _MCW_RC ); + fpu_cw = get_fpu_cw(); + ok(cw == expected_cw[6].cw, "expected %#x got %#x\n", expected_cw[6].cw, cw); + ok(fpu_cw == expected_cw[6].fpu_cw, "expected %#lx got %#lx\n", expected_cw[6].fpu_cw, fpu_cw); + set_fpu_cw(initial_fpu_cw & 0xffff, (initial_fpu_cw >> 16) & 0xffff); +#endif }
static const char manifest_dep[] = diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index eae917076f..d11bd53acd 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -1107,6 +1107,7 @@ int CDECL __control87_2( unsigned int newval, unsigned int mask, #ifdef __GNUC__ unsigned long fpword; unsigned int flags; + unsigned int old_flags;
if (x86_cw) { @@ -1196,29 +1197,33 @@ int CDECL __control87_2( unsigned int newval, unsigned int mask, TRACE( "sse2 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask ); if (mask) { + old_flags = flags; flags = (flags & ~mask) | (newval & mask);
- /* Convert (masked) value back to fp word */ - fpword = 0; - if (flags & MSVCRT__EM_INVALID) fpword |= 0x80; - if (flags & MSVCRT__EM_DENORMAL) fpword |= 0x100; - if (flags & MSVCRT__EM_ZERODIVIDE) fpword |= 0x200; - if (flags & MSVCRT__EM_OVERFLOW) fpword |= 0x400; - if (flags & MSVCRT__EM_UNDERFLOW) fpword |= 0x800; - if (flags & MSVCRT__EM_INEXACT) fpword |= 0x1000; - switch (flags & MSVCRT__MCW_RC) + if (flags != old_flags) { - case MSVCRT__RC_UP|MSVCRT__RC_DOWN: fpword |= 0x6000; break; - case MSVCRT__RC_UP: fpword |= 0x4000; break; - case MSVCRT__RC_DOWN: fpword |= 0x2000; break; - } - switch (flags & MSVCRT__MCW_DN) - { - case MSVCRT__DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break; - case MSVCRT__DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break; - case MSVCRT__DN_FLUSH: fpword |= 0x8040; break; + /* Convert (masked) value back to fp word */ + fpword = 0; + if (flags & MSVCRT__EM_INVALID) fpword |= 0x80; + if (flags & MSVCRT__EM_DENORMAL) fpword |= 0x100; + if (flags & MSVCRT__EM_ZERODIVIDE) fpword |= 0x200; + if (flags & MSVCRT__EM_OVERFLOW) fpword |= 0x400; + if (flags & MSVCRT__EM_UNDERFLOW) fpword |= 0x800; + if (flags & MSVCRT__EM_INEXACT) fpword |= 0x1000; + switch (flags & MSVCRT__MCW_RC) + { + case MSVCRT__RC_UP|MSVCRT__RC_DOWN: fpword |= 0x6000; break; + case MSVCRT__RC_UP: fpword |= 0x4000; break; + case MSVCRT__RC_DOWN: fpword |= 0x2000; break; + } + switch (flags & MSVCRT__MCW_DN) + { + case MSVCRT__DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break; + case MSVCRT__DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break; + case MSVCRT__DN_FLUSH: fpword |= 0x8040; break; + } + __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) ); } - __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) ); } *sse2_cw = flags; } -- 2.26.2
I've just tested the application linked in the bug report again. Although this patch is related to the bug and is a step toward solving it, it doesn't actually solve the problem by itself; I had been testing with the wrong msvcrt version.
‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐ On Friday, July 3, 2020 11:34 AM, Myah Caron <qsniyg@protonmail.com> wrote:
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=27594 Signed-off-by: Myah Caron <qsniyg@protonmail.com>
I've only tested on x86, as I believe x64 is different enough to warrant a separate commit if the same issue is also present there.
Though I suspect the x87 control word might need the same treatment, I couldn't create a test case that demonstrated the need for it, so I've left it untouched.
dlls/kernel32/tests/thread.c | 36 +++++++++++++++++++++++++----- dlls/msvcrt/math.c | 43 ++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 24 deletions(-)
diff --git a/dlls/kernel32/tests/thread.c b/dlls/kernel32/tests/thread.c index b476e44cfc..9b12b5043d 100644 --- a/dlls/kernel32/tests/thread.c +++ b/dlls/kernel32/tests/thread.c @@ -1829,6 +1829,19 @@ static inline unsigned long get_fpu_cw(void) #endif }
+static inline void set_fpu_cw(unsigned int cw, unsigned int sse) +{ +#if defined(i386) || defined(x86_64) +#ifdef _MSC_VER
__asm { fldcw [cw] }
__asm { ldmxcsr [sse] } +#else
asm volatile ("fldcw %0" : : "m" (cw));
asm volatile ("ldmxcsr %0" : : "m" (sse)); +#endif +#endif +}
static DWORD WINAPI fpu_thread(void *param) { struct fpu_thread_ctx *ctx = param; @@ -1870,7 +1883,7 @@ static void test_thread_fpu_cw(void) { static const struct { unsigned int cw; unsigned long fpu_cw; unsigned long fpu_cw_broken;
} expected_cw[6] =
} expected_cw[7] = { #ifdef i386 { _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) }, @@ -1878,27 +1891,30 @@ static void test_thread_fpu_cw(void) { _EM_INEXACT | _RC_CHOP | _PC_24, MAKELONG( 0xc60, 0x7000 ), MAKELONG( 0xc60, 0x1f80 ) }, { _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) }, { _EM_INEXACT | _RC_CHOP | _PC_24, MAKELONG( 0xc60, 0x7000 ), MAKELONG( 0xc60, 0x1f80 ) },
{ _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) }
{ _MCW_EM | _PC_53, MAKELONG( 0x27f, 0x1f80 ) },
{ _MCW_EM | _PC_24 | _RC_DOWN, MAKELONG( 0x47f, 0x3fa1 ) }
#elif defined(x86_64) { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }, { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }, { _EM_INEXACT | _RC_CHOP | _PC_64, MAKELONG( 0x27f, 0x7000 ) }, { _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }, { _EM_INEXACT | _RC_CHOP | _PC_64, MAKELONG( 0x27f, 0x7000 ) },
{ _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) }
{ _MCW_EM | _PC_64, MAKELONG( 0x27f, 0x1f80 ) },
{ 0xdeadbeef, 0xdeadbeef }
#elif defined(aarch64) { _MCW_EM | _PC_64, 0 }, { _MCW_EM | _PC_64, 0 }, { _EM_INEXACT | _RC_CHOP | _PC_64, 0xc08f00 }, { _MCW_EM | _PC_64, 0 }, { _EM_INEXACT | _RC_CHOP | _PC_64, 0xc08f00 },
{ _MCW_EM | _PC_64, 0 }
{ _MCW_EM | _PC_64, 0 },
{ 0xdeadbeef, 0xdeadbeef }
#else { 0xdeadbeef, 0xdeadbeef } #endif }; unsigned int initial_cw, cw;
unsigned long fpu_cw;
unsigned long initial_fpu_cw, fpu_cw;
fpu_cw = get_fpu_cw(); initial_cw = _control87( 0, 0 ); @@ -1933,6 +1949,16 @@ static void test_thread_fpu_cw(void) fpu_cw = get_fpu_cw(); ok(cw == expected_cw[5].cw, "expected %#x got %#x\n", expected_cw[5].cw, cw); ok(fpu_cw == expected_cw[5].fpu_cw, "expected %#lx got %#lx\n", expected_cw[5].fpu_cw, fpu_cw);
+#ifdef i386
- initial_fpu_cw = get_fpu_cw();
- set_fpu_cw(0x47f, 0x3fa1);
- cw = _control87( _RC_DOWN, _MCW_RC );
- fpu_cw = get_fpu_cw();
- ok(cw == expected_cw[6].cw, "expected %#x got %#x\n", expected_cw[6].cw, cw);
- ok(fpu_cw == expected_cw[6].fpu_cw, "expected %#lx got %#lx\n", expected_cw[6].fpu_cw, fpu_cw);
- set_fpu_cw(initial_fpu_cw & 0xffff, (initial_fpu_cw >> 16) & 0xffff);
+#endif }
static const char manifest_dep[] = diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index eae917076f..d11bd53acd 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -1107,6 +1107,7 @@ int CDECL __control87_2( unsigned int newval, unsigned int mask, #ifdef GNUC unsigned long fpword; unsigned int flags;
unsigned int old_flags;
if (x86_cw) { @@ -1196,29 +1197,33 @@ int CDECL __control87_2( unsigned int newval, unsigned int mask, TRACE( "sse2 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask ); if (mask) {
old_flags = flags; flags = (flags & ~mask) | (newval & mask);
/* Convert (masked) value back to fp word */
fpword = 0;
if (flags & MSVCRT__EM_INVALID) fpword |= 0x80;
if (flags & MSVCRT__EM_DENORMAL) fpword |= 0x100;
if (flags & MSVCRT__EM_ZERODIVIDE) fpword |= 0x200;
if (flags & MSVCRT__EM_OVERFLOW) fpword |= 0x400;
if (flags & MSVCRT__EM_UNDERFLOW) fpword |= 0x800;
if (flags & MSVCRT__EM_INEXACT) fpword |= 0x1000;
switch (flags & MSVCRT__MCW_RC)
if (flags != old_flags) {
case MSVCRT__RC_UP|MSVCRT__RC_DOWN: fpword |= 0x6000; break;
case MSVCRT__RC_UP: fpword |= 0x4000; break;
case MSVCRT__RC_DOWN: fpword |= 0x2000; break;
}
switch (flags & MSVCRT__MCW_DN)
{
case MSVCRT__DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
case MSVCRT__DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
case MSVCRT__DN_FLUSH: fpword |= 0x8040; break;
/* Convert (masked) value back to fp word */
fpword = 0;
if (flags & MSVCRT__EM_INVALID) fpword |= 0x80;
if (flags & MSVCRT__EM_DENORMAL) fpword |= 0x100;
if (flags & MSVCRT__EM_ZERODIVIDE) fpword |= 0x200;
if (flags & MSVCRT__EM_OVERFLOW) fpword |= 0x400;
if (flags & MSVCRT__EM_UNDERFLOW) fpword |= 0x800;
if (flags & MSVCRT__EM_INEXACT) fpword |= 0x1000;
switch (flags & MSVCRT__MCW_RC)
{
case MSVCRT__RC_UP|MSVCRT__RC_DOWN: fpword |= 0x6000; break;
case MSVCRT__RC_UP: fpword |= 0x4000; break;
case MSVCRT__RC_DOWN: fpword |= 0x2000; break;
}
switch (flags & MSVCRT__MCW_DN)
{
case MSVCRT__DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
case MSVCRT__DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
case MSVCRT__DN_FLUSH: fpword |= 0x8040; break;
}
__asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) ); }
__asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) ); } *sse2_cw = flags;
}
2.26.2