From: Alexandre Julliard julliard@winehq.org
--- dlls/msvcrt/math.c | 257 -------------------------------------- dlls/msvcrt/msvcrt.spec | 2 - libs/musl/src/math/fma.c | 26 ++-- libs/musl/src/math/fmaf.c | 2 +- 4 files changed, 16 insertions(+), 271 deletions(-)
diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 2685e34809b..8d4ed01c02f 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -3310,263 +3310,6 @@ double CDECL floor( double x ) return u.f; }
-/********************************************************************* - * fma (MSVCRT.@) - * - * Copied from musl: src/math/fma.c - */ -struct fma_num -{ - UINT64 m; - int e; - int sign; -}; - -static struct fma_num normalize(double x) -{ - UINT64 ix = *(UINT64*)&x; - int e = ix >> 52; - int sign = e & 0x800; - struct fma_num ret; - - e &= 0x7ff; - if (!e) { - x *= 0x1p63; - ix = *(UINT64*)&x; - e = ix >> 52 & 0x7ff; - e = e ? e - 63 : 0x800; - } - ix &= (1ull << 52) - 1; - ix |= 1ull << 52; - ix <<= 1; - e -= 0x3ff + 52 + 1; - - ret.m = ix; - ret.e = e; - ret.sign = sign; - return ret; -} - -static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y) -{ - UINT64 t1, t2, t3; - UINT64 xlo = (UINT32)x, xhi = x >> 32; - UINT64 ylo = (UINT32)y, yhi = y >> 32; - - t1 = xlo * ylo; - t2 = xlo * yhi + xhi * ylo; - t3 = xhi * yhi; - *lo = t1 + (t2 << 32); - *hi = t3 + (t2 >> 32) + (t1 > *lo); -} - -double CDECL fma( double x, double y, double z ) -{ - int e, d, sign, samesign, nonzero; - UINT64 rhi, rlo, zhi, zlo; - struct fma_num nx, ny, nz; - double r; - INT64 i; - - /* normalize so top 10bits and last bit are 0 */ - nx = normalize(x); - ny = normalize(y); - nz = normalize(z); - - if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) { - r = x * y + z; - if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM; - return r; - } - if (nz.e >= 0x7ff - 0x3ff - 52 - 1) { - if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */ - r = x * y + z; - if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM; - return r; - } - return z; - } - - /* mul: r = x*y */ - mul(&rhi, &rlo, nx.m, ny.m); - /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ - - /* align exponents */ - e = nx.e + ny.e; - d = nz.e - e; - /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ - if (d > 0) { - if (d < 64) { - zlo = nz.m << d; - zhi = nz.m >> (64 - d); - } else { - zlo = 0; - zhi = nz.m; - e = nz.e - 64; - d -= 64; - if (d < 64 && d) { - rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d)); - rhi = rhi >> d; - } else if (d) { - rlo = 1; - rhi = 0; - } - } - } else { - zhi = 0; - d = -d; - if (d == 0) { - zlo = nz.m; - } else if (d < 64) { - zlo = nz.m >> d | !!(nz.m << (64 - d)); - } else { - zlo = 1; - } - } - - /* add */ - sign = nx.sign ^ ny.sign; - samesign = !(sign ^ nz.sign); - nonzero = 1; - if (samesign) { - /* r += z */ - rlo += zlo; - rhi += zhi + (rlo < zlo); - } else { - /* r -= z */ - UINT64 t = rlo; - rlo -= zlo; - rhi = rhi - zhi - (t < rlo); - if (rhi >> 63) { - rlo = -rlo; - rhi = -rhi - !!rlo; - sign = !sign; - } - nonzero = !!rhi; - } - - /* set rhi to top 63bit of the result (last bit is sticky) */ - if (nonzero) { - e += 64; - if (rhi >> 32) { - BitScanReverse((DWORD*)&d, rhi >> 32); - d = 31 - d - 1; - } else { - BitScanReverse((DWORD*)&d, rhi); - d = 63 - d - 1; - } - /* note: d > 0 */ - rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d); - } else if (rlo) { - if (rlo >> 32) { - BitScanReverse((DWORD*)&d, rlo >> 32); - d = 31 - d - 1; - } else { - BitScanReverse((DWORD*)&d, rlo); - d = 63 - d - 1; - } - if (d < 0) - rhi = rlo >> 1 | (rlo & 1); - else - rhi = rlo << d; - } else { - /* exact +-0 */ - return x * y + z; - } - e -= d; - - /* convert to double */ - i = rhi; /* i is in [1<<62,(1<<63)-1] */ - if (sign) - i = -i; - r = i; /* |r| is in [0x1p62,0x1p63] */ - - if (e < -1022 - 62) { - /* result is subnormal before rounding */ - if (e == -1022 - 63) { - double c = 0x1p63; - if (sign) - c = -c; - if (r == c) { - /* min normal after rounding, underflow depends - on arch behaviour which can be imitated by - a double to float conversion */ - float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r; - return DBL_MIN / FLT_MIN * fltmin; - } - /* one bit is lost when scaled, add another top bit to - only round once at conversion if it is inexact */ - if (rhi << 53) { - double tiny; - - i = rhi >> 1 | (rhi & 1) | 1ull << 62; - if (sign) - i = -i; - r = i; - r = 2 * r - c; /* remove top bit */ - - /* raise underflow portably, such that it - cannot be optimized away */ - tiny = DBL_MIN / FLT_MIN * r; - r += (double)(tiny * tiny) * (r - r); - } - } else { - /* only round once when scaled */ - d = 10; - i = (rhi >> d | !!(rhi << (64 - d))) << d; - if (sign) - i = -i; - r = i; - } - } - return scalbn(r, e); -} - -/********************************************************************* - * fmaf (MSVCRT.@) - * - * Copied from musl: src/math/fmaf.c - */ -float CDECL fmaf( float x, float y, float z ) -{ - union { double f; UINT64 i; } u; - double xy, err; - int e, neg; - - xy = (double)x * y; - u.f = xy + z; - e = u.i>>52 & 0x7ff; - /* Common case: The double precision result is fine. */ - if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ - e == 0x7ff || /* NaN */ - (u.f - xy == z && u.f - z == xy) || /* exact */ - (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */ - { - if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM; - - /* underflow may not be raised correctly, example: - fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */ - if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT) - fp_barrierf((float)u.f * (float)u.f); - return u.f; - } - - /* - * If result is inexact, and exactly halfway between two float values, - * we need to adjust the low-order bit in the direction of the error. - */ - neg = u.i >> 63; - if (neg == (z > xy)) - err = xy - u.f + z; - else - err = z - u.f + xy; - if (neg == (err < 0)) - u.i++; - else - u.i--; - return u.f; -} - #if defined(__i386__) || defined(__x86_64__) static void _setfp_sse( unsigned int *cw, unsigned int cw_mask, unsigned int *sw, unsigned int sw_mask ) diff --git a/dlls/msvcrt/msvcrt.spec b/dlls/msvcrt/msvcrt.spec index f339757438f..4d4228cc819 100644 --- a/dlls/msvcrt/msvcrt.spec +++ b/dlls/msvcrt/msvcrt.spec @@ -1297,8 +1297,6 @@ @ cdecl fgetws(ptr long ptr) @ cdecl floor(double) @ cdecl -arch=!i386 floorf(float) -@ cdecl fma(double double double) -@ cdecl -arch=!i386 fmaf(float float float) @ cdecl fmod(double double) @ cdecl -arch=!i386 fmodf(float float) @ cdecl fopen(str str) diff --git a/libs/musl/src/math/fma.c b/libs/musl/src/math/fma.c index 9639756a8f3..0de94002400 100644 --- a/libs/musl/src/math/fma.c +++ b/libs/musl/src/math/fma.c @@ -58,19 +58,23 @@ static void mul(uint64_t *hi, uint64_t *lo, uint64_t x, uint64_t y)
double __cdecl fma(double x, double y, double z) { - #pragma STDC FENV_ACCESS ON - /* normalize so top 10bits and last bit are 0 */ struct num nx, ny, nz; nx = normalize(x); ny = normalize(y); nz = normalize(z);
- if (nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN) - return x*y + z; + if (nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN) { + double r = x * y + z; + if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) errno = EDOM; + return r; + } if (nz.e >= ZEROINFNAN) { - if (nz.e > ZEROINFNAN) /* z==0 */ - return x*y + z; + if (nz.e > ZEROINFNAN) { /* z==0 */ + double r = x * y + z; + if (!isnan(x) && !isnan(y) && isnan(r)) errno = EDOM; + return r; + } return z; }
@@ -86,7 +90,7 @@ double __cdecl fma(double x, double y, double z) if (d > 0) { if (d < 64) { zlo = nz.m<<d; - zhi = nz.m>>64-d; + zhi = nz.m>>(64-d); } else { zlo = 0; zhi = nz.m; @@ -94,7 +98,7 @@ double __cdecl fma(double x, double y, double z) d -= 64; if (d == 0) { } else if (d < 64) { - rlo = rhi<<64-d | rlo>>d | !!(rlo<<64-d); + rlo = rhi<<(64-d) | rlo>>d | !!(rlo<<(64-d)); rhi = rhi>>d; } else { rlo = 1; @@ -107,7 +111,7 @@ double __cdecl fma(double x, double y, double z) if (d == 0) { zlo = nz.m; } else if (d < 64) { - zlo = nz.m>>d | !!(nz.m<<64-d); + zlo = nz.m>>d | !!(nz.m<<(64-d)); } else { zlo = 1; } @@ -139,7 +143,7 @@ double __cdecl fma(double x, double y, double z) e += 64; d = a_clz_64(rhi)-1; /* note: d > 0 */ - rhi = rhi<<d | rlo>>64-d | !!(rlo<<d); + rhi = rhi<<d | rlo>>(64-d) | !!(rlo<<d); } else if (rlo) { d = a_clz_64(rlo)-1; if (d < 0) @@ -190,7 +194,7 @@ double __cdecl fma(double x, double y, double z) } else { /* only round once when scaled */ d = 10; - i = ( rhi>>d | !!(rhi<<64-d) ) << d; + i = ( rhi>>d | !!(rhi<<(64-d)) ) << d; if (sign) i = -i; r = i; diff --git a/libs/musl/src/math/fmaf.c b/libs/musl/src/math/fmaf.c index e4c3847c0a1..3e4636fd204 100644 --- a/libs/musl/src/math/fmaf.c +++ b/libs/musl/src/math/fmaf.c @@ -39,7 +39,6 @@ */ float __cdecl fmaf(float x, float y, float z) { - #pragma STDC FENV_ACCESS ON double xy, result; union {double f; uint64_t i;} u; int e; @@ -54,6 +53,7 @@ float __cdecl fmaf(float x, float y, float z) (result - xy == z && result - z == xy) || /* exact */ fegetround() != FE_TONEAREST) /* not round-to-nearest */ { + if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) errno = EDOM; /* underflow may not be raised correctly, example: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)