From: Alexandre Julliard julliard@winehq.org
--- dlls/msvcrt/math.c | 245 +-------------------------------------- dlls/msvcrt/msvcrt.spec | 2 +- libs/musl/src/math/exp.c | 28 ++--- 3 files changed, 16 insertions(+), 259 deletions(-)
diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index b44bc051453..ba89c8446ba 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -714,251 +714,16 @@ double CDECL atan( double x ) return sign ? -z : z; }
-/* Copied from musl: src/math/exp_data.c */ -static const UINT64 exp_T[] = { - 0x0ULL, 0x3ff0000000000000ULL, - 0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL, - 0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL, - 0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL, - 0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL, - 0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL, - 0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL, - 0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL, - 0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL, - 0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL, - 0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL, - 0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL, - 0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL, - 0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL, - 0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL, - 0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL, - 0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL, - 0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL, - 0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL, - 0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL, - 0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL, - 0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL, - 0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL, - 0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL, - 0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL, - 0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL, - 0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL, - 0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL, - 0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL, - 0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL, - 0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL, - 0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL, - 0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL, - 0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL, - 0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL, - 0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL, - 0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL, - 0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL, - 0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL, - 0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL, - 0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL, - 0xbc9312607a28698aULL, 0x3feeda4504ac801cULL, - 0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL, - 0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL, - 0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL, - 0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL, - 0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL, - 0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL, - 0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL, - 0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL, - 0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL, - 0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL, - 0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL, - 0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL, - 0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL, - 0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL, - 0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL, - 0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL, - 0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL, - 0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL, - 0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL, - 0xbc845378892be9aeULL, 0x3feea34634ccc320ULL, - 0xbc93cedd78565858ULL, 0x3feea23882552225ULL, - 0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL, - 0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL, - 0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL, - 0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL, - 0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL, - 0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL, - 0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL, - 0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL, - 0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL, - 0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL, - 0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL, - 0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL, - 0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL, - 0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL, - 0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL, - 0xbc94d450d872576eULL, 0x3feea8d99b4492edULL, - 0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL, - 0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL, - 0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL, - 0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL, - 0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL, - 0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL, - 0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL, - 0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL, - 0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL, - 0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL, - 0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL, - 0xbc87c50422622263ULL, 0x3feecc667b5de565ULL, - 0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL, - 0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL, - 0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL, - 0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL, - 0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL, - 0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL, - 0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL, - 0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL, - 0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL, - 0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL, - 0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL, - 0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL, - 0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL, - 0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL, - 0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL, - 0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL, - 0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL, - 0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL, - 0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL, - 0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL, - 0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL, - 0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL, - 0xbc82919e2040220fULL, 0x3fef60e316c98398ULL, - 0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL, - 0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL, - 0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL, - 0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL, - 0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL, - 0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL, - 0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL, - 0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL, - 0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL, - 0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL, - 0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL, - 0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL, - 0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL, - 0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL -}; - /********************************************************************* * exp (MSVCRT.@) - * - * Copied from musl: src/math/exp.c */ -double CDECL exp( double x ) +#if _MSVCR_VER == 0 /* other versions call exp() directly */ +double CDECL MSVCRT_exp( double x ) { - static const double C[] = { - 0x1.ffffffffffdbdp-2, - 0x1.555555555543cp-3, - 0x1.55555cf172b91p-5, - 0x1.1111167a4d017p-7 - }; - static const double invln2N = 0x1.71547652b82fep0 * (1 << 7), - negln2hiN = -0x1.62e42fefa0000p-8, - negln2loN = -0x1.cf79abc9e3b3ap-47; - - UINT32 abstop; - UINT64 ki, idx, top, sbits; - double kd, z, r, r2, scale, tail, tmp; - - abstop = (*(UINT64*)&x >> 52) & 0x7ff; - if (abstop - 0x3c9 >= 0x408 - 0x3c9) { - if (abstop - 0x3c9 >= 0x80000000) - /* Avoid spurious underflow for tiny x. */ - /* Note: 0 is common input. */ - return 1.0 + x; - if (abstop >= 0x409) { - if (*(UINT64*)&x == 0xfff0000000000000ULL) - return 0.0; -#if _MSVCR_VER == 0 - if (*(UINT64*)&x > 0x7ff0000000000000ULL) - return math_error(_DOMAIN, "exp", x, 0, 1.0 + x); -#endif - if (abstop >= 0x7ff) - return 1.0 + x; - if (*(UINT64*)&x >> 63) - return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN); - else - return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX); - } - /* Large x is special cased below. */ - abstop = 0; - } - - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - z = invln2N * x; - kd = round(z); - ki = (INT64)kd; - - r = x + kd * negln2hiN + kd * negln2loN; - /* 2^(k/N) ~= scale * (1 + tail). */ - idx = 2 * (ki % (1 << 7)); - top = ki << (52 - 7); - tail = *(double*)&exp_T[idx]; - /* This is only a valid scale when -1023*N < k < 1024*N. */ - sbits = exp_T[idx + 1] + top; - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; - /* Without fma the worst case error is 0.25/N ulp larger. */ - /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ - tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]); - if (abstop == 0) { - /* Handle cases that may overflow or underflow when computing the result that - is scale*(1+TMP) without intermediate rounding. The bit representation of - scale is in SBITS, however it has a computed exponent that may have - overflown into the sign bit so that needs to be adjusted before using it as - a double. (int32_t)KI is the k used in the argument reduction and exponent - adjustment of scale, positive k here means the result may overflow and - negative k means the result may underflow. */ - double scale, y; - - if ((ki & 0x80000000) == 0) { - /* k > 0, the exponent of scale might have overflowed by <= 460. */ - sbits -= 1009ull << 52; - scale = *(double*)&sbits; - y = 0x1p1009 * (scale + scale * tmp); - if (isinf(y)) - return math_error(_OVERFLOW, "exp", x, 0, y); - return y; - } - /* k < 0, need special care in the subnormal range. */ - sbits += 1022ull << 52; - scale = *(double*)&sbits; - y = scale + scale * tmp; - if (y < 1.0) { - /* Round y to the right precision before scaling it into the subnormal - range to avoid double rounding that can cause 0.5+E/2 ulp error where - E is the worst-case ulp error outside the subnormal range. So this - is only useful if the goal is better than 1 ulp worst-case error. */ - double hi, lo; - lo = scale - y + scale * tmp; - hi = 1.0 + y; - lo = 1.0 - hi + y + lo; - y = hi + lo - 1.0; - /* Avoid -0.0 with downward rounding. */ - if (y == 0.0) - y = 0.0; - /* The underflow exception needs to be signaled explicitly. */ - fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022); - y = 0x1p-1022 * y; - return math_error(_UNDERFLOW, "exp", x, 0, y); - } - y = 0x1p-1022 * y; - return y; - } - scale = *(double*)&sbits; - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - return scale + scale * tmp; + if (isnan( x )) return math_error(_DOMAIN, "exp", x, 0, 1.0 + x); + return exp( x ); } +#endif
static BOOL sqrt_validate( double *x, BOOL update_sw ) { diff --git a/dlls/msvcrt/msvcrt.spec b/dlls/msvcrt/msvcrt.spec index 8bf1c87d73c..aaca212c63c 100644 --- a/dlls/msvcrt/msvcrt.spec +++ b/dlls/msvcrt/msvcrt.spec @@ -1282,7 +1282,7 @@ @ cdecl -arch=win64 difftime(long long) _difftime64 @ cdecl -ret64 div(long long) @ cdecl exit(long) -@ cdecl exp(double) +@ cdecl exp(double) MSVCRT_exp @ cdecl -arch=!i386 expf(float) @ cdecl fabs(double) @ cdecl -arch=arm,arm64 fabsf(float) diff --git a/libs/musl/src/math/exp.c b/libs/musl/src/math/exp.c index cb5c077ee37..db67213b7d3 100644 --- a/libs/musl/src/math/exp.c +++ b/libs/musl/src/math/exp.c @@ -28,7 +28,7 @@ a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +static inline double specialcase(double x, double_t tmp, uint64_t sbits, uint64_t ki) { double_t scale, y;
@@ -37,6 +37,8 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) sbits -= 1009ull << 52; scale = asdouble(sbits); y = 0x1p1009 * (scale + scale * tmp); + if (isinf(y)) + return math_error(_OVERFLOW, "exp", x, 0, y); return eval_as_double(y); } /* k < 0, need special care in the subnormal range. */ @@ -58,6 +60,8 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) y = 0.0; /* The underflow exception needs to be signaled explicitly. */ fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); + y = 0x1p-1022 * y; + return math_error(_UNDERFLOW, "exp", x, 0, y); } y = 0x1p-1022 * y; return eval_as_double(y); @@ -87,9 +91,9 @@ double __cdecl exp(double x) if (abstop >= top12(INFINITY)) return 1.0 + x; if (asuint64(x) >> 63) - return __math_uflow(0); + return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN); else - return __math_oflow(0); + return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX); } /* Large x is special cased below. */ abstop = 0; @@ -98,20 +102,8 @@ double __cdecl exp(double x) /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ z = InvLn2N * x; -#if TOINT_INTRINSICS - kd = roundtoint(z); - ki = converttoint(z); -#elif EXP_USE_TOINT_NARROW - /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd) >> 16; - kd = (double_t)(int32_t)ki; -#else - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd); - kd -= Shift; -#endif + kd = round(z); + ki = (int64_t)kd; r = x + kd * NegLn2hiN + kd * NegLn2loN; /* 2^(k/N) ~= scale * (1 + tail). */ idx = 2 * (ki % N); @@ -126,7 +118,7 @@ double __cdecl exp(double x) /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); if (predict_false(abstop == 0)) - return specialcase(tmp, sbits, ki); + return specialcase(x, tmp, sbits, ki); scale = asdouble(sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. */