[PATCH 0/12] MR2608: msvcrt: Use still more functions from the bundled musl library.

List overview All Threads

newer

older

[PATCH v2 0/6] MR2598: imm32: Move...

[PATCH 0/6] MR2598: imm32: Move...

Alexandre Julliard (＠julliard)

7 Apr 2023

11:17 a.m.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Show replies by date

Alexandre Julliard

7 Apr

11:17 a.m.

New subject: [PATCH 01/12] msvcrt: Use the round()/roundf() implementation from the bundled musl library.

From: Alexandre Julliard <julliard@winehq.org>

With the changes from df9c11ffa8ddc3b4dbb22f6e304cba8cfcda8ada. --- dlls/msvcrt/math.c | 68 +++---------------------------------- libs/musl/src/math/round.c | 32 +++++++---------- libs/musl/src/math/roundf.c | 2 +- 3 files changed, 18 insertions(+), 84 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 8d4ed01c02f..d9957ad4395 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -467,27 +467,6 @@ recompute: return n & 7; }

-/* Based on musl implementation: src/math/round.c */ -static double __round(double x) -{ - ULONGLONG llx = *(ULONGLONG*)&x, tmp; - int e = (llx >> 52 & 0x7ff) - 0x3ff; - - if (e >= 52) - return x; - if (e < -1) - return 0 * x; - else if (e == -1) - return signbit(x) ? -1 : 1; - - tmp = 0x000fffffffffffffULL >> e; - if (!(llx & tmp)) - return x; - llx += 0x0008000000000000ULL >> e; - llx &= ~tmp; - return *(double*)&llx; -} - #ifndef __i386__ /* Copied from musl: src/math/__sindf.c */ static float __sindf(double x) @@ -945,7 +924,7 @@ float CDECL expf( float x ) /* Round and convert z to int, the result is in [-150*N, 128*N] and ideally ties-to-even rule is used, otherwise the magnitude of r can be bigger which gives larger approximation error. */ - kd = __round(z); + kd = round(z); ki = (INT64)kd; r = z - kd;

@@ -1037,7 +1016,7 @@ static float powf_exp2(double xd, UINT32 sign_bias) double kd, z, r, r2, y, s;

/* N*x = k + r with r in [-1/2, 1/2] */ - kd = __round(xd); /* k */ + kd = round(xd); /* k */ ki = (INT64)kd; r = xd - kd;

@@ -2187,7 +2166,7 @@ double CDECL exp( double x ) /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ z = invln2N * x; - kd = __round(z); + kd = round(z); ki = (INT64)kd;

r = x + kd * negln2hiN + kd * negln2loN; @@ -2495,7 +2474,7 @@ static double pow_exp(double argx, double argy, double x, double xtail, UINT32 s /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ z = invln2N * x; - kd = __round(z); + kd = round(z); ki = (INT64)kd; r = x + kd * negln2hiN + kd * negln2loN; /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ @@ -5336,45 +5315,6 @@ __int64 CDECL llrintf(float x) return f; }

-/********************************************************************* - * round (MSVCR120.@) - */ -double CDECL round(double x) -{ - return __round(x); -} - -/********************************************************************* - * roundf (MSVCR120.@) - * - * Copied from musl: src/math/roundf.c - */ -float CDECL roundf(float x) -{ - static const float toint = 1 / FLT_EPSILON; - - unsigned int ix = *(unsigned int*)&x; - int e = ix >> 23 & 0xff; - float y; - - if (e >= 0x7f + 23) - return x; - if (ix >> 31) - x = -x; - if (e < 0x7f - 1) - return 0 * *(float*)&ix; - y = fp_barrierf(x + toint) - toint - x; - if (y > 0.5f) - y = y + x - 1; - else if (y <= -0.5f) - y = y + x + 1; - else - y = y + x; - if (ix >> 31) - y = -y; - return y; -} - /********************************************************************* * lround (MSVCR120.@) * diff --git a/libs/musl/src/math/round.c b/libs/musl/src/math/round.c index 853b6d8fd65..b8c4c6f1785 100644 --- a/libs/musl/src/math/round.c +++ b/libs/musl/src/math/round.c @@ -5,31 +5,25 @@ #elif FLT_EVAL_METHOD==2 #define EPS LDBL_EPSILON #endif -static const double_t toint = 1/EPS;

double __cdecl round(double x) { union {double f; uint64_t i;} u = {x}; - int e = u.i >> 52 & 0x7ff; + uint64_t tmp; + int e = (u.i >> 52 & 0x7ff) - 0x3ff; double_t y;

- if (e >= 0x3ff+52) + if (e >= 52) return x; - if (u.i >> 63) - x = -x; - if (e < 0x3ff-1) { - /* raise inexact if x!=0 */ - FORCE_EVAL(x + toint); + if (e < -1) return 0*u.f; - } - y = x + toint - toint - x; - if (y > 0.5) - y = y + x - 1; - else if (y <= -0.5) - y = y + x + 1; - else - y = y + x; - if (u.i >> 63) - y = -y; - return y; + if (e == -1) + return (u.i >> 63) ? -1 : 1; + + tmp = 0x000fffffffffffffULL >> e; + if (!(u.i & tmp)) + return x; + u.i += 0x0008000000000000ULL >> e; + u.i &= ~tmp; + return u.f; } diff --git a/libs/musl/src/math/roundf.c b/libs/musl/src/math/roundf.c index b8c20778a1a..88130777fd7 100644 --- a/libs/musl/src/math/roundf.c +++ b/libs/musl/src/math/roundf.c @@ -23,7 +23,7 @@ float __cdecl roundf(float x) FORCE_EVAL(x + toint); return 0*u.f; } - y = x + toint - toint - x; + y = fp_barrierf(x + toint) - toint - x; if (y > 0.5f) y = y + x - 1; else if (y <= -0.5f)

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 02/12] msvcrt: Use the floor()/floorf() implementation from the bundled musl library.

From: Alexandre Julliard <julliard@winehq.org>

With the changes from 29c07324c19bbdaf5255b230cca5e0db5c5796c4. --- dlls/msvcrt/math.c | 58 -------------------------------------- libs/musl/src/math/floor.c | 30 ++++++++++---------- 2 files changed, 15 insertions(+), 73 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index d9957ad4395..aee19f297cc 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -1426,35 +1426,6 @@ float CDECL ceilf( float x ) return u.f; }

-/********************************************************************* - * floorf (MSVCRT.@) - * - * Copied from musl: src/math/floorf.c - */ -float CDECL floorf( float x ) -{ - union {float f; UINT32 i;} u = {x}; - int e = (int)(u.i >> 23 & 0xff) - 0x7f; - UINT32 m; - - if (e >= 23) - return x; - if (e >= 0) { - m = 0x007fffff >> e; - if ((u.i & m) == 0) - return x; - if (u.i >> 31) - u.i += m; - u.i &= ~m; - } else { - if (u.i >> 31 == 0) - return 0; - else if (u.i << 1) - return -1; - } - return u.f; -} - #endif

/********************************************************************* @@ -3260,35 +3231,6 @@ double CDECL ceil( double x ) return u.f; }

-/********************************************************************* - * floor (MSVCRT.@) - * - * Based on musl: src/math/floorf.c - */ -double CDECL floor( double x ) -{ - union {double f; UINT64 i;} u = {x}; - int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff; - UINT64 m; - - if (e >= 52) - return x; - if (e >= 0) { - m = 0x000fffffffffffffULL >> e; - if ((u.i & m) == 0) - return x; - if (u.i >> 63) - u.i += m; - u.i &= ~m; - } else { - if (u.i >> 63 == 0) - return 0; - else if (u.i << 1) - return -1; - } - return u.f; -} - #if defined(__i386__) || defined(__x86_64__) static void _setfp_sse( unsigned int *cw, unsigned int cw_mask, unsigned int *sw, unsigned int sw_mask ) diff --git a/libs/musl/src/math/floor.c b/libs/musl/src/math/floor.c index ce3a359a02a..aad6c0d9940 100644 --- a/libs/musl/src/math/floor.c +++ b/libs/musl/src/math/floor.c @@ -5,27 +5,27 @@ #elif FLT_EVAL_METHOD==2 #define EPS LDBL_EPSILON #endif -static const double_t toint = 1/EPS;

double __cdecl floor(double x) { union {double f; uint64_t i;} u = {x}; - int e = u.i >> 52 & 0x7ff; + int e = (u.i >> 52 & 0x7ff) - 0x3ff; double_t y;

- if (e >= 0x3ff+52 || x == 0) + if (e >= 52) return x; - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - if (u.i >> 63) - y = x - toint + toint - x; - else - y = x + toint - toint - x; - /* special case because of non-nearest rounding modes */ - if (e <= 0x3ff-1) { - FORCE_EVAL(y); - return u.i >> 63 ? -1 : 0; + if (e >= 0) { + uint64_t m = 0x000fffffffffffffULL >> e; + if ((u.i & m) == 0) + return x; + if (u.i >> 63) + u.i += m; + u.i &= ~m; + } else { + if (u.i >> 63 == 0) + return 0; + if (u.i << 1) + return -1; } - if (y > 0) - return x + y - 1; - return x + y; + return u.f; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 03/12] msvcrt: Use the ceil()/ceilf() implementation from the bundled musl library.

From: Alexandre Julliard <julliard@winehq.org>

With the changes from 2a5e68ab807939b1b5b1484a189717b659b0a28e. --- dlls/msvcrt/math.c | 58 --------------------------------------- libs/musl/src/math/ceil.c | 32 ++++++++++----------- 2 files changed, 16 insertions(+), 74 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index aee19f297cc..d8563873562 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -1397,35 +1397,6 @@ float CDECL tanhf( float x ) return sign ? -t : t; }

-/********************************************************************* - * ceilf (MSVCRT.@) - * - * Copied from musl: src/math/ceilf.c - */ -float CDECL ceilf( float x ) -{ - union {float f; UINT32 i;} u = {x}; - int e = (int)(u.i >> 23 & 0xff) - 0x7f; - UINT32 m; - - if (e >= 23) - return x; - if (e >= 0) { - m = 0x007fffff >> e; - if ((u.i & m) == 0) - return x; - if (u.i >> 31 == 0) - u.i += m; - u.i &= ~m; - } else { - if (u.i >> 31) - return -0.0; - else if (u.i << 1) - return 1.0; - } - return u.f; -} - #endif

/********************************************************************* @@ -3202,35 +3173,6 @@ __int64 CDECL _abs64( __int64 n ) return n >= 0 ? n : -n; }

-/********************************************************************* - * ceil (MSVCRT.@) - * - * Based on musl: src/math/ceilf.c - */ -double CDECL ceil( double x ) -{ - union {double f; UINT64 i;} u = {x}; - int e = (u.i >> 52 & 0x7ff) - 0x3ff; - UINT64 m; - - if (e >= 52) - return x; - if (e >= 0) { - m = 0x000fffffffffffffULL >> e; - if ((u.i & m) == 0) - return x; - if (u.i >> 63 == 0) - u.i += m; - u.i &= ~m; - } else { - if (u.i >> 63) - return -0.0; - else if (u.i << 1) - return 1.0; - } - return u.f; -} - #if defined(__i386__) || defined(__x86_64__) static void _setfp_sse( unsigned int *cw, unsigned int cw_mask, unsigned int *sw, unsigned int sw_mask ) diff --git a/libs/musl/src/math/ceil.c b/libs/musl/src/math/ceil.c index 781a90bd685..ba2a689907e 100644 --- a/libs/musl/src/math/ceil.c +++ b/libs/musl/src/math/ceil.c @@ -5,27 +5,27 @@ #elif FLT_EVAL_METHOD==2 #define EPS LDBL_EPSILON #endif -static const double_t toint = 1/EPS;

double __cdecl ceil(double x) { union {double f; uint64_t i;} u = {x}; - int e = u.i >> 52 & 0x7ff; + int e = (u.i >> 52 & 0x7ff) - 0x3ff; double_t y;

- if (e >= 0x3ff+52 || x == 0) + if (e >= 52) return x; - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - if (u.i >> 63) - y = x - toint + toint - x; - else - y = x + toint - toint - x; - /* special case because of non-nearest rounding modes */ - if (e <= 0x3ff-1) { - FORCE_EVAL(y); - return u.i >> 63 ? -0.0 : 1; - } - if (y < 0) - return x + y + 1; - return x + y; + if (e >= 0) { + uint64_t m = 0x000fffffffffffffULL >> e; + if ((u.i & m) == 0) + return x; + if (u.i >> 63 == 0) + u.i += m; + u.i &= ~m; + } else { + if (u.i >> 63) + return -0.0; + if (u.i << 1) + return 1.0; + } + return u.f; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 04/12] msvcrt: Use the nextafter()/nextafterf() implementation from the bundled musl library.

From: Alexandre Julliard <julliard@winehq.org>

diff --git a/dlls/crtdll/crtdll.spec b/dlls/crtdll/crtdll.spec index 3a90ad7ef74..0e863362f77 100644 --- a/dlls/crtdll/crtdll.spec +++ b/dlls/crtdll/crtdll.spec @@ -241,7 +241,7 @@ @ cdecl _mkdir(str) @ cdecl _mktemp(str) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) +@ cdecl _nextafter(double double) nextafter @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcr100/msvcr100.spec b/dlls/msvcr100/msvcr100.spec index 1b9abbfdec2..abae3f4b301 100644 --- a/dlls/msvcr100/msvcr100.spec +++ b/dlls/msvcr100/msvcr100.spec @@ -1215,8 +1215,8 @@ @ cdecl _mktime32(ptr) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) -@ cdecl -arch=x86_64 _nextafterf(float float) +@ cdecl _nextafter(double double) nextafter +@ cdecl -arch=x86_64 _nextafterf(float float) nextafterf @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcr110/msvcr110.spec b/dlls/msvcr110/msvcr110.spec index bba1222a44e..e83ce328928 100644 --- a/dlls/msvcr110/msvcr110.spec +++ b/dlls/msvcr110/msvcr110.spec @@ -1572,8 +1572,8 @@ @ cdecl _mktime32(ptr) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) -@ cdecl -arch=x86_64 _nextafterf(float float) +@ cdecl _nextafter(double double) nextafter +@ cdecl -arch=x86_64 _nextafterf(float float) nextafterf @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcr120/msvcr120.spec b/dlls/msvcr120/msvcr120.spec index 654fb4071f8..cdd5848f7e9 100644 --- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -1583,8 +1583,8 @@ @ cdecl _mktime32(ptr) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) -@ cdecl -arch=x86_64 _nextafterf(float float) +@ cdecl _nextafter(double double) nextafter +@ cdecl -arch=x86_64 _nextafterf(float float) nextafterf @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) 
@@ -2300,9 +2300,9 @@ @ cdecl nearbyint(double) @ cdecl nearbyintf(float) @ cdecl nearbyintl(double) nearbyint -@ cdecl nextafter(double double) _nextafter -@ cdecl nextafterf(float float) _nextafterf -@ cdecl nextafterl(double double) _nextafter +@ cdecl nextafter(double double) +@ cdecl nextafterf(float float) +@ cdecl nextafterl(double double) nextafter @ cdecl nexttoward(double double) MSVCRT_nexttoward @ cdecl nexttowardf(float double) MSVCRT_nexttowardf @ cdecl nexttowardl(double double) MSVCRT_nexttoward diff --git a/dlls/msvcr70/msvcr70.spec b/dlls/msvcr70/msvcr70.spec index 1e02dc95d2d..3300cf36bf3 100644 --- a/dlls/msvcr70/msvcr70.spec +++ b/dlls/msvcr70/msvcr70.spec @@ -479,7 +479,7 @@ @ cdecl _mktemp(str) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) +@ cdecl _nextafter(double double) nextafter @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcr71/msvcr71.spec b/dlls/msvcr71/msvcr71.spec index 1a075986ddd..5b68d3217af 100644 --- a/dlls/msvcr71/msvcr71.spec +++ b/dlls/msvcr71/msvcr71.spec @@ -474,7 +474,7 @@ @ cdecl _mktemp(str) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) +@ cdecl _nextafter(double double) nextafter @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcr80/msvcr80.spec b/dlls/msvcr80/msvcr80.spec index 2ea91c0ce8a..7c7c2f8bcdb 100644 --- a/dlls/msvcr80/msvcr80.spec +++ b/dlls/msvcr80/msvcr80.spec @@ -887,8 +887,8 @@ @ cdecl _mktime32(ptr) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) -@ cdecl -arch=x86_64 _nextafterf(float float) +@ cdecl _nextafter(double double) nextafter +@ cdecl -arch=x86_64 _nextafterf(float float) nextafterf @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcr90/msvcr90.spec b/dlls/msvcr90/msvcr90.spec index fb274878d39..c1e4a406265 100644 --- 
a/dlls/msvcr90/msvcr90.spec +++ b/dlls/msvcr90/msvcr90.spec @@ -865,8 +865,8 @@ @ cdecl _mktime32(ptr) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) -@ cdecl -arch=x86_64 _nextafterf(float float) +@ cdecl _nextafter(double double) nextafter +@ cdecl -arch=x86_64 _nextafterf(float float) nextafterf @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index d8563873562..dbd9bde4d32 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -216,49 +216,6 @@ float CDECL _chgsignf( float num ) return u.f; }

-/********************************************************************* - * _nextafterf (MSVCRT.@) - * - * Copied from musl: src/math/nextafterf.c - */ -float CDECL _nextafterf( float x, float y ) -{ - unsigned int ix = *(unsigned int*)&x; - unsigned int iy = *(unsigned int*)&y; - unsigned int ax, ay, e; - - if (isnan(x) || isnan(y)) - return x + y; - if (x == y) { - if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ )) - *_errno() = ERANGE; - return y; - } - ax = ix & 0x7fffffff; - ay = iy & 0x7fffffff; - if (ax == 0) { - if (ay == 0) - return y; - ix = (iy & 0x80000000) | 1; - } else if (ax > ay || ((ix ^ iy) & 0x80000000)) - ix--; - else - ix++; - e = ix & 0x7f800000; - /* raise overflow if ix is infinite and x is finite */ - if (e == 0x7f800000) { - fp_barrierf(x + x); - *_errno() = ERANGE; - } - /* raise underflow if ix is subnormal or zero */ - y = *(float*)&ix; - if (e == 0) { - fp_barrierf(x * x + y * y); - *_errno() = ERANGE; - } - return y; -} - #endif

/* Copied from musl: src/math/__rem_pio2_large.c */ @@ -4124,7 +4081,7 @@ float CDECL nearbyintf(float x) */ double CDECL MSVCRT_nexttoward(double num, double next) { - return _nextafter(num, next); + return nextafter(num, next); }

/********************************************************************* @@ -4174,50 +4131,6 @@ float CDECL MSVCRT_nexttowardf(float x, double y)

#endif /* _MSVCR_VER>=120 */

-/********************************************************************* - * _nextafter (MSVCRT.@) - * - * Copied from musl: src/math/nextafter.c - */ -double CDECL _nextafter(double x, double y) -{ - ULONGLONG llx = *(ULONGLONG*)&x; - ULONGLONG lly = *(ULONGLONG*)&y; - ULONGLONG ax, ay; - int e; - - if (isnan(x) || isnan(y)) - return x + y; - if (llx == lly) { - if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ )) - *_errno() = ERANGE; - return y; - } - ax = llx & -1ULL / 2; - ay = lly & -1ULL / 2; - if (ax == 0) { - if (ay == 0) - return y; - llx = (lly & 1ULL << 63) | 1; - } else if (ax > ay || ((llx ^ lly) & 1ULL << 63)) - llx--; - else - llx++; - e = llx >> 52 & 0x7ff; - /* raise overflow if llx is infinite and x is finite */ - if (e == 0x7ff) { - fp_barrier(x + x); - *_errno() = ERANGE; - } - /* raise underflow if llx is subnormal or zero */ - y = *(double*)&llx; - if (e == 0) { - fp_barrier(x * x + y * y); - *_errno() = ERANGE; - } - return y; -} - /********************************************************************* * _ecvt (MSVCRT.@) */ diff --git a/dlls/msvcrt/msvcrt.spec b/dlls/msvcrt/msvcrt.spec index 4d4228cc819..0b88ba97135 100644 --- a/dlls/msvcrt/msvcrt.spec +++ b/dlls/msvcrt/msvcrt.spec @@ -840,8 +840,8 @@ @ cdecl _msize(ptr) # stub -arch=win32 _msize_debug(ptr long) # stub -arch=win64 _msize_dbg(ptr long) -@ cdecl _nextafter(double double) -@ cdecl -arch=x86_64 _nextafterf(float float) +@ cdecl _nextafter(double double) nextafter +@ cdecl -arch=x86_64 _nextafterf(float float) nextafterf @ cdecl _onexit(ptr) @ varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/msvcrtd/msvcrtd.spec b/dlls/msvcrtd/msvcrtd.spec index 68fa80d532f..2c3f4912a03 100644 --- a/dlls/msvcrtd/msvcrtd.spec +++ b/dlls/msvcrtd/msvcrtd.spec @@ -457,7 +457,7 @@ @ cdecl _mktemp(str) @ cdecl _msize(ptr) @ cdecl _msize_dbg(ptr) _msize -@ cdecl _nextafter(double double) +@ cdecl _nextafter(double double) nextafter @ cdecl _onexit(ptr) @ 
varargs _open(str long) @ cdecl _open_osfhandle(long long) diff --git a/dlls/ucrtbase/ucrtbase.spec b/dlls/ucrtbase/ucrtbase.spec index 51137681f14..a6db1a60c5d 100644 --- a/dlls/ucrtbase/ucrtbase.spec +++ b/dlls/ucrtbase/ucrtbase.spec @@ -731,8 +731,8 @@ @ cdecl _mktime32(ptr) @ cdecl _mktime64(ptr) @ cdecl _msize(ptr) -@ cdecl _nextafter(double double) -@ cdecl -arch=x86_64 _nextafterf(float float) +@ cdecl _nextafter(double double) nextafter +@ cdecl -arch=x86_64 _nextafterf(float float) nextafterf @ cdecl -arch=i386 _o__CIacos() _CIacos @ cdecl -arch=i386 _o__CIasin() _CIasin @ cdecl -arch=i386 _o__CIatan() _CIatan @@ -1299,8 +1299,8 @@ @ cdecl _o__mktime32(ptr) _mktime32 @ cdecl _o__mktime64(ptr) _mktime64 @ cdecl _o__msize(ptr) _msize -@ cdecl _o__nextafter(double double) _nextafter -@ cdecl -arch=x86_64 _o__nextafterf(float float) _nextafterf +@ cdecl _o__nextafter(double double) nextafter +@ cdecl -arch=x86_64 _o__nextafterf(float float) nextafterf @ cdecl _o__open_osfhandle(long long) _open_osfhandle @ cdecl _o__pclose(ptr) _pclose @ cdecl _o__pipe(ptr long long) _pipe @@ -1730,9 +1730,9 @@ @ cdecl _o_nearbyint(double) nearbyint @ cdecl _o_nearbyintf(float) nearbyintf @ cdecl _o_nearbyintl(double) nearbyint -@ cdecl _o_nextafter(double double) _nextafter -@ cdecl _o_nextafterf(float float) _nextafterf -@ cdecl _o_nextafterl(double double) _nextafter +@ cdecl _o_nextafter(double double) nextafter +@ cdecl _o_nextafterf(float float) nextafterf +@ cdecl _o_nextafterl(double double) nextafter @ cdecl _o_nexttoward(double double) MSVCRT_nexttoward @ cdecl _o_nexttowardf(float double) MSVCRT_nexttowardf @ cdecl _o_nexttowardl(double double) MSVCRT_nexttoward @@ -2438,9 +2438,9 @@ @ cdecl nearbyint(double) @ cdecl nearbyintf(float) @ cdecl nearbyintl(double) nearbyint -@ cdecl nextafter(double double) _nextafter -@ cdecl nextafterf(float float) _nextafterf -@ cdecl nextafterl(double double) _nextafter +@ cdecl nextafter(double double) +@ cdecl nextafterf(float 
float) +@ cdecl nextafterl(double double) nextafter @ cdecl nexttoward(double double) MSVCRT_nexttoward @ cdecl nexttowardf(float double) MSVCRT_nexttowardf @ cdecl nexttowardl(double double) MSVCRT_nexttoward diff --git a/libs/musl/src/math/nextafter.c b/libs/musl/src/math/nextafter.c index 232042355a3..95f71439558 100644 --- a/libs/musl/src/math/nextafter.c +++ b/libs/musl/src/math/nextafter.c @@ -8,8 +8,12 @@ double __cdecl nextafter(double x, double y)

if (isnan(x) || isnan(y)) return x + y; - if (ux.i == uy.i) + if (ux.i == uy.i) { + e = ux.i >> 52 & 0x7ff; + if (!e) + errno = ERANGE; return y; + } ax = ux.i & -1ULL/2; ay = uy.i & -1ULL/2; if (ax == 0) { @@ -22,10 +26,14 @@ double __cdecl nextafter(double x, double y) ux.i++; e = ux.i >> 52 & 0x7ff; /* raise overflow if ux.f is infinite and x is finite */ - if (e == 0x7ff) + if (e == 0x7ff) { FORCE_EVAL(x+x); + errno = ERANGE; + } /* raise underflow if ux.f is subnormal or zero */ - if (e == 0) + if (e == 0) { FORCE_EVAL(x*x + ux.f*ux.f); + errno = ERANGE; + } return ux.f; } diff --git a/libs/musl/src/math/nextafterf.c b/libs/musl/src/math/nextafterf.c index 8da11a0f33e..3b044d3f28a 100644 --- a/libs/musl/src/math/nextafterf.c +++ b/libs/musl/src/math/nextafterf.c @@ -7,8 +7,12 @@ float __cdecl nextafterf(float x, float y)

if (isnan(x) || isnan(y)) return x + y; - if (ux.i == uy.i) + if (ux.i == uy.i) { + e = ux.i & 0x7f800000; + if (!e) + errno = ERANGE; return y; + } ax = ux.i & 0x7fffffff; ay = uy.i & 0x7fffffff; if (ax == 0) { @@ -21,10 +25,14 @@ float __cdecl nextafterf(float x, float y) ux.i++; e = ux.i & 0x7f800000; /* raise overflow if ux.f is infinite and x is finite */ - if (e == 0x7f800000) + if (e == 0x7f800000) { FORCE_EVAL(x+x); + errno = ERANGE; + } /* raise underflow if ux.f is subnormal or zero */ - if (e == 0) + if (e == 0) { FORCE_EVAL(x*x + ux.f*ux.f); + errno = ERANGE; + } return ux.f; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 05/12] msvcrt: Use the nexttoward()/nexttowardf() implementation from the bundled musl library.

From: Alexandre Julliard <julliard@winehq.org>

Rename the musl functions to avoid compiler warnings about the signature mismatch (double vs. long double). --- dlls/msvcr120/msvcr120.spec | 6 ++-- dlls/msvcrt/math.c | 53 -------------------------------- dlls/ucrtbase/ucrtbase.spec | 12 ++++---- libs/musl/src/math/nexttoward.c | 2 +- libs/musl/src/math/nexttowardf.c | 10 ++++-- 5 files changed, 17 insertions(+), 66 deletions(-)

diff --git a/dlls/msvcr120/msvcr120.spec b/dlls/msvcr120/msvcr120.spec index cdd5848f7e9..102d6319f47 100644 --- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -2303,9 +2303,9 @@ @ cdecl nextafter(double double) @ cdecl nextafterf(float float) @ cdecl nextafterl(double double) nextafter -@ cdecl nexttoward(double double) MSVCRT_nexttoward -@ cdecl nexttowardf(float double) MSVCRT_nexttowardf -@ cdecl nexttowardl(double double) MSVCRT_nexttoward +@ cdecl nexttoward(double double) __nexttoward +@ cdecl nexttowardf(float double) __nexttowardf +@ cdecl nexttowardl(double double) __nexttoward @ stub norm @ stub normf @ stub norml diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index dbd9bde4d32..68218b7007b 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -4076,59 +4076,6 @@ float CDECL nearbyintf(float x) return x; }

-/********************************************************************* - * nexttoward (MSVCR120.@) - */ -double CDECL MSVCRT_nexttoward(double num, double next) -{ - return nextafter(num, next); -} - -/********************************************************************* - * nexttowardf (MSVCR120.@) - * - * Copied from musl: src/math/nexttowardf.c - */ -float CDECL MSVCRT_nexttowardf(float x, double y) -{ - unsigned int ix = *(unsigned int*)&x; - unsigned int e; - float ret; - - if (isnan(x) || isnan(y)) - return x + y; - if (x == y) - return y; - if (x == 0) { - ix = 1; - if (signbit(y)) - ix |= 0x80000000; - } else if (x < y) { - if (signbit(x)) - ix--; - else - ix++; - } else { - if (signbit(x)) - ix++; - else - ix--; - } - e = ix & 0x7f800000; - /* raise overflow if ix is infinite and x is finite */ - if (e == 0x7f800000) { - fp_barrierf(x + x); - *_errno() = ERANGE; - } - ret = *(float*)&ix; - /* raise underflow if ret is subnormal or zero */ - if (e == 0) { - fp_barrierf(x * x + ret * ret); - *_errno() = ERANGE; - } - return ret; -} - #endif /* _MSVCR_VER>=120 */

/********************************************************************* diff --git a/dlls/ucrtbase/ucrtbase.spec b/dlls/ucrtbase/ucrtbase.spec index a6db1a60c5d..a476681fc43 100644 --- a/dlls/ucrtbase/ucrtbase.spec +++ b/dlls/ucrtbase/ucrtbase.spec @@ -1733,9 +1733,9 @@ @ cdecl _o_nextafter(double double) nextafter @ cdecl _o_nextafterf(float float) nextafterf @ cdecl _o_nextafterl(double double) nextafter -@ cdecl _o_nexttoward(double double) MSVCRT_nexttoward -@ cdecl _o_nexttowardf(float double) MSVCRT_nexttowardf -@ cdecl _o_nexttowardl(double double) MSVCRT_nexttoward +@ cdecl _o_nexttoward(double double) __nexttoward +@ cdecl _o_nexttowardf(float double) __nexttowardf +@ cdecl _o_nexttowardl(double double) __nexttoward @ cdecl _o_pow(double double) pow @ cdecl -arch=!i386 _o_powf(float float) powf @ cdecl _o_putc(long ptr) putc @@ -2441,9 +2441,9 @@ @ cdecl nextafter(double double) @ cdecl nextafterf(float float) @ cdecl nextafterl(double double) nextafter -@ cdecl nexttoward(double double) MSVCRT_nexttoward -@ cdecl nexttowardf(float double) MSVCRT_nexttowardf -@ cdecl nexttowardl(double double) MSVCRT_nexttoward +@ cdecl nexttoward(double double) __nexttoward +@ cdecl nexttowardf(float double) __nexttowardf +@ cdecl nexttowardl(double double) __nexttoward @ stub norm @ stub normf @ stub norml diff --git a/libs/musl/src/math/nexttoward.c b/libs/musl/src/math/nexttoward.c index e4cef9535c3..85fbb486129 100644 --- a/libs/musl/src/math/nexttoward.c +++ b/libs/musl/src/math/nexttoward.c @@ -1,7 +1,7 @@ #include "libm.h"

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -double __cdecl nexttoward(double x, long double y) +double __cdecl __nexttoward(double x, double y) { return nextafter(x, y); } diff --git a/libs/musl/src/math/nexttowardf.c b/libs/musl/src/math/nexttowardf.c index 6c32f6c6733..8c10cdc9f08 100644 --- a/libs/musl/src/math/nexttowardf.c +++ b/libs/musl/src/math/nexttowardf.c @@ -1,6 +1,6 @@ #include "libm.h"

-float __cdecl nexttowardf(float x, long double y) +float __cdecl __nexttowardf(float x, double y) { union {float f; uint32_t i;} ux = {x}; uint32_t e; @@ -26,10 +26,14 @@ float __cdecl nexttowardf(float x, long double y) } e = ux.i & 0x7f800000; /* raise overflow if ux.f is infinite and x is finite */ - if (e == 0x7f800000) + if (e == 0x7f800000) { FORCE_EVAL(x+x); + errno = ERANGE; + } /* raise underflow if ux.f is subnormal or zero */ - if (e == 0) + if (e == 0) { FORCE_EVAL(x*x + ux.f*ux.f); + errno = ERANGE; + } return ux.f; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 06/12] msvcrt: Use the __rem_pio2()/__rem_pio2f() implementation from the bundled musl library.

From: Alexandre Julliard <julliard@winehq.org>

--- dlls/msvcrt/math.c | 424 +------------------------------ libs/musl/src/math/__rem_pio2.c | 2 +- libs/musl/src/math/__rem_pio2f.c | 2 +- 3 files changed, 10 insertions(+), 418 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 68218b7007b..befd05e9ad0 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -218,212 +218,6 @@ float CDECL _chgsignf( float num )

#endif

-/* Copied from musl: src/math/__rem_pio2_large.c */ -static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec) -{ - static const int init_jk[] = {3, 4}; - static const INT32 ipio2[] = { - 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, - 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, - 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, - 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, - 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, - 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, - 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, - 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, - 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, - 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, - 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, - }; - static const double PIo2[] = { - 1.57079625129699707031e+00, - 7.54978941586159635335e-08, - 5.39030252995776476554e-15, - 3.28200341580791294123e-22, - 1.27065575308067607349e-29, - 1.22933308981111328932e-36, - 2.73370053816464559624e-44, - 2.16741683877804819444e-51, - }; - - INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih; - double z, fw, f[20], fq[20] = {0}, q[20]; - - /* initialize jk*/ - jk = init_jk[prec]; - jp = jk; - - /* determine jx,jv,q0, note that 3>q0 */ - jx = nx - 1; - jv = (e0 - 3) / 24; - if(jv < 0) jv = 0; - q0 = e0 - 24 * (jv + 1); - - /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ - j = jv - jx; - m = jx + jk; - for (i = 0; i <= m; i++, j++) - f[i] = j < 0 ? 
0.0 : (double)ipio2[j]; - - /* compute q[0],q[1],...q[jk] */ - for (i = 0; i <= jk; i++) { - for (j = 0, fw = 0.0; j <= jx; j++) - fw += x[j] * f[jx + i - j]; - q[i] = fw; - } - - jz = jk; -recompute: - /* distill q[] into iq[] reversingly */ - for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) { - fw = (double)(INT32)(0x1p-24 * z); - iq[i] = (INT32)(z - 0x1p24 * fw); - z = q[j - 1] + fw; - } - - /* compute n */ - z = scalbn(z, q0); /* actual value of z */ - z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ - n = (INT32)z; - z -= (double)n; - ih = 0; - if (q0 > 0) { /* need iq[jz-1] to determine n */ - i = iq[jz - 1] >> (24 - q0); - n += i; - iq[jz - 1] -= i << (24 - q0); - ih = iq[jz - 1] >> (23 - q0); - } - else if (q0 == 0) ih = iq[jz - 1] >> 23; - else if (z >= 0.5) ih = 2; - - if (ih > 0) { /* q > 0.5 */ - n += 1; - carry = 0; - for (i = 0; i < jz; i++) { /* compute 1-q */ - j = iq[i]; - if (carry == 0) { - if (j != 0) { - carry = 1; - iq[i] = 0x1000000 - j; - } - } else - iq[i] = 0xffffff - j; - } - if (q0 > 0) { /* rare case: chance is 1 in 12 */ - switch(q0) { - case 1: - iq[jz - 1] &= 0x7fffff; - break; - case 2: - iq[jz - 1] &= 0x3fffff; - break; - } - } - if (ih == 2) { - z = 1.0 - z; - if (carry != 0) - z -= scalbn(1.0, q0); - } - } - - /* check if recomputation is needed */ - if (z == 0.0) { - j = 0; - for (i = jz - 1; i >= jk; i--) j |= iq[i]; - if (j == 0) { /* need recomputation */ - for (k = 1; iq[jk - k] == 0; k++); /* k = no. 
of terms needed */ - - for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */ - f[jx + i] = (double)ipio2[jv + i]; - for (j = 0, fw = 0.0; j <= jx; j++) - fw += x[j] * f[jx + i - j]; - q[i] = fw; - } - jz += k; - goto recompute; - } - } - - /* chop off zero terms */ - if (z == 0.0) { - jz -= 1; - q0 -= 24; - while (iq[jz] == 0) { - jz--; - q0 -= 24; - } - } else { /* break z into 24-bit if necessary */ - z = scalbn(z, -q0); - if (z >= 0x1p24) { - fw = (double)(INT32)(0x1p-24 * z); - iq[jz] = (INT32)(z - 0x1p24 * fw); - jz += 1; - q0 += 24; - iq[jz] = (INT32)fw; - } else - iq[jz] = (INT32)z; - } - - /* convert integer "bit" chunk to floating-point value */ - fw = scalbn(1.0, q0); - for (i = jz; i >= 0; i--) { - q[i] = fw * (double)iq[i]; - fw *= 0x1p-24; - } - - /* compute PIo2[0,...,jp]*q[jz,...,0] */ - for(i = jz; i >= 0; i--) { - for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) - fw += PIo2[k] * q[i + k]; - fq[jz - i] = fw; - } - - /* compress fq[] into y[] */ - switch(prec) { - case 0: - fw = 0.0; - for (i = jz; i >= 0; i--) - fw += fq[i]; - y[0] = ih == 0 ? fw : -fw; - break; - case 1: - case 2: - fw = 0.0; - for (i = jz; i >= 0; i--) - fw += fq[i]; - fw = (double)fw; - y[0] = ih==0 ? fw : -fw; - fw = fq[0] - fw; - for (i = 1; i <= jz; i++) - fw += fq[i]; - y[1] = ih == 0 ? fw : -fw; - break; - case 3: /* painful */ - for (i = jz; i > 0; i--) { - fw = fq[i - 1] + fq[i]; - fq[i] += fq[i - 1] - fw; - fq[i - 1] = fw; - } - for (i = jz; i > 1; i--) { - fw = fq[i - 1] + fq[i]; - fq[i] += fq[i - 1] - fw; - fq[i - 1] = fw; - } - for (fw = 0.0, i = jz; i >= 2; i--) - fw += fq[i]; - if (ih == 0) { - y[0] = fq[0]; - y[1] = fq[1]; - y[2] = fw; - } else { - y[0] = -fq[0]; - y[1] = -fq[1]; - y[2] = -fw; - } - } - return n & 7; -} - #ifndef __i386__ /* Copied from musl: src/math/__sindf.c */ static float __sindf(double x) @@ -731,56 +525,7 @@ float CDECL atanf( float x ) return sign ? -z : z; }

-/* Copied from musl: src/math/__rem_pio2f.c */ -static int __rem_pio2f(float x, double *y) -{ - static const double toint = 1.5 / DBL_EPSILON, - pio4 = 0x1.921fb6p-1, - invpio2 = 6.36619772367581382433e-01, - pio2_1 = 1.57079631090164184570e+00, - pio2_1t = 1.58932547735281966916e-08; - - union {float f; uint32_t i;} u = {x}; - double tx[1], ty[1], fn; - UINT32 ix; - int n, sign, e0; - - ix = u.i & 0x7fffffff; - /* 25+53 bit pi is good enough for medium size */ - if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ - /* Use a specialized rint() to get fn. */ - fn = fp_barrier(x * invpio2 + toint) - toint; - n = (int)fn; - *y = x - fn * pio2_1 - fn * pio2_1t; - /* Matters with directed rounding. */ - if (*y < -pio4) { - n--; - fn--; - *y = x - fn * pio2_1 - fn * pio2_1t; - } else if (*y > pio4) { - n++; - fn++; - *y = x - fn * pio2_1 - fn * pio2_1t; - } - return n; - } - if(ix >= 0x7f800000) { /* x is inf or NaN */ - *y = x - x; - return 0; - } - /* scale x into [2^23, 2^24-1] */ - sign = u.i >> 31; - e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */ - u.i = ix - (e0 << 23); - tx[0] = u.f; - n = __rem_pio2_large(tx, ty, e0, 1, 0); - if (sign) { - *y = -ty[0]; - return -n; - } - *y = ty[0]; - return n; -} +extern int __rem_pio2f(float x, double *y);

/********************************************************************* * cosf (MSVCRT.@) @@ -1627,8 +1372,12 @@ double CDECL atan( double x ) return sign ? -z : z; }

-/* Copied from musl: src/math/rint.c */ -static double __rint(double x) +/********************************************************************* + * rint (MSVCR120.@) + * + * Copied from musl: src/math/rint.c + */ +double CDECL rint(double x) { static const double toint = 1 / DBL_EPSILON;

@@ -1654,156 +1403,7 @@ static double __rint(double x) return y; }

-/* Copied from musl: src/math/__rem_pio2.c */ -static int __rem_pio2(double x, double *y) -{ - static const double pio4 = 0x1.921fb54442d18p-1, - invpio2 = 6.36619772367581382433e-01, - pio2_1 = 1.57079632673412561417e+00, - pio2_1t = 6.07710050650619224932e-11, - pio2_2 = 6.07710050630396597660e-11, - pio2_2t = 2.02226624879595063154e-21, - pio2_3 = 2.02226624871116645580e-21, - pio2_3t = 8.47842766036889956997e-32; - - union {double f; UINT64 i;} u = {x}; - double z, w, t, r, fn, tx[3], ty[2]; - UINT32 ix; - int sign, n, ex, ey, i; - - sign = u.i >> 63; - ix = u.i >> 32 & 0x7fffffff; - if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */ - if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */ - goto medium; /* cancellation -- use medium case */ - if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */ - if (!sign) { - z = x - pio2_1; /* one round good to 85 bits */ - y[0] = z - pio2_1t; - y[1] = (z - y[0]) - pio2_1t; - return 1; - } else { - z = x + pio2_1; - y[0] = z + pio2_1t; - y[1] = (z - y[0]) + pio2_1t; - return -1; - } - } else { - if (!sign) { - z = x - 2 * pio2_1; - y[0] = z - 2 * pio2_1t; - y[1] = (z - y[0]) - 2 * pio2_1t; - return 2; - } else { - z = x + 2 * pio2_1; - y[0] = z + 2 * pio2_1t; - y[1] = (z - y[0]) + 2 * pio2_1t; - return -2; - } - } - } - if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */ - if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */ - if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */ - goto medium; - if (!sign) { - z = x - 3 * pio2_1; - y[0] = z - 3 * pio2_1t; - y[1] = (z - y[0]) - 3 * pio2_1t; - return 3; - } else { - z = x + 3 * pio2_1; - y[0] = z + 3 * pio2_1t; - y[1] = (z - y[0]) + 3 * pio2_1t; - return -3; - } - } else { - if (ix == 0x401921fb) /* |x| ~= 4pi/2 */ - goto medium; - if (!sign) { - z = x - 4 * pio2_1; - y[0] = z - 4 * pio2_1t; - y[1] = (z - y[0]) - 4 * pio2_1t; - return 4; - } else { - z = x + 4 * pio2_1; - y[0] = z + 4 * pio2_1t; - y[1] = (z - y[0]) + 4 * pio2_1t; - return -4; - } - } - } - if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ 
-medium: - fn = __rint(x * invpio2); - n = (INT32)fn; - r = x - fn * pio2_1; - w = fn * pio2_1t; /* 1st round, good to 85 bits */ - /* Matters with directed rounding. */ - if (r - w < -pio4) { - n--; - fn--; - r = x - fn * pio2_1; - w = fn * pio2_1t; - } else if (r - w > pio4) { - n++; - fn++; - r = x - fn * pio2_1; - w = fn * pio2_1t; - } - y[0] = r - w; - u.f = y[0]; - ey = u.i >> 52 & 0x7ff; - ex = ix >> 20; - if (ex - ey > 16) { /* 2nd round, good to 118 bits */ - t = r; - w = fn * pio2_2; - r = t - w; - w = fn * pio2_2t - ((t - r) - w); - y[0] = r - w; - u.f = y[0]; - ey = u.i >> 52 & 0x7ff; - if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */ - t = r; - w = fn * pio2_3; - r = t - w; - w = fn * pio2_3t - ((t - r) - w); - y[0] = r - w; - } - } - y[1] = (r - y[0]) - w; - return n; - } - /* - * all other (large) arguments - */ - if (ix >= 0x7ff00000) { /* x is inf or NaN */ - y[0] = y[1] = x - x; - return 0; - } - /* set z = scalbn(|x|,-ilogb(x)+23) */ - u.f = x; - u.i &= (UINT64)-1 >> 12; - u.i |= (UINT64)(0x3ff + 23) << 52; - z = u.f; - for (i = 0; i < 2; i++) { - tx[i] = (double)(INT32)z; - z = (z - tx[i]) * 0x1p24; - } - tx[i] = z; - /* skip zero terms, first term is non-zero */ - while (tx[i] == 0.0) - i--; - n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1); - if (sign) { - y[0] = -ty[0]; - y[1] = -ty[1]; - return -n; - } - y[0] = ty[0]; - y[1] = ty[1]; - return n; -} +extern int __rem_pio2(double x, double *y);

/* Copied from musl: src/math/__sin.c */ static double __sin(double x, double y, int iy) @@ -4962,14 +4562,6 @@ void __cdecl __libm_sse2_sqrt_precise(void)

#if _MSVCR_VER>=120

-/********************************************************************* - * rint (MSVCR120.@) - */ -double CDECL rint(double x) -{ - return __rint(x); -} - /********************************************************************* * rintf (MSVCR120.@) * diff --git a/libs/musl/src/math/__rem_pio2.c b/libs/musl/src/math/__rem_pio2.c index dcf672fbd70..3addef65b35 100644 --- a/libs/musl/src/math/__rem_pio2.c +++ b/libs/musl/src/math/__rem_pio2.c @@ -119,7 +119,7 @@ int __rem_pio2(double x, double *y) if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ medium: /* rint(x/(pi/2)) */ - fn = (double_t)x*invpio2 + toint - toint; + fn = rint(x * invpio2); n = (int32_t)fn; r = x - fn*pio2_1; w = fn*pio2_1t; /* 1st round, good to 85 bits */ diff --git a/libs/musl/src/math/__rem_pio2f.c b/libs/musl/src/math/__rem_pio2f.c index e67656431a8..e36fb47f75c 100644 --- a/libs/musl/src/math/__rem_pio2f.c +++ b/libs/musl/src/math/__rem_pio2f.c @@ -52,7 +52,7 @@ int __rem_pio2f(float x, double *y) /* 25+53 bit pi is good enough for medium size */ if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. */ - fn = (double_t)x*invpio2 + toint - toint; + fn = fp_barrier(x * invpio2 + toint) - toint; n = (int32_t)fn; *y = x - fn*pio2_1 - fn*pio2_1t; /* Matters with directed rounding. */

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 07/12] msvcrt: Use the __sindf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

With the changes from ee7b5ebc3a7843fc2997d5cc5f7784c4d51b4f8d.
---
 dlls/msvcrt/math.c           | 20 +-------------------
 libs/musl/src/math/__sindf.c | 10 ++++++----
 2 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index befd05e9ad0..8fa904b1cc9 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -219,25 +219,7 @@ float CDECL _chgsignf( float num ) #endif

#ifndef __i386__ -/* Copied from musl: src/math/__sindf.c */ -static float __sindf(double x) -{ - static const double S1 = -0x1.5555555555555p-3, - S2 = 0x1.1111111111111p-7, - S3 = -0x1.a01a01a01a01ap-13, - S4 = 0x1.71de3a556c734p-19; - - double r, s, w, z; - - z = x * x; - if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03) - return x * (1 + S1 * z); - - w = z * z; - r = S3 + z * S4; - s = z * x; - return (x + s * (S1 + z * S2)) + s * w * r; -} +extern float __sindf(double x);

/* Copied from musl: src/math/__cosdf.c */ static float __cosdf(double x) diff --git a/libs/musl/src/math/__sindf.c b/libs/musl/src/math/__sindf.c index 8fec2a3f660..fda8a247621 100644 --- a/libs/musl/src/math/__sindf.c +++ b/libs/musl/src/math/__sindf.c @@ -18,10 +18,10 @@

/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */ static const double -S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */ -S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */ -S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */ -S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */ +S1 = -0x1.5555555555555p-3, +S2 = 0x1.1111111111111p-7, +S3 = -0x1.a01a01a01a01ap-13, +S4 = 0x1.71de3a556c734p-19;

float __sindf(double x) { @@ -29,6 +29,8 @@ float __sindf(double x)

/* Try to optimize for parallel evaluation as in __tandf.c. */ z = x*x; + if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03) + return x * (1 + S1 * z); w = z*z; r = S3 + z*S4; s = z*x;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 08/12] msvcrt: Use the __cosdf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

With the changes from 9008cd2f2437650ad41ce8a8924ed1828ca21889.
---
 dlls/msvcrt/math.c           | 17 +----------------
 libs/musl/src/math/__cosdf.c | 18 +++++++++---------
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 8fa904b1cc9..684a4807489 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -220,22 +220,7 @@ float CDECL _chgsignf( float num )

#ifndef __i386__ extern float __sindf(double x); - -/* Copied from musl: src/math/__cosdf.c */ -static float __cosdf(double x) -{ - static const double C0 = -0x1.0000000000000p-1, - C1 = 0x1.5555555555555p-5, - C2 = -0x1.6c16c16c16c17p-10, - C3 = 0x1.a01a01a01a01ap-16, - C4 = -0x1.27e4fb7789f5cp-22; - double z; - - z = x * x; - if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03) - return 1 + C0 * z; - return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4)))); -} +extern float __cosdf(double x);

static const UINT64 exp2f_T[] = { 0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL, diff --git a/libs/musl/src/math/__cosdf.c b/libs/musl/src/math/__cosdf.c index 2124989b329..a84a50c6980 100644 --- a/libs/musl/src/math/__cosdf.c +++ b/libs/musl/src/math/__cosdf.c @@ -18,18 +18,18 @@

/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ static const double -C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */ -C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */ -C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */ -C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */ +C0 = -0x1.0000000000000p-1, +C1 = 0x1.5555555555555p-5, +C2 = -0x1.6c16c16c16c17p-10, +C3 = 0x1.a01a01a01a01ap-16, +C4 = -0x1.27e4fb7789f5cp-22;

float __cosdf(double x) { - double_t r, w, z; + double_t z;

- /* Try to optimize for parallel evaluation as in __tandf.c. */ z = x*x; - w = z*z; - r = C2+z*C3; - return ((1.0+z*C0) + w*C1) + (w*z)*r; + if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03) + return 1 + C0 * z; + return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4)))); }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 09/12] msvcrt: Use the sin()/sinf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

---
 dlls/msvcrt/math.c        | 103 --------------------------------------
 libs/musl/src/math/sin.c  |   2 +
 libs/musl/src/math/sinf.c |   2 +
 3 files changed, 4 insertions(+), 103 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 684a4807489..d676f813055 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -787,69 +787,6 @@ float CDECL powf( float x, float y ) return powf_exp2(ylogx, sign_bias); }

-/********************************************************************* - * sinf (MSVCRT.@) - * - * Copied from musl: src/math/sinf.c - */ -float CDECL sinf( float x ) -{ - static const double s1pio2 = 1*M_PI_2, - s2pio2 = 2*M_PI_2, - s3pio2 = 3*M_PI_2, - s4pio2 = 4*M_PI_2; - - double y; - UINT32 ix; - int n, sign; - - ix = *(UINT32*)&x; - sign = ix >> 31; - ix &= 0x7fffffff; - - if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact if x!=0 and underflow if subnormal */ - fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f); - return x; - } - return __sindf(x); - } - if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ - if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */ - if (sign) - return -__cosdf(x + s1pio2); - else - return __cosdf(x - s1pio2); - } - return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2)); - } - if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ - if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */ - if (sign) - return __cosdf(x + s3pio2); - else - return -__cosdf(x - s3pio2); - } - return __sindf(sign ? x + s4pio2 : x - s4pio2); - } - - /* sin(Inf or NaN) is NaN */ - if (isinf(x)) - return math_error(_DOMAIN, "sinf", x, 0, x - x); - if (ix >= 0x7f800000) - return x - x; - - /* general argument reduction needed */ - n = __rem_pio2f(x, &y); - switch (n&3) { - case 0: return __sindf(y); - case 1: return __cosdf(y); - case 2: return __sindf(-y); - default: return -__cosdf(y); - } -} - static BOOL sqrtf_validate( float *x ) { short c = _fdclass(*x); @@ -2117,46 +2054,6 @@ double CDECL pow( double x, double y ) return pow_exp(x, y, ehi, elo, sign_bias); }

-/********************************************************************* - * sin (MSVCRT.@) - * - * Copied from musl: src/math/sin.c - */ -double CDECL sin( double x ) -{ - double y[2]; - UINT32 ix; - unsigned n; - - ix = *(ULONGLONG*)&x >> 32; - ix &= 0x7fffffff; - - /* |x| ~< pi/4 */ - if (ix <= 0x3fe921fb) { - if (ix < 0x3e500000) { /* |x| < 2**-26 */ - /* raise inexact if x != 0 and underflow if subnormal*/ - fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f); - return x; - } - return __sin(x, 0.0, 0); - } - - /* sin(Inf or NaN) is NaN */ - if (isinf(x)) - return math_error(_DOMAIN, "sin", x, 0, x - x); - if (ix >= 0x7ff00000) - return x - x; - - /* argument reduction needed */ - n = __rem_pio2(x, y); - switch (n&3) { - case 0: return __sin(y[0], y[1], 1); - case 1: return __cos(y[0], y[1]); - case 2: return -__sin(y[0], y[1], 1); - default: return -__cos(y[0], y[1]); - } -} - static BOOL sqrt_validate( double *x, BOOL update_sw ) { short c = _dclass(*x); diff --git a/libs/musl/src/math/sin.c b/libs/musl/src/math/sin.c index fac84f2b27f..4bbc2ee6d85 100644 --- a/libs/musl/src/math/sin.c +++ b/libs/musl/src/math/sin.c @@ -63,6 +63,8 @@ double __cdecl sin(double x) }

/* sin(Inf or NaN) is NaN */ + if (isinf(x)) + return math_error(_DOMAIN, "sin", x, 0, x - x); if (ix >= 0x7ff00000) return x - x;

diff --git a/libs/musl/src/math/sinf.c b/libs/musl/src/math/sinf.c index de0745649c8..5565b7760ab 100644 --- a/libs/musl/src/math/sinf.c +++ b/libs/musl/src/math/sinf.c @@ -61,6 +61,8 @@ float __cdecl sinf(float x) }

/* sin(Inf or NaN) is NaN */ + if (isinf(x)) + return math_error(_DOMAIN, "sinf", x, 0, x - x); if (ix >= 0x7f800000) return x - x;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 10/12] msvcrt: Use the cos()/cosf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

---
 dlls/msvcrt/math.c        | 147 --------------------------------------
 libs/musl/src/math/cos.c  |   2 +
 libs/musl/src/math/cosf.c |   2 +
 3 files changed, 4 insertions(+), 147 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index d676f813055..8b5f8ee3099 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -219,9 +219,6 @@ float CDECL _chgsignf( float num ) #endif

#ifndef __i386__ -extern float __sindf(double x); -extern float __cosdf(double x); - static const UINT64 exp2f_T[] = { 0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL, 0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL, @@ -494,70 +491,6 @@ float CDECL atanf( float x )

extern int __rem_pio2f(float x, double *y);

-/********************************************************************* - * cosf (MSVCRT.@) - * - * Copied from musl: src/math/cosf.c - */ -float CDECL cosf( float x ) -{ - static const double c1pio2 = 1*M_PI_2, - c2pio2 = 2*M_PI_2, - c3pio2 = 3*M_PI_2, - c4pio2 = 4*M_PI_2; - - double y; - UINT32 ix; - unsigned n, sign; - - ix = *(UINT32*)&x; - sign = ix >> 31; - ix &= 0x7fffffff; - - if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact if x != 0 */ - fp_barrierf(x + 0x1p120f); - return 1.0f; - } - return __cosdf(x); - } - if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ - if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */ - return -__cosdf(sign ? x + c2pio2 : x - c2pio2); - else { - if (sign) - return __sindf(x + c1pio2); - else - return __sindf(c1pio2 - x); - } - } - if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ - if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */ - return __cosdf(sign ? x + c4pio2 : x - c4pio2); - else { - if (sign) - return __sindf(-x - c3pio2); - else - return __sindf(x - c3pio2); - } - } - - /* cos(Inf or NaN) is NaN */ - if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x); - if (ix >= 0x7f800000) - return x - x; - - /* general argument reduction needed */ - n = __rem_pio2f(x, &y); - switch (n & 3) { - case 0: return __cosdf(y); - case 1: return __sindf(-y); - case 2: return -__cosdf(y); - default: return __sindf(y); - } -} - /********************************************************************* * expf (MSVCRT.@) */ @@ -1309,86 +1242,6 @@ double CDECL rint(double x)

extern int __rem_pio2(double x, double *y);

-/* Copied from musl: src/math/__sin.c */ -static double __sin(double x, double y, int iy) -{ - static const double S1 = -1.66666666666666324348e-01, - S2 = 8.33333333332248946124e-03, - S3 = -1.98412698298579493134e-04, - S4 = 2.75573137070700676789e-06, - S5 = -2.50507602534068634195e-08, - S6 = 1.58969099521155010221e-10; - - double z, r, v, w; - - z = x * x; - w = z * z; - r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); - v = z * x; - if (iy == 0) - return x + v * (S1 + z * r); - else - return x - ((z * (0.5 * y - v * r) - y) - v * S1); -} - -/* Copied from musl: src/math/__cos.c */ -static double __cos(double x, double y) -{ - static const double C1 = 4.16666666666666019037e-02, - C2 = -1.38888888888741095749e-03, - C3 = 2.48015872894767294178e-05, - C4 = -2.75573143513906633035e-07, - C5 = 2.08757232129817482790e-09, - C6 = -1.13596475577881948265e-11; - double hz, z, r, w; - - z = x * x; - w = z * z; - r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); - hz = 0.5 * z; - w = 1.0 - hz; - return w + (((1.0 - w) - hz) + (z * r - x * y)); -} - -/********************************************************************* - * cos (MSVCRT.@) - * - * Copied from musl: src/math/cos.c - */ -double CDECL cos( double x ) -{ - double y[2]; - UINT32 ix; - unsigned n; - - ix = *(ULONGLONG*)&x >> 32; - ix &= 0x7fffffff; - - /* |x| ~< pi/4 */ - if (ix <= 0x3fe921fb) { - if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */ - /* raise inexact if x!=0 */ - fp_barrier(x + 0x1p120f); - return 1.0; - } - return __cos(x, 0); - } - - /* cos(Inf or NaN) is NaN */ - if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x); - if (ix >= 0x7ff00000) - return x - x; - - /* argument reduction */ - n = __rem_pio2(x, y); - switch (n & 3) { - case 0: return __cos(y[0], y[1]); - case 1: return -__sin(y[0], y[1], 1); - case 2: return -__cos(y[0], y[1]); - default: return __sin(y[0], y[1], 1); - } -} - /* Copied from musl: src/math/exp_data.c */ static const UINT64 exp_T[] = { 
0x0ULL, 0x3ff0000000000000ULL, diff --git a/libs/musl/src/math/cos.c b/libs/musl/src/math/cos.c index 0fae2287270..eb5c2a475e9 100644 --- a/libs/musl/src/math/cos.c +++ b/libs/musl/src/math/cos.c @@ -62,6 +62,8 @@ double __cdecl cos(double x) }

/* cos(Inf or NaN) is NaN */ + if (isinf(x)) + return math_error(_DOMAIN, "cos", x, 0, x - x); if (ix >= 0x7ff00000) return x-x;

diff --git a/libs/musl/src/math/cosf.c b/libs/musl/src/math/cosf.c index 25f4da07409..a2d7b149c02 100644 --- a/libs/musl/src/math/cosf.c +++ b/libs/musl/src/math/cosf.c @@ -63,6 +63,8 @@ float __cdecl cosf(float x) }

/* cos(Inf or NaN) is NaN */ + if (isinf(x)) + return math_error(_DOMAIN, "cosf", x, 0, x - x); if (ix >= 0x7f800000) return x-x;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 11/12] msvcrt: Use the tan()/tanf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

---
 dlls/msvcrt/math.c        | 171 --------------------------------------
 libs/musl/src/math/tan.c  |   2 +
 libs/musl/src/math/tanf.c |   2 +
 3 files changed, 4 insertions(+), 171 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 8b5f8ee3099..71d193290de 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -489,8 +489,6 @@ float CDECL atanf( float x ) return sign ? -z : z; }

-extern int __rem_pio2f(float x, double *y); - /********************************************************************* * expf (MSVCRT.@) */ @@ -813,82 +811,6 @@ float CDECL sqrtf( float x ) #endif }

-/* Copied from musl: src/math/__tandf.c */ -static float __tandf(double x, int odd) -{ - static const double T[] = { - 0x15554d3418c99f.0p-54, - 0x1112fd38999f72.0p-55, - 0x1b54c91d865afe.0p-57, - 0x191df3908c33ce.0p-58, - 0x185dadfcecf44e.0p-61, - 0x1362b9bf971bcd.0p-59, - }; - - double z, r, w, s, t, u; - - z = x * x; - r = T[4] + z * T[5]; - t = T[2] + z * T[3]; - w = z * z; - s = z * x; - u = T[0] + z * T[1]; - r = (x + s * u) + (s * w) * (t + w * r); - return odd ? -1.0 / r : r; -} - -/********************************************************************* - * tanf (MSVCRT.@) - * - * Copied from musl: src/math/tanf.c - */ -float CDECL tanf( float x ) -{ - static const double t1pio2 = 1*M_PI_2, - t2pio2 = 2*M_PI_2, - t3pio2 = 3*M_PI_2, - t4pio2 = 4*M_PI_2; - - double y; - UINT32 ix; - unsigned n, sign; - - ix = *(UINT32*)&x; - sign = ix >> 31; - ix &= 0x7fffffff; - - if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact if x!=0 and underflow if subnormal */ - fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f); - return x; - } - return __tandf(x, 0); - } - if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ - if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */ - return __tandf((sign ? x + t1pio2 : x - t1pio2), 1); - else - return __tandf((sign ? x + t2pio2 : x - t2pio2), 0); - } - if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ - if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */ - return __tandf((sign ? x + t3pio2 : x - t3pio2), 1); - else - return __tandf((sign ? x + t4pio2 : x - t4pio2), 0); - } - - /* tan(Inf or NaN) is NaN */ - if (isinf(x)) - return math_error(_DOMAIN, "tanf", x, 0, x - x); - if (ix >= 0x7f800000) - return x - x; - - /* argument reduction */ - n = __rem_pio2f(x, &y); - return __tandf(y, n & 1); -} - /********************************************************************* * tanhf (MSVCRT.@) */ @@ -1240,8 +1162,6 @@ double CDECL rint(double x) return y; }

-extern int __rem_pio2(double x, double *y); - /* Copied from musl: src/math/exp_data.c */ static const UINT64 exp_T[] = { 0x0ULL, 0x3ff0000000000000ULL, @@ -2068,97 +1988,6 @@ double CDECL sqrt( double x ) #endif }

-/* Copied from musl: src/math/__tan.c */ -static double __tan(double x, double y, int odd) -{ - static const double T[] = { - 3.33333333333334091986e-01, - 1.33333333333201242699e-01, - 5.39682539762260521377e-02, - 2.18694882948595424599e-02, - 8.86323982359930005737e-03, - 3.59207910759131235356e-03, - 1.45620945432529025516e-03, - 5.88041240820264096874e-04, - 2.46463134818469906812e-04, - 7.81794442939557092300e-05, - 7.14072491382608190305e-05, - -1.85586374855275456654e-05, - 2.59073051863633712884e-05, - }; - static const double pio4 = 7.85398163397448278999e-01; - static const double pio4lo = 3.06161699786838301793e-17; - - double z, r, v, w, s, a, w0, a0; - UINT32 hx; - int big, sign; - - hx = *(ULONGLONG*)&x >> 32; - big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ - if (big) { - sign = hx >> 31; - if (sign) { - x = -x; - y = -y; - } - x = (pio4 - x) + (pio4lo - y); - y = 0.0; - } - z = x * x; - w = z * z; - r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11])))); - v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12]))))); - s = z * x; - r = y + z * (s * (r + v) + y) + s * T[0]; - w = x + r; - if (big) { - s = 1 - 2 * odd; - v = s - 2.0 * (x + (r - w * w / (w + s))); - return sign ? 
-v : v; - } - if (!odd) - return w; - /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */ - w0 = w; - *(LONGLONG*)&w0 = *(LONGLONG*)&w0 & 0xffffffff00000000ULL; - v = r - (w0 - x); /* w0+v = r+x */ - a0 = a = -1.0 / w; - *(LONGLONG*)&a0 = *(LONGLONG*)&a0 & 0xffffffff00000000ULL; - return a0 + a * (1.0 + a0 * w0 + a0 * v); -} - -/********************************************************************* - * tan (MSVCRT.@) - * - * Copied from musl: src/math/tan.c - */ -double CDECL tan( double x ) -{ - double y[2]; - UINT32 ix; - unsigned n; - - ix = *(ULONGLONG*)&x >> 32; - ix &= 0x7fffffff; - - if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */ - if (ix < 0x3e400000) { /* |x| < 2**-27 */ - /* raise inexact if x!=0 and underflow if subnormal */ - fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f); - return x; - } - return __tan(x, 0.0, 0); - } - - if (isinf(x)) - return math_error(_DOMAIN, "tan", x, 0, x - x); - if (ix >= 0x7ff00000) - return x - x; - - n = __rem_pio2(x, y); - return __tan(y[0], y[1], n & 1); -} - /********************************************************************* * tanh (MSVCRT.@) */ diff --git a/libs/musl/src/math/tan.c b/libs/musl/src/math/tan.c index 22f06f14603..895976d8327 100644 --- a/libs/musl/src/math/tan.c +++ b/libs/musl/src/math/tan.c @@ -61,6 +61,8 @@ double __cdecl tan(double x) }

/* tan(Inf or NaN) is NaN */ + if (isinf(x)) + return math_error(_DOMAIN, "tan", x, 0, x - x); if (ix >= 0x7ff00000) return x - x;

diff --git a/libs/musl/src/math/tanf.c b/libs/musl/src/math/tanf.c index d3849734e08..99e2a5c8bac 100644 --- a/libs/musl/src/math/tanf.c +++ b/libs/musl/src/math/tanf.c @@ -55,6 +55,8 @@ float __cdecl tanf(float x) }

/* tan(Inf or NaN) is NaN */ + if (isinf(x)) + return math_error(_DOMAIN, "tanf", x, 0, x - x); if (ix >= 0x7f800000) return x - x;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Alexandre Julliard

11:17 a.m.

New subject: [PATCH 12/12] msvcrt: Use the pow()/powf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 597 -------------------------------------- libs/musl/src/math/pow.c | 42 ++- libs/musl/src/math/powf.c | 21 +- 3 files changed, 25 insertions(+), 635 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 71d193290de..1e67ced4360 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -540,184 +540,6 @@ float CDECL expf( float x ) return y; }

-/* Subnormal input is normalized so ix has negative biased exponent. - Output is multiplied by POWF_SCALE (where 1 << 5). */ -static double powf_log2(UINT32 ix) -{ - static const struct { - double invc, logc; - } T[] = { - { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) }, - { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) }, - { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) }, - { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) }, - { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) }, - { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) }, - { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) }, - { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) }, - { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) }, - { 0x1p+0, 0x0p+0 * (1 << 4) }, - { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) }, - { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) }, - { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) }, - { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) }, - { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) }, - { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) } - }; - static const double A[] = { - 0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5), - 0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5), - 0x1.71547652ab82bp0 * (1 << 5) - }; - - double z, r, r2, r4, p, q, y, y0, invc, logc; - UINT32 iz, top, tmp; - int k, i; - - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. 
*/ - tmp = ix - 0x3f330000; - i = (tmp >> (23 - 4)) % (1 << 4); - top = tmp & 0xff800000; - iz = ix - top; - k = (INT32)top >> (23 - 5); /* arithmetic shift */ - invc = T[i].invc; - logc = T[i].logc; - z = *(float*)&iz; - - /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ - r = z * invc - 1; - y0 = logc + (double)k; - - /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ - r2 = r * r; - y = A[0] * r + A[1]; - p = A[2] * r + A[3]; - r4 = r2 * r2; - q = A[4] * r + y0; - q = p * r2 + q; - y = y * r4 + q; - return y; -} - -/* The output of log2 and thus the input of exp2 is either scaled by N - (in case of fast toint intrinsics) or not. The unscaled xd must be - in [-1021,1023], sign_bias sets the sign of the result. */ -static float powf_exp2(double xd, UINT32 sign_bias) -{ - static const double C[] = { - 0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5), - 0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5), - 0x1.62e42ff0c52d6p-1 / (1 << 5) - }; - - UINT64 ki, ski, t; - double kd, z, r, r2, y, s; - - /* N*x = k + r with r in [-1/2, 1/2] */ - kd = round(xd); /* k */ - ki = (INT64)kd; - r = xd - kd; - - /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ - t = exp2f_T[ki % (1 << 5)]; - ski = ki + sign_bias; - t += ski << (52 - 5); - s = *(double*)&t; - z = C[0] * r + C[1]; - r2 = r * r; - y = C[2] * r + 1; - y = z * r2 + y; - y = y * s; - return y; -} - -/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is - the bit representation of a non-zero finite floating-point value. 
*/ -static int powf_checkint(UINT32 iy) -{ - int e = iy >> 23 & 0xff; - if (e < 0x7f) - return 0; - if (e > 0x7f + 23) - return 2; - if (iy & ((1 << (0x7f + 23 - e)) - 1)) - return 0; - if (iy & (1 << (0x7f + 23 - e))) - return 1; - return 2; -} - -/********************************************************************* - * powf (MSVCRT.@) - * - * Copied from musl: src/math/powf.c src/math/powf_data.c - */ -float CDECL powf( float x, float y ) -{ - UINT32 sign_bias = 0; - UINT32 ix, iy; - double logx, ylogx; - - ix = *(UINT32*)&x; - iy = *(UINT32*)&y; - if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || - 2 * iy - 1 >= 2u * 0x7f800000 - 1) { - /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ - if (2 * iy - 1 >= 2u * 0x7f800000 - 1) { - if (2 * iy == 0) - return 1.0f; - if (ix == 0x3f800000) - return 1.0f; - if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000) - return x + y; - if (2 * ix == 2 * 0x3f800000) - return 1.0f; - if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) - return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ - return y * y; - } - if (2 * ix - 1 >= 2u * 0x7f800000 - 1) { - float x2 = x * x; - if (ix & 0x80000000 && powf_checkint(iy) == 1) - x2 = -x2; - if (iy & 0x80000000 && x2 == 0.0) - return math_error(_SING, "powf", x, y, 1 / x2); - /* Without the barrier some versions of clang hoist the 1/x2 and - thus division by zero exception can be signaled spuriously. */ - return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2; - } - /* x and y are non-zero finite. */ - if (ix & 0x80000000) { - /* Finite x < 0. */ - int yint = powf_checkint(iy); - if (yint == 0) - return math_error(_DOMAIN, "powf", x, y, 0 / (x - x)); - if (yint == 1) - sign_bias = 1 << (5 + 11); - ix &= 0x7fffffff; - } - if (ix < 0x00800000) { - /* Normalize subnormal x so exponent becomes negative. */ - x *= 0x1p23f; - ix = *(UINT32*)&x; - ix &= 0x7fffffff; - ix -= 23 << 23; - } - } - logx = powf_log2(ix); - ylogx = y * logx; /* cannot overflow, y is single prec. 
*/ - if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) { - /* |y*log(x)| >= 126. */ - if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5)) - return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023); - if (ylogx <= -150.0 * (1 << 5)) - return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023); - } - return powf_exp2(ylogx, sign_bias); -} - static BOOL sqrtf_validate( float *x ) { short c = _fdclass(*x); @@ -1408,425 +1230,6 @@ double CDECL exp( double x ) return scale + scale * tmp; }

-/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about - additional 15 bits precision. IX is the bit representation of x, but - normalized in the subnormal range using the sign bit for the exponent. */ -static double pow_log(UINT64 ix, double *tail) -{ - static const struct { - double invc, logc, logctail; - } T[] = { - {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48}, - {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46}, - {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45}, - {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49}, - {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47}, - {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46}, - {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50}, - {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45}, - {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45}, - {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45}, - {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46}, - {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46}, - {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46}, - {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46}, - {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46}, - {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45}, - {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47}, - {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48}, - {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48}, - {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47}, - {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45}, - {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46}, - {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 
0x1.1ba91bbca681bp-45}, - {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45}, - {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45}, - {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46}, - {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52}, - {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45}, - {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45}, - {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45}, - {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45}, - {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45}, - {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45}, - {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46}, - {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46}, - {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46}, - {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45}, - {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46}, - {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46}, - {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48}, - {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45}, - {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45}, - {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45}, - {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47}, - {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47}, - {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45}, - {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45}, - {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45}, - {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46}, - {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45}, - 
{0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45}, - {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46}, - {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46}, - {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45}, - {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46}, - {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46}, - {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45}, - {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45}, - {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46}, - {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45}, - {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45}, - {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46}, - {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46}, - {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45}, - {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45}, - {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48}, - {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48}, - {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45}, - {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45}, - {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45}, - {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50}, - {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50}, - {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46}, - {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46}, - {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0}, - {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0}, - {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46}, - {0x1.f800000000000p-1, 0x1.0205658938000p-6, 
-0x1.3dc5b06e2f7d2p-45}, - {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45}, - {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47}, - {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45}, - {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46}, - {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46}, - {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47}, - {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45}, - {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45}, - {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45}, - {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49}, - {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45}, - {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46}, - {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45}, - {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45}, - {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45}, - {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45}, - {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45}, - {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47}, - {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51}, - {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45}, - {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45}, - {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46}, - {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45}, - {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46}, - {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47}, - {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47}, - {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45}, - {0x1.9e00000000000p-1, 
0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47}, - {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45}, - {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48}, - {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45}, - {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51}, - {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51}, - {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46}, - {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48}, - {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45}, - {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45}, - {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45}, - {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45}, - {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47}, - {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45}, - {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45}, - {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46}, - {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46}, - {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47}, - {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45}, - {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45}, - {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45}, - {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46}, - {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47}, - }; - static const double A[] = { - -0x1p-1, - 0x1.555555555556p-2 * -2, - -0x1.0000000000006p-2 * -2, - 0x1.999999959554ep-3 * 4, - -0x1.555555529a47ap-3 * 4, - 0x1.2495b9b4845e9p-3 * -8, - -0x1.0002b8b263fc3p-3 * -8 - }; - static const double ln2hi = 0x1.62e42fefa3800p-1, - ln2lo = 0x1.ef35793c76730p-45; - - double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, 
p; - double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2; - UINT64 iz, tmp; - int k, i; - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - 0x3fe6955500000000ULL; - i = (tmp >> (52 - 7)) % (1 << 7); - k = (INT64)tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - z = *(double*)&iz; - kd = k; - - /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ - invc = T[i].invc; - logc = T[i].logc; - logctail = T[i].logctail; - - /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and - |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ - /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */ - iz = (iz + (1ULL << 31)) & (-1ULL << 32); - zhi = *(double*)&iz; - zlo = z - zhi; - rhi = zhi * invc - 1.0; - rlo = zlo * invc; - r = rhi + rlo; - - /* k*Ln2 + log(c) + r. */ - t1 = kd * ln2hi + logc; - t2 = t1 + r; - lo1 = kd * ln2lo + logctail; - lo2 = t1 - t2 + r; - - /* Evaluation is optimized assuming superscalar pipelined execution. */ - ar = A[0] * r; /* A[0] = -0.5. */ - ar2 = r * ar; - ar3 = r * ar2; - /* k*Ln2 + log(c) + r + A[0]*r*r. */ - arhi = A[0] * rhi; - arhi2 = rhi * arhi; - hi = t2 + arhi2; - lo3 = rlo * (ar + arhi); - lo4 = t2 - hi + arhi2; - /* p = log1p(r) - r - A[0]*r*r. */ - p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))); - lo = lo1 + lo2 + lo3 + lo4 + p; - y = hi + lo; - *tail = hi - y + lo; - return y; -} - -/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. - The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. 
*/ -static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias) -{ - static const double C[] = { - 0x1.ffffffffffdbdp-2, - 0x1.555555555543cp-3, - 0x1.55555cf172b91p-5, - 0x1.1111167a4d017p-7 - }; - static const double invln2N = 0x1.71547652b82fep0 * (1 << 7), - negln2hiN = -0x1.62e42fefa0000p-8, - negln2loN = -0x1.cf79abc9e3b3ap-47; - - UINT32 abstop; - UINT64 ki, idx, top, sbits; - double kd, z, r, r2, scale, tail, tmp; - - abstop = (*(UINT64*)&x >> 52) & 0x7ff; - if (abstop - 0x3c9 >= 0x408 - 0x3c9) { - if (abstop - 0x3c9 >= 0x80000000) { - /* Avoid spurious underflow for tiny x. */ - /* Note: 0 is common input. */ - double one = 1.0 + x; - return sign_bias ? -one : one; - } - if (abstop >= 0x409) { - /* Note: inf and nan are already handled. */ - if (*(UINT64*)&x >> 63) - return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN); - return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX); - } - /* Large x is special cased below. */ - abstop = 0; - } - - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - z = invln2N * x; - kd = round(z); - ki = (INT64)kd; - r = x + kd * negln2hiN + kd * negln2loN; - /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r += xtail; - /* 2^(k/N) ~= scale * (1 + tail). */ - idx = 2 * (ki % (1 << 7)); - top = (ki + sign_bias) << (52 - 7); - tail = *(double*)&exp_T[idx]; - /* This is only a valid scale when -1023*N < k < 1024*N. */ - sbits = exp_T[idx + 1] + top; - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; - /* Without fma the worst case error is 0.25/N ulp larger. */ - /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. 
*/ - tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]); - if (abstop == 0) { - /* Handle cases that may overflow or underflow when computing the result that - is scale*(1+TMP) without intermediate rounding. The bit representation of - scale is in SBITS, however it has a computed exponent that may have - overflown into the sign bit so that needs to be adjusted before using it as - a double. (int32_t)KI is the k used in the argument reduction and exponent - adjustment of scale, positive k here means the result may overflow and - negative k means the result may underflow. */ - double scale, y; - - if ((ki & 0x80000000) == 0) { - /* k > 0, the exponent of scale might have overflowed by <= 460. */ - sbits -= 1009ull << 52; - scale = *(double*)&sbits; - y = 0x1p1009 * (scale + scale * tmp); - if (isinf(y)) - return math_error(_OVERFLOW, "pow", argx, argy, y); - return y; - } - /* k < 0, need special care in the subnormal range. */ - sbits += 1022ull << 52; - /* Note: sbits is signed scale. */ - scale = *(double*)&sbits; - y = scale + scale * tmp; - if (fabs(y) < 1.0) { - /* Round y to the right precision before scaling it into the subnormal - range to avoid double rounding that can cause 0.5+E/2 ulp error where - E is the worst-case ulp error outside the subnormal range. So this - is only useful if the goal is better than 1 ulp worst-case error. */ - double hi, lo, one = 1.0; - if (y < 0.0) - one = -1.0; - lo = scale - y + scale * tmp; - hi = one + y; - lo = one - hi + y + lo; - y = hi + lo - one; - /* Fix the sign of 0. */ - if (y == 0.0) { - sbits &= 0x8000000000000000ULL; - y = *(double*)&sbits; - } - /* The underflow exception needs to be signaled explicitly. 
*/ - fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022); - y = 0x1p-1022 * y; - return math_error(_UNDERFLOW, "pow", argx, argy, y); - } - y = 0x1p-1022 * y; - return y; - } - scale = *(double*)&sbits; - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - return scale + scale * tmp; -} - -/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is - the bit representation of a non-zero finite floating-point value. */ -static inline int pow_checkint(UINT64 iy) -{ - int e = iy >> 52 & 0x7ff; - if (e < 0x3ff) - return 0; - if (e > 0x3ff + 52) - return 2; - if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) - return 0; - if (iy & (1ULL << (0x3ff + 52 - e))) - return 1; - return 2; -} - -/********************************************************************* - * pow (MSVCRT.@) - * - * Copied from musl: src/math/pow.c - */ -double CDECL pow( double x, double y ) -{ - UINT32 sign_bias = 0; - UINT64 ix, iy; - UINT32 topx, topy; - double lo, hi, ehi, elo, yhi, ylo, lhi, llo; - - ix = *(UINT64*)&x; - iy = *(UINT64*)&y; - topx = ix >> 52; - topy = iy >> 52; - if (topx - 0x001 >= 0x7ff - 0x001 || - (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) { - /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 - and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ - /* Special cases: (x < 0x1p-126 or inf or nan) or - (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */ - if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) { - if (2 * iy == 0) - return 1.0; - if (ix == 0x3ff0000000000000ULL) - return 1.0; - if (2 * ix > 2 * 0x7ff0000000000000ULL || - 2 * iy > 2 * 0x7ff0000000000000ULL) - return x + y; - if (2 * ix == 2 * 0x3ff0000000000000ULL) - return 1.0; - if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63)) - return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. 
*/ - return y * y; - } - if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) { - double x2 = x * x; - if (ix >> 63 && pow_checkint(iy) == 1) - x2 = -x2; - if (iy & 0x8000000000000000ULL && x2 == 0.0) - return math_error(_SING, "pow", x, y, 1 / x2); - /* Without the barrier some versions of clang hoist the 1/x2 and - thus division by zero exception can be signaled spuriously. */ - return iy >> 63 ? fp_barrier(1 / x2) : x2; - } - /* Here x and y are non-zero finite. */ - if (ix >> 63) { - /* Finite x < 0. */ - int yint = pow_checkint(iy); - if (yint == 0) - return math_error(_DOMAIN, "pow", x, y, 0 / (x - x)); - if (yint == 1) - sign_bias = 0x800 << 7; - ix &= 0x7fffffffffffffff; - topx &= 0x7ff; - } - if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) { - /* Note: sign_bias == 0 here because y is not odd. */ - if (ix == 0x3ff0000000000000ULL) - return 1.0; - if ((topy & 0x7ff) < 0x3be) { - /* |y| < 2^-65, x^y ~= 1 + y*log(x). */ - return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y; - } - if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800)) - return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX); - return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN); - } - if (topx == 0) { - /* Normalize subnormal x so exponent becomes negative. */ - x *= 0x1p52; - ix = *(UINT64*)&x; - ix &= 0x7fffffffffffffff; - ix -= 52ULL << 52; - } - } - - hi = pow_log(ix, &lo); - iy &= -1ULL << 27; - yhi = *(double*)&iy; - ylo = y - yhi; - *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27; - llo = fp_barrier(hi - lhi + lo); - ehi = yhi * lhi; - elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. 
*/ - return pow_exp(x, y, ehi, elo, sign_bias); -} - static BOOL sqrt_validate( double *x, BOOL update_sw ) { short c = _dclass(*x); diff --git a/libs/musl/src/math/pow.c b/libs/musl/src/math/pow.c index cce83687eb9..3b7815c1317 100644 --- a/libs/musl/src/math/pow.c +++ b/libs/musl/src/math/pow.c @@ -121,7 +121,7 @@ static inline double_t log_inline(uint64_t ix, double_t *tail) a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +static inline double specialcase(double argx, double argy, double_t tmp, uint64_t sbits, uint64_t ki) { double_t scale, y;

@@ -130,6 +130,8 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) sbits -= 1009ull << 52; scale = asdouble(sbits); y = 0x1p1009 * (scale + scale * tmp); + if (isinf(y)) + return math_error(_OVERFLOW, "pow", argx, argy, y); return eval_as_double(y); } /* k < 0, need special care in the subnormal range. */ @@ -154,6 +156,8 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) y = asdouble(sbits & 0x8000000000000000); /* The underflow exception needs to be signaled explicitly. */ fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); + y = 0x1p-1022 * y; + return math_error(_UNDERFLOW, "pow", argx, argy, y); } y = 0x1p-1022 * y; return eval_as_double(y); @@ -163,7 +167,7 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)

/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */ -static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) +static inline double exp_inline(double argx, double argy, double_t x, double_t xtail, uint32_t sign_bias) { uint32_t abstop; uint64_t ki, idx, top, sbits; @@ -182,9 +186,9 @@ static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) if (abstop >= top12(1024.0)) { /* Note: inf and nan are already handled. */ if (asuint64(x) >> 63) - return __math_uflow(sign_bias); + return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN); else - return __math_oflow(sign_bias); + return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX); } /* Large x is special cased below. */ abstop = 0; @@ -193,20 +197,8 @@ static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ z = InvLn2N * x; -#if TOINT_INTRINSICS - kd = roundtoint(z); - ki = converttoint(z); -#elif EXP_USE_TOINT_NARROW - /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd) >> 16; - kd = (double_t)(int32_t)ki; -#else - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd); - kd -= Shift; -#endif + kd = round(z); + ki = (int64_t)kd; r = x + kd * NegLn2hiN + kd * NegLn2loN; /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ r += xtail; @@ -223,7 +215,7 @@ static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. 
*/ tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); if (predict_false(abstop == 0)) - return specialcase(tmp, sbits, ki); + return specialcase(argx, argy, tmp, sbits, ki); scale = asdouble(sbits); /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there is no spurious underflow here even without fma. */ @@ -286,6 +278,8 @@ double __cdecl pow(double x, double y) double_t x2 = x * x; if (ix >> 63 && checkint(iy) == 1) x2 = -x2; + if (iy & 0x8000000000000000ULL && x2 == 0.0) + return math_error(_SING, "pow", x, y, 1 / x2); /* Without the barrier some versions of clang hoist the 1/x2 and thus division by zero exception can be signaled spuriously. */ return iy >> 63 ? fp_barrier(1 / x2) : x2; @@ -295,7 +289,7 @@ double __cdecl pow(double x, double y) /* Finite x < 0. */ int yint = checkint(iy); if (yint == 0) - return __math_invalid(x); + return math_error(_DOMAIN, "pow", x, y, 0 / (x - x)); if (yint == 1) sign_bias = SIGN_BIAS; ix &= 0x7fffffffffffffff; @@ -313,9 +307,9 @@ double __cdecl pow(double x, double y) else return 1.0; } - return (ix > asuint64(1.0)) == (topy < 0x800) ? - __math_oflow(0) : - __math_uflow(0); + if ((ix > asuint64(1.0)) == (topy < 0x800)) + return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX); + return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN); } if (topx == 0) { /* Normalize subnormal x so exponent becomes negative. */ @@ -339,5 +333,5 @@ double __cdecl pow(double x, double y) ehi = yhi * lhi; elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ #endif - return exp_inline(ehi, elo, sign_bias); + return exp_inline(x, y, ehi, elo, sign_bias); } diff --git a/libs/musl/src/math/powf.c b/libs/musl/src/math/powf.c index fba6270713b..2e8576ba49f 100644 --- a/libs/musl/src/math/powf.c +++ b/libs/musl/src/math/powf.c @@ -73,19 +73,10 @@ static inline float exp2_inline(double_t xd, uint32_t sign_bias) uint64_t ki, ski, t; double_t kd, z, r, r2, y, s;

-#if TOINT_INTRINSICS #define C __exp2f_data.poly_scaled /* N*x = k + r with r in [-1/2, 1/2] */ - kd = roundtoint(xd); /* k */ - ki = converttoint(xd); -#else -#define C __exp2f_data.poly -#define SHIFT __exp2f_data.shift_scaled - /* x = k/N + r with r in [-1/(2N), 1/(2N)] */ - kd = eval_as_double(xd + SHIFT); - ki = asuint64(kd); - kd -= SHIFT; /* k/N */ -#endif + kd = round(xd); /* k */ + ki = (int64_t)kd; r = xd - kd;

/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ @@ -150,6 +141,8 @@ float __cdecl powf(float x, float y) float_t x2 = x * x; if (ix & 0x80000000 && checkint(iy) == 1) x2 = -x2; + if (iy & 0x80000000 && x2 == 0.0) + return math_error(_SING, "powf", x, y, 1 / x2); /* Without the barrier some versions of clang hoist the 1/x2 and thus division by zero exception can be signaled spuriously. */ return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2; @@ -159,7 +152,7 @@ float __cdecl powf(float x, float y) /* Finite x < 0. */ int yint = checkint(iy); if (yint == 0) - return __math_invalidf(x); + return math_error(_DOMAIN, "powf", x, y, 0 / (x - x)); if (yint == 1) sign_bias = SIGN_BIAS; ix &= 0x7fffffff; @@ -177,9 +170,9 @@ float __cdecl powf(float x, float y) asuint64(126.0 * POWF_SCALE) >> 47)) { /* |y*log(x)| >= 126. */ if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE) - return __math_oflowf(sign_bias); + return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023); if (ylogx <= -150.0 * POWF_SCALE) - return __math_uflowf(sign_bias); + return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023); } return exp2_inline(ylogx, sign_bias); }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Piotr Caban (＠piotr)

1:57 p.m.

New subject: [PATCH 0/12] MR2608: msvcrt: Use still more functions from the bundled musl library. - approved

This merge request was approved by Piotr Caban.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/2608

Age (days ago): 942

Last active (days ago): 942

wine-gitlab@winehq.org

13 comments

3 participants

tags (0)

participants (3)

Alexandre Julliard
Alexandre Julliard (＠julliard)
Piotr Caban (＠piotr)