[PATCH 0/12] MR2601: msvcrt: Use more functions from the bundled musl library.

List overview All Threads

newer

older

[PATCH v2 0/1] MR150:...

[PATCH 0/2] MR141:...

Alexandre Julliard (＠julliard)

6 Apr 2023 6 Apr '23

12:13 p.m.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Show replies by date

Alexandre Julliard

6 Apr 6 Apr

12:13 p.m.

New subject: [PATCH 01/12] msvcrt: Use the log()/logf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 454 -------------------------------------- libs/musl/src/math/log.c | 8 +- libs/musl/src/math/logf.c | 8 +- 3 files changed, 10 insertions(+), 460 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 1b3d380bc53..c35d997ef52 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -1084,87 +1084,6 @@ float CDECL expf( float x ) return y; }

-/********************************************************************* - * logf (MSVCRT.@) - * - * Copied from musl: src/math/logf.c src/math/logf_data.c - */ -float CDECL logf( float x ) -{ - static const double Ln2 = 0x1.62e42fefa39efp-1; - static const double A[] = { - -0x1.00ea348b88334p-2, - 0x1.5575b0be00b6ap-2, - -0x1.ffffef20a4123p-2 - }; - static const struct { - double invc, logc; - } T[] = { - { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, - { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 }, - { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 }, - { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 }, - { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 }, - { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 }, - { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 }, - { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 }, - { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 }, - { 0x1p+0, 0x0p+0 }, - { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 }, - { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 }, - { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 }, - { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 }, - { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 }, - { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 } - }; - - double z, r, r2, y, y0, invc, logc; - UINT32 ix, iz, tmp; - int k, i; - - ix = *(UINT32*)&x; - /* Fix sign of zero with downward rounding when x==1. */ - if (ix == 0x3f800000) - return 0; - if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) { - /* x < 0x1p-126 or inf or nan. */ - if (ix * 2 == 0) - return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x); - if (ix == 0x7f800000) /* log(inf) == inf. */ - return x; - if (ix * 2 > 0xff000000) - return x; - if (ix & 0x80000000) - return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x)); - /* x is subnormal, normalize it. */ - x *= 0x1p23f; - ix = *(UINT32*)&x; - ix -= 23 << 23; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - 0x3f330000; - i = (tmp >> (23 - 4)) % (1 << 4); - k = (INT32)tmp >> 23; /* arithmetic shift */ - iz = ix - (tmp & (0x1ffu << 23)); - invc = T[i].invc; - logc = T[i].logc; - z = *(float*)&iz; - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ - r = z * invc - 1; - y0 = logc + (double)k * Ln2; - - /* Pipelined polynomial evaluation to approximate log1p(r). */ - r2 = r * r; - y = A[1] * r + A[2]; - y = A[0] * r2 + y; - y = y * r2 + (y0 + r); - return y; -} - /********************************************************************* * log10f (MSVCRT.@) */ @@ -2680,379 +2599,6 @@ double CDECL exp( double x ) return scale + scale * tmp; }

-/********************************************************************* - * log (MSVCRT.@) - * - * Copied from musl: src/math/log.c src/math/log_data.c - */ -double CDECL log( double x ) -{ - static const double Ln2hi = 0x1.62e42fefa3800p-1, - Ln2lo = 0x1.ef35793c76730p-45; - static const double A[] = { - -0x1.0000000000001p-1, - 0x1.555555551305bp-2, - -0x1.fffffffeb459p-3, - 0x1.999b324f10111p-3, - -0x1.55575e506c89fp-3 - }; - static const double B[] = { - -0x1p-1, - 0x1.5555555555577p-2, - -0x1.ffffffffffdcbp-3, - 0x1.999999995dd0cp-3, - -0x1.55555556745a7p-3, - 0x1.24924a344de3p-3, - -0x1.fffffa4423d65p-4, - 0x1.c7184282ad6cap-4, - -0x1.999eb43b068ffp-4, - 0x1.78182f7afd085p-4, - -0x1.5521375d145cdp-4 - }; - static const struct { - double invc, logc; - } T[] = { - {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2}, - {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2}, - {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2}, - {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2}, - {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2}, - {0x1.69147332f0cbap+0, -0x1.602d076180000p-2}, - {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2}, - {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2}, - {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2}, - {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2}, - {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2}, - {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2}, - {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2}, - {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2}, - {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2}, - {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2}, - {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2}, - {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2}, - {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2}, - {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2}, - {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2}, - {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2}, - {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2}, - {0x1.4880524d48434p+0, -0x1.feb224586f000p-3}, - {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3}, - {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3}, - {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3}, - {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3}, - {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3}, - {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3}, - {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3}, - {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3}, - {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3}, - {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3}, - {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3}, - {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3}, - {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3}, - {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3}, - {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3}, - {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3}, - {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3}, - {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3}, - {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3}, - {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3}, - {0x1.293726014b530p+0, -0x1.31b996b490000p-3}, - {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3}, - {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3}, - {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3}, - {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3}, - {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3}, - {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4}, - {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4}, - {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4}, - {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4}, - {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4}, - {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4}, - {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4}, - {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4}, - {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4}, - {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4}, - {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4}, - {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4}, - {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4}, - {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4}, - {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5}, - {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5}, - {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5}, - {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5}, - {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5}, - {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5}, - {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5}, - {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5}, - {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6}, - {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6}, - {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6}, - {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6}, - {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7}, - {0x1.02865137932a9p+0, -0x1.419355daa0000p-7}, - {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8}, - {0x1.008040614b195p+0, -0x1.0040979240000p-9}, - {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9}, - {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7}, - {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6}, - {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6}, - {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5}, - {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5}, - {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5}, - {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5}, - {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4}, - {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4}, - {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4}, - {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4}, - {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4}, - {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4}, - {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4}, - {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4}, - {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4}, - {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3}, - {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3}, - {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3}, - {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3}, - {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3}, - {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3}, - {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3}, - {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3}, - {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3}, - {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3}, - {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3}, - {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3}, - {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3}, - {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3}, - {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3}, - {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3}, - {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3}, - {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3}, - {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3}, - {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2}, - {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2}, - {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2}, - {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2}, - {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2}, - {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2}, - {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2}, - {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2}, - {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2}, - {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2}, - {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2}, - {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2} - }; - static const struct { - double chi, clo; - } T2[] = { - {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56}, - {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55}, - {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55}, - {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57}, - {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56}, - {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55}, - {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55}, - {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56}, - {0x1.710000e86978p-1, 0x1.bff6671097952p-56}, - {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55}, - {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57}, - {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57}, - {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55}, - {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56}, - {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55}, - {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55}, - {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55}, - {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55}, - {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55}, - {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55}, - {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55}, - {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56}, - {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55}, - {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55}, - {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55}, - {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56}, - {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55}, - {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56}, - {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55}, - {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55}, - {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60}, - {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55}, - {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56}, - {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55}, - {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55}, - {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55}, - {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55}, - {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57}, - {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55}, - {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57}, - {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58}, - {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56}, - {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56}, - {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55}, - {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56}, - {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57}, - {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57}, - {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55}, - {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55}, - {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57}, - {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55}, - {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55}, - {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56}, - {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57}, - {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55}, - {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55}, - {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56}, - {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55}, - {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58}, - {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56}, - {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56}, - {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55}, - {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55}, - {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57}, - {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56}, - {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56}, - {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56}, - {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58}, - {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55}, - {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56}, - {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58}, - {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55}, - {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59}, - {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55}, - {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55}, - {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57}, - {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56}, - {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57}, - {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56}, - {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57}, - {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55}, - {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54}, - {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54}, - {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55}, - {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57}, - {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54}, - {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55}, - {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56}, - {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55}, - {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54}, - {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54}, - {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55}, - {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54}, - {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54}, - {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57}, - {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54}, - {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54}, - {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54}, - {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56}, - {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56}, - {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56}, - {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54}, - {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55}, - {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55}, - {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55}, - {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54}, - {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54}, - {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55}, - {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54}, - {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55}, - {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56}, - {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54}, - {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57}, - {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55}, - {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55}, - {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54}, - {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54}, - {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54}, - {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54}, - {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54}, - {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57}, - {0x1.530001605277ap+0, -0x1.6bfcece233209p-54}, - {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55}, - {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54}, - {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55}, - {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54}, - {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54}, - {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54} - }; - - double w, z, r, r2, r3, y, invc, logc, kd, hi, lo; - UINT64 ix, iz, tmp; - UINT32 top; - int k, i; - - ix = *(UINT64*)&x; - top = ix >> 48; - if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) { - double rhi, rlo; - - /* Handle close to 1.0 inputs separately. */ - /* Fix sign of zero with downward rounding when x==1. */ - if (ix == 0x3ff0000000000000ULL) - return 0; - r = x - 1.0; - r2 = r * r; - r3 = r * r2; - y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] + - r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); - /* Worst-case error is around 0.507 ULP. */ - w = r * 0x1p27; - rhi = r + w - w; - rlo = r - rhi; - w = rhi * rhi * B[0]; /* B[0] == -0.5. */ - hi = r + w; - lo = r - hi + w; - lo += B[0] * rlo * (rhi + r); - y += lo; - y += hi; - return y; - } - if (top - 0x0010 >= 0x7ff0 - 0x0010) { - /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) - return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x); - if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */ - return x; - if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL)) - return x; - if (top & 0x8000) - return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x)); - /* x is subnormal, normalize it. */ - x *= 0x1p52; - ix = *(UINT64*)&x; - ix -= 52ULL << 52; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - 0x3fe6000000000000ULL; - i = (tmp >> (52 - 7)) % (1 << 7); - k = (INT64)tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - invc = T[i].invc; - logc = T[i].logc; - z = *(double*)&iz; - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - /* r ~= z/c - 1, |r| < 1/(2*N). */ - r = (z - T2[i].chi - T2[i].clo) * invc; - kd = (double)k; - - /* hi + lo = r + log(c) + k*Ln2. */ - w = kd * Ln2hi + logc; - hi = w + r; - lo = w - hi + r + kd * Ln2lo; - - /* log(x) = lo + (log1p(r) - r) + hi. */ - r2 = r * r; /* rounding error: 0x1p-54/N^2. */ - /* Worst case error if |y| > 0x1p-5: - 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma) - Worst case error if |y| > 0x1p-4: - 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */ - y = lo + r2 * A[0] + - r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; - return y; -} - /********************************************************************* * log10 (MSVCRT.@) */ diff --git a/libs/musl/src/math/log.c b/libs/musl/src/math/log.c index 11f873cb83c..1d9964e11a6 100644 --- a/libs/musl/src/math/log.c +++ b/libs/musl/src/math/log.c @@ -63,11 +63,13 @@ double __cdecl log(double x) if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) { /* x < 0x1p-1022 or inf or nan. */ if (ix * 2 == 0) - return __math_divzero(1); + return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x); if (ix == asuint64(INFINITY)) /* log(inf) == inf. */ return x; - if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) - return __math_invalid(x); + if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL)) + return x; + if (top & 0x8000) + return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x)); /* x is subnormal, normalize it. */ ix = asuint64(x * 0x1p52); ix -= 52ULL << 52; diff --git a/libs/musl/src/math/logf.c b/libs/musl/src/math/logf.c index 46095ce6514..eaabe2f6426 100644 --- a/libs/musl/src/math/logf.c +++ b/libs/musl/src/math/logf.c @@ -37,11 +37,13 @@ float __cdecl logf(float x) if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { /* x < 0x1p-126 or inf or nan. */ if (ix * 2 == 0) - return __math_divzerof(1); + return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x); if (ix == 0x7f800000) /* log(inf) == inf. */ return x; - if ((ix & 0x80000000) || ix * 2 >= 0xff000000) - return __math_invalidf(x); + if (ix * 2 > 0xff000000) + return x; + if (ix & 0x80000000) + return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x)); /* x is subnormal, normalize it. */ ix = asuint(x * 0x1p23f); ix -= 23 << 23;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 02/12] msvcrt: Use the log10()/log10f() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 146 ------------------------------------ libs/musl/src/math/log10.c | 8 +- libs/musl/src/math/log10f.c | 6 +- 3 files changed, 9 insertions(+), 151 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index c35d997ef52..425eccb9c9e 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -1084,69 +1084,6 @@ float CDECL expf( float x ) return y; }

-/********************************************************************* - * log10f (MSVCRT.@) - */ -float CDECL log10f( float x ) -{ - static const float ivln10hi = 4.3432617188e-01, - ivln10lo = -3.1689971365e-05, - log10_2hi = 3.0102920532e-01, - log10_2lo = 7.9034151668e-07, - Lg1 = 0xaaaaaa.0p-24, - Lg2 = 0xccce13.0p-25, - Lg3 = 0x91e9ee.0p-25, - Lg4 = 0xf89e26.0p-26; - - union {float f; UINT32 i;} u = {x}; - float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo; - UINT32 ix; - int k; - - ix = u.i; - k = 0; - if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */ - if (ix << 1 == 0) - return math_error(_SING, "log10f", x, 0, -1 / (x * x)); - if ((ix & ~(1u << 31)) > 0x7f800000) - return x; - if (ix >> 31) - return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x)); - /* subnormal number, scale up x */ - k -= 25; - x *= 0x1p25f; - u.f = x; - ix = u.i; - } else if (ix >= 0x7f800000) { - return x; - } else if (ix == 0x3f800000) - return 0; - - /* reduce x into [sqrt(2)/2, sqrt(2)] */ - ix += 0x3f800000 - 0x3f3504f3; - k += (int)(ix >> 23) - 0x7f; - ix = (ix & 0x007fffff) + 0x3f3504f3; - u.i = ix; - x = u.f; - - f = x - 1.0f; - s = f / (2.0f + f); - z = s * s; - w = z * z; - t1= w * (Lg2 + w * Lg4); - t2= z * (Lg1 + w * Lg3); - R = t2 + t1; - hfsq = 0.5f * f * f; - - hi = f - hfsq; - u.f = hi; - u.i &= 0xfffff000; - hi = u.f; - lo = f - hi - hfsq + s * (hfsq + R); - dk = k; - return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi; -} - /* Subnormal input is normalized so ix has negative biased exponent. Output is multiplied by POWF_SCALE (where 1 << 5). */ static double powf_log2(UINT32 ix) @@ -2599,89 +2536,6 @@ double CDECL exp( double x ) return scale + scale * tmp; }

-/********************************************************************* - * log10 (MSVCRT.@) - */ -double CDECL log10( double x ) -{ - static const double ivln10hi = 4.34294481878168880939e-01, - ivln10lo = 2.50829467116452752298e-11, - log10_2hi = 3.01029995663611771306e-01, - log10_2lo = 3.69423907715893078616e-13, - Lg1 = 6.666666666666735130e-01, - Lg2 = 3.999999999940941908e-01, - Lg3 = 2.857142874366239149e-01, - Lg4 = 2.222219843214978396e-01, - Lg5 = 1.818357216161805012e-01, - Lg6 = 1.531383769920937332e-01, - Lg7 = 1.479819860511658591e-01; - - union {double f; UINT64 i;} u = {x}; - double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo; - UINT32 hx; - int k; - - hx = u.i >> 32; - k = 0; - if (hx < 0x00100000 || hx >> 31) { - if (u.i << 1 == 0) - return math_error(_SING, "log10", x, 0, -1 / (x * x)); - if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL) - return x; - if (hx >> 31) - return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x)); - /* subnormal number, scale x up */ - k -= 54; - x *= 0x1p54; - u.f = x; - hx = u.i >> 32; - } else if (hx >= 0x7ff00000) { - return x; - } else if (hx == 0x3ff00000 && u.i<<32 == 0) - return 0; - - /* reduce x into [sqrt(2)/2, sqrt(2)] */ - hx += 0x3ff00000 - 0x3fe6a09e; - k += (int)(hx >> 20) - 0x3ff; - hx = (hx & 0x000fffff) + 0x3fe6a09e; - u.i = (UINT64)hx << 32 | (u.i & 0xffffffff); - x = u.f; - - f = x - 1.0; - hfsq = 0.5 * f * f; - s = f / (2.0 + f); - z = s * s; - w = z * z; - t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); - t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); - R = t2 + t1; - - /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ - hi = f - hfsq; - u.f = hi; - u.i &= (UINT64)-1 << 32; - hi = u.f; - lo = f - hi - hfsq + s * (hfsq + R); - - /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */ - val_hi = hi * ivln10hi; - dk = k; - y = dk * log10_2hi; - val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi; - - /* - * Extra precision in for adding y is not strictly needed - * since there is no very large cancellation near x = sqrt(2) or - * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs - * with some parallelism and it reduces the error for many args. - */ - w = y + val_hi; - val_lo += (y - w) + val_hi; - val_hi = w; - - return val_lo + val_hi; -} - /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ diff --git a/libs/musl/src/math/log10.c b/libs/musl/src/math/log10.c index df7d591c292..6ec431bfe22 100644 --- a/libs/musl/src/math/log10.c +++ b/libs/musl/src/math/log10.c @@ -45,9 +45,11 @@ double __cdecl log10(double x) k = 0; if (hx < 0x00100000 || hx>>31) { if (u.i<<1 == 0) - return -1/(x*x); /* log(+-0)=-inf */ - if (hx>>31) - return (x-x)/0.0; /* log(-#) = NaN */ + return math_error(_SING, "log10", x, 0, -1 / (x * x)); + if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL) + return x; + if (hx >> 31) + return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x)); /* subnormal number, scale x up */ k -= 54; x *= 0x1p54; diff --git a/libs/musl/src/math/log10f.c b/libs/musl/src/math/log10f.c index 64cf8c85be0..0233e2ff332 100644 --- a/libs/musl/src/math/log10f.c +++ b/libs/musl/src/math/log10f.c @@ -39,9 +39,11 @@ float __cdecl log10f(float x) k = 0; if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */ if (ix<<1 == 0) - return -1/(x*x); /* log(+-0)=-inf */ + return math_error(_SING, "log10f", x, 0, -1 / (x * x)); + if ((ix & ~(1u << 31)) > 0x7f800000) + return x; if (ix>>31) - return (x-x)/0.0f; /* log(-#) = NaN */ + return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x)); /* subnormal number, scale up x */ k -= 25; x *= 0x1p25f;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 03/12] msvcrt: Use the __expo2()/__expo2f() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 425eccb9c9e..f368790f3d4 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -983,16 +983,7 @@ float CDECL cosf( float x ) } }

-/* Copied from musl: src/math/__expo2f.c */ -static float __expo2f(float x, float sign) -{ - static const int k = 235; - static const float kln2 = 0x1.45c778p+7f; - float scale; - - *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23; - return expf(x - kln2) * (sign * scale) * scale; -} +extern float __expo2f(float x, float sign);

/********************************************************************* * coshf (MSVCRT.@) @@ -2237,15 +2228,7 @@ double CDECL cos( double x ) } }

-static double __expo2(double x, double sign) -{ - static const int k = 2043; - static const double kln2 = 0x1.62066151add8bp+10; - double scale; - - *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52; - return exp(x - kln2) * (sign * scale) * scale; -} +extern double __expo2(double x, double sign);

/********************************************************************* * cosh (MSVCRT.@)

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 04/12] msvcrt: Use the coshf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

With the changes from 361143252bf23b6f29eead49a3715fdd6d552e8a. --- dlls/msvcrt/math.c | 39 -------------------------------------- libs/musl/src/math/coshf.c | 5 +++++ 2 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index f368790f3d4..1c6fd5d7d70 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -985,45 +985,6 @@ float CDECL cosf( float x )

extern float __expo2f(float x, float sign);

-/********************************************************************* - * coshf (MSVCRT.@) - * - * Copied from musl: src/math/coshf.c - */ -float CDECL coshf( float x ) -{ - UINT32 ui = *(UINT32*)&x; - UINT32 sign = ui & 0x80000000; - float t; - - /* |x| */ - ui &= 0x7fffffff; - x = *(float*)&ui; - - /* |x| < log(2) */ - if (ui < 0x3f317217) { - if (ui < 0x3f800000 - (12 << 23)) { - fp_barrierf(x + 0x1p120f); - return 1; - } - t = expm1f(x); - return 1 + t * t / (2 * (1 + t)); - } - - /* |x| < log(FLT_MAX) */ - if (ui < 0x42b17217) { - t = expf(x); - return 0.5f * (t + 1 / t); - } - - /* |x| > log(FLT_MAX) or nan */ - if (ui > 0x7f800000) - *(UINT32*)&t = ui | sign | 0x400000; - else - t = __expo2f(x, 1.0f); - return t; -} - /********************************************************************* * expf (MSVCRT.@) */ diff --git a/libs/musl/src/math/coshf.c b/libs/musl/src/math/coshf.c index 2fc004682da..44f29113559 100644 --- a/libs/musl/src/math/coshf.c +++ b/libs/musl/src/math/coshf.c @@ -3,6 +3,7 @@ float __cdecl coshf(float x) { union {float f; uint32_t i;} u = {.f = x}; + uint32_t sign = u.i & 0x80000000; uint32_t w; float t;

@@ -28,6 +29,10 @@ float __cdecl coshf(float x) }

/* |x| > log(FLT_MAX) or nan */ + if (w > 0x7f800000) { + u.i |= sign | 0x400000; + return u.f; + } t = __expo2f(x, 1.0f); return t; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 05/12] msvcrt: Use the sinhf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

With the changes from 4fd9daea4a79d0826c9e5ab6466291043e40d579. --- dlls/msvcrt/math.c | 36 ------------------------------------ libs/musl/src/math/sinhf.c | 5 +++++ 2 files changed, 5 insertions(+), 36 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 1c6fd5d7d70..9741100b2b9 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -983,8 +983,6 @@ float CDECL cosf( float x ) } }

-extern float __expo2f(float x, float sign); - /********************************************************************* * expf (MSVCRT.@) */ @@ -1277,40 +1275,6 @@ float CDECL sinf( float x ) } }

-/********************************************************************* - * sinhf (MSVCRT.@) - */ -float CDECL sinhf( float x ) -{ - UINT32 ui = *(UINT32*)&x; - float t, h, absx; - - h = 0.5; - if (ui >> 31) - h = -h; - /* |x| */ - ui &= 0x7fffffff; - absx = *(float*)&ui; - - /* |x| < log(FLT_MAX) */ - if (ui < 0x42b17217) { - t = expm1f(absx); - if (ui < 0x3f800000) { - if (ui < 0x3f800000 - (12 << 23)) - return x; - return h * (2 * t - t * t / (t + 1)); - } - return h * (t + t / (t + 1)); - } - - /* |x| > logf(FLT_MAX) or nan */ - if (ui > 0x7f800000) - *(DWORD*)&t = *(DWORD*)&x | 0x400000; - else - t = __expo2f(absx, 2 * h); - return t; -} - static BOOL sqrtf_validate( float *x ) { short c = _fdclass(*x); diff --git a/libs/musl/src/math/sinhf.c b/libs/musl/src/math/sinhf.c index a9abf634e85..60bca2b5a36 100644 --- a/libs/musl/src/math/sinhf.c +++ b/libs/musl/src/math/sinhf.c @@ -3,6 +3,7 @@ float __cdecl sinhf(float x) { union {float f; uint32_t i;} u = {.f = x}; + uint32_t sign = u.i & 0x80000000; uint32_t w; float t, h, absx;

@@ -26,6 +27,10 @@ float __cdecl sinhf(float x) }

/* |x| > logf(FLT_MAX) or nan */ + if (w > 0x7f800000) { + u.i = w | sign | 0x400000; + return u.f; + } t = __expo2f(absx, 2*h); return t; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 06/12] msvcrt: Use the cosh() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

With the changes from 60d178b410ba943c6c39b2ccf1deaec264d7d21a. --- dlls/msvcrt/math.c | 43 --------------------------------------- libs/musl/src/math/cosh.c | 5 +++++ 2 files changed, 5 insertions(+), 43 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 9741100b2b9..8bce6727ac4 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -2155,49 +2155,6 @@ double CDECL cos( double x )

extern double __expo2(double x, double sign);

-/********************************************************************* - * cosh (MSVCRT.@) - * - * Copied from musl: src/math/cosh.c - */ -double CDECL cosh( double x ) -{ - UINT64 ux = *(UINT64*)&x; - UINT64 sign = ux & 0x8000000000000000ULL; - UINT32 w; - double t; - - /* |x| */ - ux &= (uint64_t)-1 / 2; - x = *(double*)&ux; - w = ux >> 32; - - /* |x| < log(2) */ - if (w < 0x3fe62e42) { - if (w < 0x3ff00000 - (26 << 20)) { - fp_barrier(x + 0x1p120f); - return 1; - } - t = expm1(x); - return 1 + t * t / (2 * (1 + t)); - } - - /* |x| < log(DBL_MAX) */ - if (w < 0x40862e42) { - t = exp(x); - /* note: if x>log(0x1p26) then the 1/t is not needed */ - return 0.5 * (t + 1 / t); - } - - /* |x| > log(DBL_MAX) or nan */ - /* note: the result is stored to handle overflow */ - if (ux > 0x7ff0000000000000ULL) - *(UINT64*)&t = ux | sign | 0x0008000000000000ULL; - else - t = __expo2(x, 1.0); - return t; -} - /* Copied from musl: src/math/exp_data.c */ static const UINT64 exp_T[] = { 0x0ULL, 0x3ff0000000000000ULL, diff --git a/libs/musl/src/math/cosh.c b/libs/musl/src/math/cosh.c index 384d9d8beac..8ab9417d2c2 100644 --- a/libs/musl/src/math/cosh.c +++ b/libs/musl/src/math/cosh.c @@ -7,6 +7,7 @@ double __cdecl cosh(double x) { union {double f; uint64_t i;} u = {.f = x}; + uint64_t sign = u.i & 0x8000000000000000ULL; uint32_t w; double t;

@@ -35,6 +36,10 @@ double __cdecl cosh(double x)

/* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ + if (w > 0x7ff00000) { + u.i |= sign | 0x0008000000000000ULL; + return u.f; + } t = __expo2(x, 1.0); return t; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 07/12] msvcrt: Use the sinh() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

With the changes from 11166aa01e7d17f8535977f648c0b5e098d43bd9. --- dlls/msvcrt/math.c | 40 --------------------------------------- libs/musl/src/math/sinh.c | 5 +++++ 2 files changed, 5 insertions(+), 40 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 8bce6727ac4..5efdc6fc12e 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -2153,8 +2153,6 @@ double CDECL cos( double x ) } }

-extern double __expo2(double x, double sign); - /* Copied from musl: src/math/exp_data.c */ static const UINT64 exp_T[] = { 0x0ULL, 0x3ff0000000000000ULL, @@ -2860,44 +2858,6 @@ double CDECL sin( double x ) } }

-/********************************************************************* - * sinh (MSVCRT.@) - */ -double CDECL sinh( double x ) -{ - UINT64 ux = *(UINT64*)&x; - UINT64 sign = ux & 0x8000000000000000ULL; - UINT32 w; - double t, h, absx; - - h = 0.5; - if (ux >> 63) - h = -h; - /* |x| */ - ux &= (UINT64)-1 / 2; - absx = *(double*)&ux; - w = ux >> 32; - - /* |x| < log(DBL_MAX) */ - if (w < 0x40862e42) { - t = expm1(absx); - if (w < 0x3ff00000) { - if (w < 0x3ff00000 - (26 << 20)) - return x; - return h * (2 * t - t * t / (t + 1)); - } - return h * (t + t / (t + 1)); - } - - /* |x| > log(DBL_MAX) or nan */ - /* note: the result is stored to handle overflow */ - if (ux > 0x7ff0000000000000ULL) - *(UINT64*)&t = ux | sign | 0x0008000000000000ULL; - else - t = __expo2(absx, 2 * h); - return t; -} - static BOOL sqrt_validate( double *x, BOOL update_sw ) { short c = _dclass(*x); diff --git a/libs/musl/src/math/sinh.c b/libs/musl/src/math/sinh.c index af3ef8493b1..2d05ba80860 100644 --- a/libs/musl/src/math/sinh.c +++ b/libs/musl/src/math/sinh.c @@ -7,6 +7,7 @@ double __cdecl sinh(double x) { union {double f; uint64_t i;} u = {.f = x}; + uint64_t sign = u.i & 0x8000000000000000ULL; uint32_t w; double t, h, absx;

@@ -34,6 +35,10 @@ double __cdecl sinh(double x)

/* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ + if (w > 0x7ff00000) { + u.i |= sign | 0x0008000000000000ULL; + return u.f; + } t = __expo2(absx, 2*h); return t; }

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 08/12] msvcrt: Use the atan2()/atan2f() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 144 --------------------------------------------- 1 file changed, 144 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 5efdc6fc12e..25cf2e05ccd 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -797,77 +797,6 @@ float CDECL atanf( float x ) return sign ? -z : z; }

-/********************************************************************* - * atan2f (MSVCRT.@) - * - * Copied from musl: src/math/atan2f.c - */ -float CDECL atan2f( float y, float x ) -{ - static const float pi = 3.1415927410e+00, - pi_lo = -8.7422776573e-08; - - float z; - unsigned int m, ix, iy; - - if (isnan(x) || isnan(y)) - return x + y; - ix = *(unsigned int*)&x; - iy = *(unsigned int*)&y; - if (ix == 0x3f800000) /* x=1.0 */ - return atanf(y); - m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ - ix &= 0x7fffffff; - iy &= 0x7fffffff; - - /* when y = 0 */ - if (iy == 0) { - switch (m) { - case 0: - case 1: return y; /* atan(+-0,+anything)=+-0 */ - case 2: return pi; /* atan(+0,-anything) = pi */ - case 3: return -pi; /* atan(-0,-anything) =-pi */ - } - } - /* when x = 0 */ - if (ix == 0) - return m & 1 ? -pi / 2 : pi / 2; - /* when x is INF */ - if (ix == 0x7f800000) { - if (iy == 0x7f800000) { - switch (m) { - case 0: return pi / 4; /* atan(+INF,+INF) */ - case 1: return -pi / 4; /* atan(-INF,+INF) */ - case 2: return 3 * pi / 4; /*atan(+INF,-INF)*/ - case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/ - } - } else { - switch (m) { - case 0: return 0.0f; /* atan(+...,+INF) */ - case 1: return -0.0f; /* atan(-...,+INF) */ - case 2: return pi; /* atan(+...,-INF) */ - case 3: return -pi; /* atan(-...,-INF) */ - } - } - } - /* |y/x| > 0x1p26 */ - if (ix + (26 << 23) < iy || iy == 0x7f800000) - return m & 1 ? -pi / 2 : pi / 2; - - /* z = atan(|y/x|) with correct underflow */ - if ((m & 2) && iy + (26 << 23) < ix) /*|y/x| < 0x1p-26, x < 0 */ - z = 0.0; - else - z = atanf(fabsf(y / x)); - switch (m) { - case 0: return z; /* atan(+,+) */ - case 1: return -z; /* atan(-,+) */ - case 2: return pi - (z - pi_lo); /* atan(+,-) */ - default: /* case 3 */ - return (z - pi_lo) - pi; /* atan(-,-) */ - } -} - /* Copied from musl: src/math/__rem_pio2f.c */ static int __rem_pio2f(float x, double *y) { @@ -1822,79 +1751,6 @@ double CDECL atan( double x ) return sign ? -z : z; }

-/********************************************************************* - * atan2 (MSVCRT.@) - * - * Copied from musl: src/math/atan2.c - */ -double CDECL atan2( double y, double x ) -{ - static const double pi = 3.1415926535897931160E+00, - pi_lo = 1.2246467991473531772E-16; - - double z; - unsigned int m, lx, ly, ix, iy; - - if (isnan(x) || isnan(y)) - return x+y; - ix = *(ULONGLONG*)&x >> 32; - lx = *(ULONGLONG*)&x; - iy = *(ULONGLONG*)&y >> 32; - ly = *(ULONGLONG*)&y; - if (((ix - 0x3ff00000) | lx) == 0) /* x = 1.0 */ - return atan(y); - m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ - ix = ix & 0x7fffffff; - iy = iy & 0x7fffffff; - - /* when y = 0 */ - if ((iy | ly) == 0) { - switch(m) { - case 0: - case 1: return y; /* atan(+-0,+anything)=+-0 */ - case 2: return pi; /* atan(+0,-anything) = pi */ - case 3: return -pi; /* atan(-0,-anything) =-pi */ - } - } - /* when x = 0 */ - if ((ix | lx) == 0) - return m & 1 ? -pi / 2 : pi / 2; - /* when x is INF */ - if (ix == 0x7ff00000) { - if (iy == 0x7ff00000) { - switch(m) { - case 0: return pi / 4; /* atan(+INF,+INF) */ - case 1: return -pi / 4; /* atan(-INF,+INF) */ - case 2: return 3 * pi / 4; /* atan(+INF,-INF) */ - case 3: return -3 * pi / 4; /* atan(-INF,-INF) */ - } - } else { - switch(m) { - case 0: return 0.0; /* atan(+...,+INF) */ - case 1: return -0.0; /* atan(-...,+INF) */ - case 2: return pi; /* atan(+...,-INF) */ - case 3: return -pi; /* atan(-...,-INF) */ - } - } - } - /* |y/x| > 0x1p64 */ - if (ix + (64 << 20) < iy || iy == 0x7ff00000) - return m & 1 ? -pi / 2 : pi / 2; - - /* z = atan(|y/x|) without spurious underflow */ - if ((m & 2) && iy + (64 << 20) < ix) /* |y/x| < 0x1p-64, x<0 */ - z = 0; - else - z = atan(fabs(y / x)); - switch (m) { - case 0: return z; /* atan(+,+) */ - case 1: return -z; /* atan(-,+) */ - case 2: return pi - (z - pi_lo); /* atan(+,-) */ - default: /* case 3 */ - return (z - pi_lo) - pi; /* atan(-,-) */ - } -} - /* Copied from musl: src/math/rint.c */ static double __rint(double x) {

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 09/12] msvcrt: Use the exp2()/exp2f() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 169 ------------------------------------- libs/musl/src/math/exp2.c | 17 +++- libs/musl/src/math/exp2f.c | 12 ++- 3 files changed, 21 insertions(+), 177 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 25cf2e05ccd..60d85bc7be7 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -488,7 +488,6 @@ static double __round(double x) return *(double*)&llx; }

-#if !defined(__i386__) || _MSVCR_VER >= 120 #ifndef __i386__ /* Copied from musl: src/math/__sindf.c */ static float __sindf(double x) @@ -525,7 +524,6 @@ static float __cosdf(double x) return 1 + C0 * z; return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4)))); } -#endif

static const UINT64 exp2f_T[] = { 0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL, @@ -5498,173 +5496,6 @@ void __cdecl __libm_sse2_sqrt_precise(void)

#if _MSVCR_VER>=120

-/********************************************************************* - * exp2 (MSVCR120.@) - * - * Copied from musl: src/math/exp2.c - */ -double CDECL exp2(double x) -{ - static const double C[] = { - 0x1.62e42fefa39efp-1, - 0x1.ebfbdff82c424p-3, - 0x1.c6b08d70cf4b5p-5, - 0x1.3b2abd24650ccp-7, - 0x1.5d7e09b4e3a84p-10 - }; - - UINT32 abstop; - UINT64 ki, idx, top, sbits; - double kd, r, r2, scale, tail, tmp; - - abstop = (*(UINT64*)&x >> 52) & 0x7ff; - if (abstop - 0x3c9 >= 0x408 - 0x3c9) { - if (abstop - 0x3c9 >= 0x80000000) { - /* Avoid spurious underflow for tiny x. */ - /* Note: 0 is common input. */ - return 1.0 + x; - } - if (abstop >= 409) { - if (*(UINT64*)&x == 0xfff0000000000000ull) - return 0.0; - if (abstop >= 0x7ff) - return 1.0 + x; - if (!(*(UINT64*)&x >> 63)) { - *_errno() = ERANGE; - return fp_barrier(DBL_MAX) * DBL_MAX; - } - else if (x <= -2147483648.0) { - fp_barrier(x + 0x1p120f); - return 0; - } - else if (*(UINT64*)&x >= 0xc090cc0000000000ull) { - *_errno() = ERANGE; - fp_barrier(x + 0x1p120f); - return 0; - } - } - if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull) - /* Large x is special cased below. */ - abstop = 0; - } - - /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */ - /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */ - kd = fp_barrier(x + 0x1.8p52 / (1 << 7)); - ki = *(UINT64*)&kd; /* k. */ - kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */ - r = x - kd; - /* 2^(k/N) ~= scale * (1 + tail). */ - idx = 2 * (ki % (1 << 7)); - top = ki << (52 - 7); - tail = *(double*)&exp_T[idx]; - /* This is only a valid scale when -1023*N < k < 1024*N. */ - sbits = exp_T[idx + 1] + top; - /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; - /* Without fma the worst case error is 0.5/N ulp larger. */ - /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */ - tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]); - if (abstop == 0) - { - /* Handle cases that may overflow or underflow when computing the result that - is scale*(1+TMP) without intermediate rounding. The bit representation of - scale is in SBITS, however it has a computed exponent that may have - overflown into the sign bit so that needs to be adjusted before using it as - a double. (int32_t)KI is the k used in the argument reduction and exponent - adjustment of scale, positive k here means the result may overflow and - negative k means the result may underflow. */ - double scale, y; - - if ((ki & 0x80000000) == 0) { - /* k > 0, the exponent of scale might have overflowed by 1. */ - sbits -= 1ull << 52; - scale = *(double*)&sbits; - y = 2 * (scale + scale * tmp); - return y; - } - /* k < 0, need special care in the subnormal range. */ - sbits += 1022ull << 52; - scale = *(double*)&sbits; - y = scale + scale * tmp; - if (y < 1.0) { - /* Round y to the right precision before scaling it into the subnormal - range to avoid double rounding that can cause 0.5+E/2 ulp error where - E is the worst-case ulp error outside the subnormal range. So this - is only useful if the goal is better than 1 ulp worst-case error. */ - double hi, lo; - lo = scale - y + scale * tmp; - hi = 1.0 + y; - lo = 1.0 - hi + y + lo; - y = hi + lo - 1.0; - /* Avoid -0.0 with downward rounding. */ - if (y == 0.0) - y = 0.0; - /* The underflow exception needs to be signaled explicitly. */ - fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022); - } - y = 0x1p-1022 * y; - return y; - } - scale = *(double*)&sbits; - /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there - is no spurious underflow here even without fma. */ - return scale + scale * tmp; -} - -/********************************************************************* - * exp2f (MSVCR120.@) - * - * Copied from musl: src/math/exp2f.c - */ -float CDECL exp2f(float x) -{ - static const double C[] = { - 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1 - }; - static const double shift = 0x1.8p+52 / (1 << 5); - - double kd, xd, z, r, r2, y, s; - UINT32 abstop; - UINT64 ki, t; - - xd = x; - abstop = (*(UINT32*)&x >> 20) & 0x7ff; - if (abstop >= 0x430) { - /* |x| >= 128 or x is nan. */ - if (*(UINT32*)&x == 0xff800000) - return 0.0f; - if (abstop >= 0x7f8) - return x + x; - if (x > 0.0f) { - *_errno() = ERANGE; - return fp_barrierf(x * FLT_MAX); - } - if (x <= -150.0f) { - fp_barrierf(x - 0x1p120); - return 0; - } - } - - /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */ - kd = xd + shift; - ki = *(UINT64*)&kd; - kd -= shift; /* k/(1<<5) for int k. */ - r = xd - kd; - - /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ - t = exp2f_T[ki % (1 << 5)]; - t += ki << (52 - 5); - s = *(double*)&t; - z = C[0] * r + C[1]; - r2 = r * r; - y = C[2] * r + 1; - y = z * r2 + y; - y = y * s; - return y; -} - /********************************************************************* * log1p (MSVCR120.@) * diff --git a/libs/musl/src/math/exp2.c b/libs/musl/src/math/exp2.c index 618289a4185..47c557ce22a 100644 --- a/libs/musl/src/math/exp2.c +++ b/libs/musl/src/math/exp2.c @@ -84,10 +84,19 @@ double __cdecl exp2(double x) return 0.0; if (abstop >= top12(INFINITY)) return 1.0 + x; - if (!(asuint64(x) >> 63)) - return __math_oflow(0); - else if (asuint64(x) >= asuint64(-1075.0)) - return __math_uflow(0); + if (!(asuint64(x) >> 63)) { + errno = ERANGE; + return fp_barrier(DBL_MAX) * DBL_MAX; + } + else if (x <= -2147483648.0) { + fp_barrier(x + 0x1p120f); + return 0; + } + else if (asuint64(x) >= asuint64(-1075.0)) { + errno = ERANGE; + fp_barrier(x + 0x1p120f); + return 0; + } } if (2 * asuint64(x) > 2 * asuint64(928.0)) /* Large x is special cased below. */ diff --git a/libs/musl/src/math/exp2f.c b/libs/musl/src/math/exp2f.c index 28670a800a1..e87265df0d9 100644 --- a/libs/musl/src/math/exp2f.c +++ b/libs/musl/src/math/exp2f.c @@ -44,10 +44,14 @@ float __cdecl exp2f(float x) return 0.0f; if (abstop >= top12(INFINITY)) return x + x; - if (x > 0.0f) - return __math_oflowf(0); - if (x <= -150.0f) - return __math_uflowf(0); + if (x > 0.0f) { + errno = ERANGE; + return fp_barrierf(x * FLT_MAX); + } + if (x <= -150.0f) { + fp_barrierf(x - 0x1p120); + return 0; + } }

/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 10/12] msvcrt: Use the log1p()/log1pf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 144 ------------------------------------ libs/musl/src/math/log1p.c | 6 +- libs/musl/src/math/log1pf.c | 5 +- 3 files changed, 9 insertions(+), 146 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 60d85bc7be7..0f9a415d88c 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -5496,150 +5496,6 @@ void __cdecl __libm_sse2_sqrt_precise(void)

#if _MSVCR_VER>=120

-/********************************************************************* - * log1p (MSVCR120.@) - * - * Copied from musl: src/math/log1p.c - */ -double CDECL log1p(double x) -{ - static const double ln2_hi = 6.93147180369123816490e-01, - ln2_lo = 1.90821492927058770002e-10, - Lg1 = 6.666666666666735130e-01, - Lg2 = 3.999999999940941908e-01, - Lg3 = 2.857142874366239149e-01, - Lg4 = 2.222219843214978396e-01, - Lg5 = 1.818357216161805012e-01, - Lg6 = 1.531383769920937332e-01, - Lg7 = 1.479819860511658591e-01; - - union {double f; UINT64 i;} u = {x}; - double hfsq, f, c, s, z, R, w, t1, t2, dk; - UINT32 hx, hu; - int k; - - hx = u.i >> 32; - k = 1; - if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */ - if (hx >= 0xbff00000) { /* x <= -1.0 */ - if (x == -1) { - *_errno() = ERANGE; - return x / 0.0; /* og1p(-1) = -inf */ - } - *_errno() = EDOM; - return (x-x) / 0.0; /* log1p(x<-1) = NaN */ - } - if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */ - fp_barrier(x + 0x1p120f); - /* underflow if subnormal */ - if ((hx & 0x7ff00000) == 0) - fp_barrierf(x); - return x; - } - if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ - k = 0; - c = 0; - f = x; - } - } else if (hx >= 0x7ff00000) - return x; - if (k) { - u.f = 1 + x; - hu = u.i >> 32; - hu += 0x3ff00000 - 0x3fe6a09e; - k = (int)(hu >> 20) - 0x3ff; - /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ - if (k < 54) { - c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1); - c /= u.f; - } else - c = 0; - /* reduce u into [sqrt(2)/2, sqrt(2)] */ - hu = (hu & 0x000fffff) + 0x3fe6a09e; - u.i = (UINT64)hu << 32 | (u.i & 0xffffffff); - f = u.f - 1; - } - hfsq = 0.5 * f * f; - s = f / (2.0 + f); - z = s * s; - w = z * z; - t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); - t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); - R = t2 + t1; - dk = k; - return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi; -} - -/********************************************************************* - * log1pf (MSVCR120.@) - * - * Copied from musl: src/math/log1pf.c - */ -float CDECL log1pf(float x) -{ - static const float ln2_hi = 6.9313812256e-01, - ln2_lo = 9.0580006145e-06, - Lg1 = 0xaaaaaa.0p-24, - Lg2 = 0xccce13.0p-25, - Lg3 = 0x91e9ee.0p-25, - Lg4 = 0xf89e26.0p-26; - - union {float f; UINT32 i;} u = {x}; - float hfsq, f, c, s, z, R, w, t1, t2, dk; - UINT32 ix, iu; - int k; - - ix = u.i; - k = 1; - if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */ - if (ix >= 0xbf800000) { /* x <= -1.0 */ - if (x == -1) { - *_errno() = ERANGE; - return x / 0.0f; /* log1p(-1)=+inf */ - } - *_errno() = EDOM; - return (x - x) / 0.0f; /* log1p(x<-1)=NaN */ - } - if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */ - /* underflow if subnormal */ - if ((ix & 0x7f800000) == 0) - fp_barrierf(x * x); - return x; - } - if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ - k = 0; - c = 0; - f = x; - } - } else if (ix >= 0x7f800000) - return x; - if (k) { - u.f = 1 + x; - iu = u.i; - iu += 0x3f800000 - 0x3f3504f3; - k = (int)(iu >> 23) - 0x7f; - /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ - if (k < 25) { - c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1); - c /= u.f; - } else - c = 0; - /* reduce u into [sqrt(2)/2, sqrt(2)] */ - iu = (iu & 0x007fffff) + 0x3f3504f3; - u.i = iu; - f = u.f - 1; - } - s = f / (2.0f + f); - z = s * s; - w = z * z; - t1= w * (Lg2 + w * Lg4); - t2= z * (Lg1 + w * Lg3); - R = t2 + t1; - hfsq = 0.5f * f * f; - dk = k; - return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi; -} - /********************************************************************* * log2 (MSVCR120.@) * diff --git a/libs/musl/src/math/log1p.c b/libs/musl/src/math/log1p.c index 9376b13ed6a..e206f17ad5e 100644 --- a/libs/musl/src/math/log1p.c +++ b/libs/musl/src/math/log1p.c @@ -77,11 +77,15 @@ double __cdecl log1p(double x) k = 1; if (hx < 0x3fda827a || hx>>31) { /* 1+x < sqrt(2)+ */ if (hx >= 0xbff00000) { /* x <= -1.0 */ - if (x == -1) + if (x == -1) { + errno = ERANGE; return x/0.0; /* log1p(-1) = -inf */ + } + errno = EDOM; return (x-x)/0.0; /* log1p(x<-1) = NaN */ } if (hx<<1 < 0x3ca00000<<1) { /* |x| < 2**-53 */ + fp_barrier(x + 0x1p120f); /* underflow if subnormal */ if ((hx&0x7ff00000) == 0) FORCE_EVAL((float)x); diff --git a/libs/musl/src/math/log1pf.c b/libs/musl/src/math/log1pf.c index e2cda58438b..9637f672e20 100644 --- a/libs/musl/src/math/log1pf.c +++ b/libs/musl/src/math/log1pf.c @@ -32,8 +32,11 @@ float __cdecl log1pf(float x) k = 1; if (ix < 0x3ed413d0 || ix>>31) { /* 1+x < sqrt(2)+ */ if (ix >= 0xbf800000) { /* x <= -1.0 */ - if (x == -1) + if (x == -1) { + errno = ERANGE; return x/0.0f; /* log1p(-1)=+inf */ + } + errno = EDOM; return (x-x)/0.0f; /* log1p(x<-1)=NaN */ } if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 11/12] msvcrt: Use the log2()/log2f() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 338 ------------------------------------- libs/musl/src/math/log2.c | 10 +- libs/musl/src/math/log2f.c | 10 +- 3 files changed, 16 insertions(+), 342 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 0f9a415d88c..2685e34809b 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -5496,344 +5496,6 @@ void __cdecl __libm_sse2_sqrt_precise(void)

#if _MSVCR_VER>=120

-/********************************************************************* - * log2 (MSVCR120.@) - * - * Copied from musl: src/math/log2.c - */ -double CDECL log2(double x) -{ - static const double invln2hi = 0x1.7154765200000p+0, - invln2lo = 0x1.705fc2eefa200p-33; - static const double A[] = { - -0x1.71547652b8339p-1, - 0x1.ec709dc3a04bep-2, - -0x1.7154764702ffbp-2, - 0x1.2776c50034c48p-2, - -0x1.ec7b328ea92bcp-3, - 0x1.a6225e117f92ep-3 - }; - static const double B[] = { - -0x1.71547652b82fep-1, - 0x1.ec709dc3a03f7p-2, - -0x1.71547652b7c3fp-2, - 0x1.2776c50f05be4p-2, - -0x1.ec709dd768fe5p-3, - 0x1.a61761ec4e736p-3, - -0x1.7153fbc64a79bp-3, - 0x1.484d154f01b4ap-3, - -0x1.289e4a72c383cp-3, - 0x1.0b32f285aee66p-3 - }; - static const struct { - double invc, logc; - } T[] = { - {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, - {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1}, - {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1}, - {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2}, - {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2}, - {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2}, - {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2}, - {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2}, - {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2}, - {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2}, - {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2}, - {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2}, - {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2}, - {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2}, - {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2}, - {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2}, - {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2}, - {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2}, - {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2}, - {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2}, - {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3}, - {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3}, - {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3}, - {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3}, - {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3}, - {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3}, - {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3}, - {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3}, - {0x1.19453847f2200p+0, -0x1.162595afdc000p-3}, - {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4}, - {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4}, - {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4}, - {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4}, - {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4}, - {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4}, - {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5}, - {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5}, - {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6}, - {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6}, - {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8}, - {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7}, - {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5}, - {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5}, - {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4}, - {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4}, - {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4}, - {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3}, - {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3}, - {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3}, - {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3}, - {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3}, - {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3}, - {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2}, - {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2}, - {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2}, - {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2}, - {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2}, - {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2}, - {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2}, - {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2}, - {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2}, - {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2}, - {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2}, - {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2} - }; - static const struct { - double chi, clo; - } T2[] = { - {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55}, - {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57}, - {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55}, - {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55}, - {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55}, - {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56}, - {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56}, - {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57}, - {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55}, - {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57}, - {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55}, - {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55}, - {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56}, - {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56}, - {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56}, - {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55}, - {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57}, - {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55}, - {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55}, - {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58}, - {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55}, - {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58}, - {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56}, - {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56}, - {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57}, - {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56}, - {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56}, - {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55}, - {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58}, - {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56}, - {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55}, - {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56}, - {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55}, - {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56}, - {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55}, - {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55}, - {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55}, - {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59}, - {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58}, - {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55}, - {0x1.0200004292367p+0, 0x1.b7ff365324681p-54}, - {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55}, - {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58}, - {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54}, - {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55}, - {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54}, - {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54}, - {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54}, - {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55}, - {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55}, - {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56}, - {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54}, - {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56}, - {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54}, - {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56}, - {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54}, - {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56}, - {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55}, - {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55}, - {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56}, - {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54}, - {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55}, - {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55}, - {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54} - }; - - double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo; - UINT64 ix, iz, tmp; - UINT32 top; - int k, i; - - ix = *(UINT64*)&x; - top = ix >> 48; - if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) { - /* Handle close to 1.0 inputs separately. */ - /* Fix sign of zero with downward rounding when x==1. */ - if (ix == 0x3ff0000000000000ULL) - return 0; - r = x - 1.0; - *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32; - rlo = r - rhi; - hi = rhi * invln2hi; - lo = rlo * invln2hi + r * invln2lo; - r2 = r * r; /* rounding error: 0x1p-62. */ - r4 = r2 * r2; - /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ - p = r2 * (B[0] + r * B[1]); - y = hi + p; - lo += hi - y + p; - lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + - r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); - y += lo; - return y; - } - if (top - 0x0010 >= 0x7ff0 - 0x0010) { - /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) { - *_errno() = ERANGE; - return -1.0 / x; - } - if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */ - return x; - if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL)) - return x; - if (top & 0x8000) { - *_errno() = EDOM; - return (x - x) / (x - x); - } - /* x is subnormal, normalize it. */ - x *= 0x1p52; - ix = *(UINT64*)&x; - ix -= 52ULL << 52; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - 0x3fe6000000000000ULL; - i = (tmp >> (52 - 6)) % (1 << 6); - k = (INT64)tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - invc = T[i].invc; - logc = T[i].logc; - z = *(double*)&iz; - kd = k; - - /* log2(x) = log2(z/c) + log2(c) + k. */ - /* r ~= z/c - 1, |r| < 1/(2*N). */ - /* rounding error: 0x1p-55/N + 0x1p-65. */ - r = (z - T2[i].chi - T2[i].clo) * invc; - *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32; - rlo = r - rhi; - t1 = rhi * invln2hi; - t2 = rlo * invln2hi + r * invln2lo; - - /* hi + lo = r/ln2 + log2(c) + k. */ - t3 = kd + logc; - hi = t3 + t1; - lo = t3 - hi + t1 + t2; - - /* log2(r+1) = r/ln2 + r^2*poly(r). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; /* rounding error: 0x1p-54/N^2. */ - r4 = r2 * r2; - /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). - ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */ - p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); - y = lo + r2 * p + hi; - return y; -} - -/********************************************************************* - * log2f (MSVCR120.@) - * - * Copied from musl: src/math/log2f.c - */ -float CDECL log2f(float x) -{ - static const double A[] = { - -0x1.712b6f70a7e4dp-2, - 0x1.ecabf496832ep-2, - -0x1.715479ffae3dep-1, - 0x1.715475f35c8b8p0 - }; - static const struct { - double invc, logc; - } T[] = { - { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, - { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 }, - { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 }, - { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 }, - { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 }, - { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 }, - { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 }, - { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 }, - { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 }, - { 0x1p+0, 0x0p+0 }, - { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 }, - { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 }, - { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 }, - { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 }, - { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 }, - { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 } - }; - - double z, r, r2, p, y, y0, invc, logc; - UINT32 ix, iz, top, tmp; - int k, i; - - ix = *(UINT32*)&x; - /* Fix sign of zero with downward rounding when x==1. */ - if (ix == 0x3f800000) - return 0; - if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) { - /* x < 0x1p-126 or inf or nan. */ - if (ix * 2 == 0) { - *_errno() = ERANGE; - return -1.0f / x; - } - if (ix == 0x7f800000) /* log2(inf) == inf. */ - return x; - if (ix * 2 > 0xff000000) - return x; - if (ix & 0x80000000) { - *_errno() = EDOM; - return (x - x) / (x - x); - } - /* x is subnormal, normalize it. */ - x *= 0x1p23f; - ix = *(UINT32*)&x; - ix -= 23 << 23; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - 0x3f330000; - i = (tmp >> (23 - 4)) % (1 << 4); - top = tmp & 0xff800000; - iz = ix - top; - k = (INT32)tmp >> 23; /* arithmetic shift */ - invc = T[i].invc; - logc = T[i].logc; - z = *(float*)&iz; - - /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ - r = z * invc - 1; - y0 = logc + (double)k; - - /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ - r2 = r * r; - y = A[1] * r + A[2]; - y = A[0] * r2 + y; - p = A[3] * r + y0; - y = y * r2 + p; - return y; -} - /********************************************************************* * rint (MSVCR120.@) */ diff --git a/libs/musl/src/math/log2.c b/libs/musl/src/math/log2.c index a97e47705b4..988d97bbb44 100644 --- a/libs/musl/src/math/log2.c +++ b/libs/musl/src/math/log2.c @@ -65,12 +65,18 @@ double __cdecl log2(double x) } if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) { /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) + if (ix * 2 == 0) { + errno = ERANGE; return __math_divzero(1); + } if (ix == asuint64(INFINITY)) /* log(inf) == inf. */ return x; - if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) + if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL)) + return x; + if (top & 0x8000) { + errno = EDOM; return __math_invalid(x); + } /* x is subnormal, normalize it. */ ix = asuint64(x * 0x1p52); ix -= 52ULL << 52; diff --git a/libs/musl/src/math/log2f.c b/libs/musl/src/math/log2f.c index 2a47d90cfc3..dd186ffa190 100644 --- a/libs/musl/src/math/log2f.c +++ b/libs/musl/src/math/log2f.c @@ -35,12 +35,18 @@ float __cdecl log2f(float x) return 0; if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { /* x < 0x1p-126 or inf or nan. */ - if (ix * 2 == 0) + if (ix * 2 == 0) { + errno = ERANGE; return __math_divzerof(1); + } if (ix == 0x7f800000) /* log2(inf) == inf. */ return x; - if ((ix & 0x80000000) || ix * 2 >= 0xff000000) + if (ix * 2 > 0xff000000) + return x; + if (ix & 0x80000000) { + errno = EDOM; return __math_invalidf(x); + } /* x is subnormal, normalize it. */ ix = asuint(x * 0x1p23f); ix -= 23 << 23;

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Alexandre Julliard

12:13 p.m.

New subject: [PATCH 12/12] msvcrt: Use the fma()/fmaf() implementation from the bundled musl library.

From: Alexandre Julliard julliard@winehq.org

--- dlls/msvcrt/math.c | 257 -------------------------------------- dlls/msvcrt/msvcrt.spec | 2 - libs/musl/src/math/fma.c | 26 ++-- libs/musl/src/math/fmaf.c | 2 +- 4 files changed, 16 insertions(+), 271 deletions(-)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 2685e34809b..8d4ed01c02f 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -3310,263 +3310,6 @@ double CDECL floor( double x ) return u.f; }

-/********************************************************************* - * fma (MSVCRT.@) - * - * Copied from musl: src/math/fma.c - */ -struct fma_num -{ - UINT64 m; - int e; - int sign; -}; - -static struct fma_num normalize(double x) -{ - UINT64 ix = *(UINT64*)&x; - int e = ix >> 52; - int sign = e & 0x800; - struct fma_num ret; - - e &= 0x7ff; - if (!e) { - x *= 0x1p63; - ix = *(UINT64*)&x; - e = ix >> 52 & 0x7ff; - e = e ? e - 63 : 0x800; - } - ix &= (1ull << 52) - 1; - ix |= 1ull << 52; - ix <<= 1; - e -= 0x3ff + 52 + 1; - - ret.m = ix; - ret.e = e; - ret.sign = sign; - return ret; -} - -static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y) -{ - UINT64 t1, t2, t3; - UINT64 xlo = (UINT32)x, xhi = x >> 32; - UINT64 ylo = (UINT32)y, yhi = y >> 32; - - t1 = xlo * ylo; - t2 = xlo * yhi + xhi * ylo; - t3 = xhi * yhi; - *lo = t1 + (t2 << 32); - *hi = t3 + (t2 >> 32) + (t1 > *lo); -} - -double CDECL fma( double x, double y, double z ) -{ - int e, d, sign, samesign, nonzero; - UINT64 rhi, rlo, zhi, zlo; - struct fma_num nx, ny, nz; - double r; - INT64 i; - - /* normalize so top 10bits and last bit are 0 */ - nx = normalize(x); - ny = normalize(y); - nz = normalize(z); - - if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) { - r = x * y + z; - if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM; - return r; - } - if (nz.e >= 0x7ff - 0x3ff - 52 - 1) { - if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */ - r = x * y + z; - if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM; - return r; - } - return z; - } - - /* mul: r = x*y */ - mul(&rhi, &rlo, nx.m, ny.m); - /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ - - /* align exponents */ - e = nx.e + ny.e; - d = nz.e - e; - /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ - if (d > 0) { - if (d < 64) { - zlo = nz.m << d; - zhi = nz.m >> (64 - d); - } else { - zlo = 0; - zhi = nz.m; - e = nz.e - 64; - d -= 64; - if (d < 64 && d) { - rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d)); - rhi = rhi >> d; - } else if (d) { - rlo = 1; - rhi = 0; - } - } - } else { - zhi = 0; - d = -d; - if (d == 0) { - zlo = nz.m; - } else if (d < 64) { - zlo = nz.m >> d | !!(nz.m << (64 - d)); - } else { - zlo = 1; - } - } - - /* add */ - sign = nx.sign ^ ny.sign; - samesign = !(sign ^ nz.sign); - nonzero = 1; - if (samesign) { - /* r += z */ - rlo += zlo; - rhi += zhi + (rlo < zlo); - } else { - /* r -= z */ - UINT64 t = rlo; - rlo -= zlo; - rhi = rhi - zhi - (t < rlo); - if (rhi >> 63) { - rlo = -rlo; - rhi = -rhi - !!rlo; - sign = !sign; - } - nonzero = !!rhi; - } - - /* set rhi to top 63bit of the result (last bit is sticky) */ - if (nonzero) { - e += 64; - if (rhi >> 32) { - BitScanReverse((DWORD*)&d, rhi >> 32); - d = 31 - d - 1; - } else { - BitScanReverse((DWORD*)&d, rhi); - d = 63 - d - 1; - } - /* note: d > 0 */ - rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d); - } else if (rlo) { - if (rlo >> 32) { - BitScanReverse((DWORD*)&d, rlo >> 32); - d = 31 - d - 1; - } else { - BitScanReverse((DWORD*)&d, rlo); - d = 63 - d - 1; - } - if (d < 0) - rhi = rlo >> 1 | (rlo & 1); - else - rhi = rlo << d; - } else { - /* exact +-0 */ - return x * y + z; - } - e -= d; - - /* convert to double */ - i = rhi; /* i is in [1<<62,(1<<63)-1] */ - if (sign) - i = -i; - r = i; /* |r| is in [0x1p62,0x1p63] */ - - if (e < -1022 - 62) { - /* result is subnormal before rounding */ - if (e == -1022 - 63) { - double c = 0x1p63; - if (sign) - c = -c; - if (r == c) { - /* min normal after rounding, underflow depends - on arch behaviour which can be imitated by - a double to float conversion */ - float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r; - return DBL_MIN / FLT_MIN * fltmin; - } - /* one bit is lost when scaled, add another top bit to - only round once at conversion if it is inexact */ - if (rhi << 53) { - double tiny; - - i = rhi >> 1 | (rhi & 1) | 1ull << 62; - if (sign) - i = -i; - r = i; - r = 2 * r - c; /* remove top bit */ - - /* raise underflow portably, such that it - cannot be optimized away */ - tiny = DBL_MIN / FLT_MIN * r; - r += (double)(tiny * tiny) * (r - r); - } - } else { - /* only round once when scaled */ - d = 10; - i = (rhi >> d | !!(rhi << (64 - d))) << d; - if (sign) - i = -i; - r = i; - } - } - return scalbn(r, e); -} - -/********************************************************************* - * fmaf (MSVCRT.@) - * - * Copied from musl: src/math/fmaf.c - */ -float CDECL fmaf( float x, float y, float z ) -{ - union { double f; UINT64 i; } u; - double xy, err; - int e, neg; - - xy = (double)x * y; - u.f = xy + z; - e = u.i>>52 & 0x7ff; - /* Common case: The double precision result is fine. */ - if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ - e == 0x7ff || /* NaN */ - (u.f - xy == z && u.f - z == xy) || /* exact */ - (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */ - { - if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM; - - /* underflow may not be raised correctly, example: - fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */ - if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT) - fp_barrierf((float)u.f * (float)u.f); - return u.f; - } - - /* - * If result is inexact, and exactly halfway between two float values, - * we need to adjust the low-order bit in the direction of the error. - */ - neg = u.i >> 63; - if (neg == (z > xy)) - err = xy - u.f + z; - else - err = z - u.f + xy; - if (neg == (err < 0)) - u.i++; - else - u.i--; - return u.f; -} - #if defined(__i386__) || defined(__x86_64__) static void _setfp_sse( unsigned int *cw, unsigned int cw_mask, unsigned int *sw, unsigned int sw_mask ) diff --git a/dlls/msvcrt/msvcrt.spec b/dlls/msvcrt/msvcrt.spec index f339757438f..4d4228cc819 100644 --- a/dlls/msvcrt/msvcrt.spec +++ b/dlls/msvcrt/msvcrt.spec @@ -1297,8 +1297,6 @@ @ cdecl fgetws(ptr long ptr) @ cdecl floor(double) @ cdecl -arch=!i386 floorf(float) -@ cdecl fma(double double double) -@ cdecl -arch=!i386 fmaf(float float float) @ cdecl fmod(double double) @ cdecl -arch=!i386 fmodf(float float) @ cdecl fopen(str str) diff --git a/libs/musl/src/math/fma.c b/libs/musl/src/math/fma.c index 9639756a8f3..0de94002400 100644 --- a/libs/musl/src/math/fma.c +++ b/libs/musl/src/math/fma.c @@ -58,19 +58,23 @@ static void mul(uint64_t *hi, uint64_t *lo, uint64_t x, uint64_t y)

double __cdecl fma(double x, double y, double z) { - #pragma STDC FENV_ACCESS ON - /* normalize so top 10bits and last bit are 0 */ struct num nx, ny, nz; nx = normalize(x); ny = normalize(y); nz = normalize(z);

- if (nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN) - return x*y + z; + if (nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN) { + double r = x * y + z; + if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) errno = EDOM; + return r; + } if (nz.e >= ZEROINFNAN) { - if (nz.e > ZEROINFNAN) /* z==0 */ - return x*y + z; + if (nz.e > ZEROINFNAN) { /* z==0 */ + double r = x * y + z; + if (!isnan(x) && !isnan(y) && isnan(r)) errno = EDOM; + return r; + } return z; }

@@ -86,7 +90,7 @@ double __cdecl fma(double x, double y, double z) if (d > 0) { if (d < 64) { zlo = nz.m<<d; - zhi = nz.m>>64-d; + zhi = nz.m>>(64-d); } else { zlo = 0; zhi = nz.m; @@ -94,7 +98,7 @@ double __cdecl fma(double x, double y, double z) d -= 64; if (d == 0) { } else if (d < 64) { - rlo = rhi<<64-d | rlo>>d | !!(rlo<<64-d); + rlo = rhi<<(64-d) | rlo>>d | !!(rlo<<(64-d)); rhi = rhi>>d; } else { rlo = 1; @@ -107,7 +111,7 @@ double __cdecl fma(double x, double y, double z) if (d == 0) { zlo = nz.m; } else if (d < 64) { - zlo = nz.m>>d | !!(nz.m<<64-d); + zlo = nz.m>>d | !!(nz.m<<(64-d)); } else { zlo = 1; } @@ -139,7 +143,7 @@ double __cdecl fma(double x, double y, double z) e += 64; d = a_clz_64(rhi)-1; /* note: d > 0 */ - rhi = rhi<<d | rlo>>64-d | !!(rlo<<d); + rhi = rhi<<d | rlo>>(64-d) | !!(rlo<<d); } else if (rlo) { d = a_clz_64(rlo)-1; if (d < 0) @@ -190,7 +194,7 @@ double __cdecl fma(double x, double y, double z) } else { /* only round once when scaled */ d = 10; - i = ( rhi>>d | !!(rhi<<64-d) ) << d; + i = ( rhi>>d | !!(rhi<<(64-d)) ) << d; if (sign) i = -i; r = i; diff --git a/libs/musl/src/math/fmaf.c b/libs/musl/src/math/fmaf.c index e4c3847c0a1..3e4636fd204 100644 --- a/libs/musl/src/math/fmaf.c +++ b/libs/musl/src/math/fmaf.c @@ -39,7 +39,6 @@ */ float __cdecl fmaf(float x, float y, float z) { - #pragma STDC FENV_ACCESS ON double xy, result; union {double f; uint64_t i;} u; int e; @@ -54,6 +53,7 @@ float __cdecl fmaf(float x, float y, float z) (result - xy == z && result - z == xy) || /* exact */ fegetround() != FE_TONEAREST) /* not round-to-nearest */ { + if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) errno = EDOM; /* underflow may not be raised correctly, example: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)

-- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

Piotr Caban (＠piotr)

2:46 p.m.

New subject: [PATCH 0/12] MR2601: msvcrt: Use more functions from the bundled musl library. - approved

This merge request was approved by Piotr Caban.

-- https://gitlab.winehq.org/wine/wine/-/merge_requests/2601

944

Age (days ago)

944

Last active (days ago)

wine-gitlab@winehq.org

13 comments

3 participants

tags (0)

participants (3)

Alexandre Julliard
Alexandre Julliard (＠julliard)
Piotr Caban (＠piotr)