libquadmath/math/acosq.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /*
  13    __float128 expansions are
  14    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  15    and are incorporated herein by permission of the author.  The author
  16    reserves the right to distribute this material elsewhere under different
  17    copying permissions.  These modifications are distributed here under
  18    the following terms:
  19
  20     This library is free software; you can redistribute it and/or
  21     modify it under the terms of the GNU Lesser General Public
  22     License as published by the Free Software Foundation; either
  23     version 2.1 of the License, or (at your option) any later version.
  24
  25     This library is distributed in the hope that it will be useful,
  26     but WITHOUT ANY WARRANTY; without even the implied warranty of
  27     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  28     Lesser General Public License for more details.
  29
  30     You should have received a copy of the GNU Lesser General Public
  31     License along with this library; if not, write to the Free Software
  32     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
  33
  34 /* __ieee754_acosl(x)
  35  * Method :
  36  *      acos(x)  = pi/2 - asin(x)
  37  *      acos(-x) = pi/2 + asin(x)
  38  * For |x| <= 0.375
  39  *      acos(x) = pi/2 - asin(x)
  40  * Between .375 and .5 the approximation is
  41  *      acos(0.4375 + x) = acos(0.4375) + x P(x) / Q(x)
  42  * Between .5 and .625 the approximation is
  43  *      acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
  44  * For x > 0.625,
  45  *      acos(x) = 2 asin(sqrt((1-x)/2))
  46  *      computed with an extended precision square root in the leading term.
  47  * For x < -0.625
  48  *      acos(x) = pi - 2 asin(sqrt((1-|x|)/2))
  49  *
  50  * Special cases:
  51  *      if x is NaN, return x itself;
  52  *      if |x|>1, return NaN with invalid signal.
  53  *
  54  * Functions needed: __ieee754_sqrtl.
  55  */
  56
  57 #include "quadmath-imp.h"
  58
  59 static const __float128
  60   one = 1.0Q,
  61   pio2_hi = 1.5707963267948966192313216916397514420986Q,
  62   pio2_lo = 4.3359050650618905123985220130216759843812E-35Q,
  63
  64   /* acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
  65      -0.0625 <= x <= 0.0625
  66      peak relative error 3.3e-35  */
  67
  68   rS0 =  5.619049346208901520945464704848780243887E0Q,
  69   rS1 = -4.460504162777731472539175700169871920352E1Q,
  70   rS2 =  1.317669505315409261479577040530751477488E2Q,
  71   rS3 = -1.626532582423661989632442410808596009227E2Q,
  72   rS4 =  3.144806644195158614904369445440583873264E1Q,
  73   rS5 =  9.806674443470740708765165604769099559553E1Q,
  74   rS6 = -5.708468492052010816555762842394927806920E1Q,
  75   rS7 = -1.396540499232262112248553357962639431922E1Q,
  76   rS8 =  1.126243289311910363001762058295832610344E1Q,
  77   rS9 =  4.956179821329901954211277873774472383512E-1Q,
  78   rS10 = -3.313227657082367169241333738391762525780E-1Q,
  79
  80   sS0 = -4.645814742084009935700221277307007679325E0Q,
  81   sS1 =  3.879074822457694323970438316317961918430E1Q,
  82   sS2 = -1.221986588013474694623973554726201001066E2Q,
  83   sS3 =  1.658821150347718105012079876756201905822E2Q,
  84   sS4 = -4.804379630977558197953176474426239748977E1Q,
  85   sS5 = -1.004296417397316948114344573811562952793E2Q,
  86   sS6 =  7.530281592861320234941101403870010111138E1Q,
  87   sS7 =  1.270735595411673647119592092304357226607E1Q,
  88   sS8 = -1.815144839646376500705105967064792930282E1Q,
  89   sS9 = -7.821597334910963922204235247786840828217E-2Q,
  90   /* 1.000000000000000000000000000000000000000E0 */
  91
  92   acosr5625 = 9.7338991014954640492751132535550279812151E-1Q,
  93   pimacosr5625 = 2.1682027434402468335351320579240000860757E0Q,
  94
  95   /* acos(0.4375 + x) = acos(0.4375) + x rS(x) / sS(x)
  96      -0.0625 <= x <= 0.0625
  97      peak relative error 2.1e-35  */
  98
  99   P0 =  2.177690192235413635229046633751390484892E0Q,
 100   P1 = -2.848698225706605746657192566166142909573E1Q,
 101   P2 =  1.040076477655245590871244795403659880304E2Q,
 102   P3 = -1.400087608918906358323551402881238180553E2Q,
 103   P4 =  2.221047917671449176051896400503615543757E1Q,
 104   P5 =  9.643714856395587663736110523917499638702E1Q,
 105   P6 = -5.158406639829833829027457284942389079196E1Q,
 106   P7 = -1.578651828337585944715290382181219741813E1Q,
 107   P8 =  1.093632715903802870546857764647931045906E1Q,
 108   P9 =  5.448925479898460003048760932274085300103E-1Q,
 109   P10 = -3.315886001095605268470690485170092986337E-1Q,
 110   Q0 = -1.958219113487162405143608843774587557016E0Q,
 111   Q1 =  2.614577866876185080678907676023269360520E1Q,
 112   Q2 = -9.990858606464150981009763389881793660938E1Q,
 113   Q3 =  1.443958741356995763628660823395334281596E2Q,
 114   Q4 = -3.206441012484232867657763518369723873129E1Q,
 115   Q5 = -1.048560885341833443564920145642588991492E2Q,
 116   Q6 =  6.745883931909770880159915641984874746358E1Q,
 117   Q7 =  1.806809656342804436118449982647641392951E1Q,
 118   Q8 = -1.770150690652438294290020775359580915464E1Q,
 119   Q9 = -5.659156469628629327045433069052560211164E-1Q,
 120   /* 1.000000000000000000000000000000000000000E0 */
 121
 122   acosr4375 = 1.1179797320499710475919903296900511518755E0Q,
 123   pimacosr4375 = 2.0236129215398221908706530535894517323217E0Q,
 124
 125   /* asin(x) = x + x^3 pS(x^2) / qS(x^2)
 126      0 <= x <= 0.5
 127      peak relative error 1.9e-35  */
 128   pS0 = -8.358099012470680544198472400254596543711E2Q,
 129   pS1 =  3.674973957689619490312782828051860366493E3Q,
 130   pS2 = -6.730729094812979665807581609853656623219E3Q,
 131   pS3 =  6.643843795209060298375552684423454077633E3Q,
 132   pS4 = -3.817341990928606692235481812252049415993E3Q,
 133   pS5 =  1.284635388402653715636722822195716476156E3Q,
 134   pS6 = -2.410736125231549204856567737329112037867E2Q,
 135   pS7 =  2.219191969382402856557594215833622156220E1Q,
 136   pS8 = -7.249056260830627156600112195061001036533E-1Q,
 137   pS9 =  1.055923570937755300061509030361395604448E-3Q,
 138
 139   qS0 = -5.014859407482408326519083440151745519205E3Q,
 140   qS1 =  2.430653047950480068881028451580393430537E4Q,
 141   qS2 = -4.997904737193653607449250593976069726962E4Q,
 142   qS3 =  5.675712336110456923807959930107347511086E4Q,
 143   qS4 = -3.881523118339661268482937768522572588022E4Q,
 144   qS5 =  1.634202194895541569749717032234510811216E4Q,
 145   qS6 = -4.151452662440709301601820849901296953752E3Q,
 146   qS7 =  5.956050864057192019085175976175695342168E2Q,
 147   qS8 = -4.175375777334867025769346564600396877176E1Q;
 148   /* 1.000000000000000000000000000000000000000E0 */
 149
 150 __float128
 151 acosq (__float128 x)
 152 {
 153   __float128 z, r, w, p, q, s, t, f2;
 154   int32_t ix, sign;
 155   ieee854_float128 u;
 156
 157   u.value = x;
 158   sign = u.words32.w0;
 159   ix = sign & 0x7fffffff;
 160   u.words32.w0 = ix;            /* |x| */
 161   if (ix >= 0x3fff0000)         /* |x| >= 1 */
 162     {
 163       if (ix == 0x3fff0000
 164           && (u.words32.w1 | u.words32.w2 | u.words32.w3) == 0)
 165         {                       /* |x| == 1 */
 166           if ((sign & 0x80000000) == 0)
 167             return 0.0;         /* acos(1) = 0  */
 168           else
 169             return (2.0 * pio2_hi) + (2.0 * pio2_lo);   /* acos(-1)= pi */
 170         }
 171       return (x - x) / (x - x); /* acos(|x| > 1) is NaN */
 172     }
 173   else if (ix < 0x3ffe0000)     /* |x| < 0.5 */
 174     {
 175       if (ix < 0x3fc60000)      /* |x| < 2**-57 */
 176         return pio2_hi + pio2_lo;
 177       if (ix < 0x3ffde000)      /* |x| < .4375 */
 178         {
 179           /* Arcsine of x.  */
 180           z = x * x;
 181           p = (((((((((pS9 * z
 182                        + pS8) * z
 183                       + pS7) * z
 184                      + pS6) * z
 185                     + pS5) * z
 186                    + pS4) * z
 187                   + pS3) * z
 188                  + pS2) * z
 189                 + pS1) * z
 190                + pS0) * z;
 191           q = (((((((( z
 192                        + qS8) * z
 193                      + qS7) * z
 194                     + qS6) * z
 195                    + qS5) * z
 196                   + qS4) * z
 197                  + qS3) * z
 198                 + qS2) * z
 199                + qS1) * z
 200             + qS0;
 201           r = x + x * p / q;
 202           z = pio2_hi - (r - pio2_lo);
 203           return z;
 204         }
 205       /* .4375 <= |x| < .5 */
 206       t = u.value - 0.4375Q;
 207       p = ((((((((((P10 * t
 208                     + P9) * t
 209                    + P8) * t
 210                   + P7) * t
 211                  + P6) * t
 212                 + P5) * t
 213                + P4) * t
 214               + P3) * t
 215              + P2) * t
 216             + P1) * t
 217            + P0) * t;
 218
 219       q = (((((((((t
 220                    + Q9) * t
 221                   + Q8) * t
 222                  + Q7) * t
 223                 + Q6) * t
 224                + Q5) * t
 225               + Q4) * t
 226              + Q3) * t
 227             + Q2) * t
 228            + Q1) * t
 229         + Q0;
 230       r = p / q;
 231       if (sign & 0x80000000)
 232         r = pimacosr4375 - r;
 233       else
 234         r = acosr4375 + r;
 235       return r;
 236     }
 237   else if (ix < 0x3ffe4000)     /* |x| < 0.625 */
 238     {
 239       t = u.value - 0.5625Q;
 240       p = ((((((((((rS10 * t
 241                     + rS9) * t
 242                    + rS8) * t
 243                   + rS7) * t
 244                  + rS6) * t
 245                 + rS5) * t
 246                + rS4) * t
 247               + rS3) * t
 248              + rS2) * t
 249             + rS1) * t
 250            + rS0) * t;
 251
 252       q = (((((((((t
 253                    + sS9) * t
 254                   + sS8) * t
 255                  + sS7) * t
 256                 + sS6) * t
 257                + sS5) * t
 258               + sS4) * t
 259              + sS3) * t
 260             + sS2) * t
 261            + sS1) * t
 262         + sS0;
 263       if (sign & 0x80000000)
 264         r = pimacosr5625 - p / q;
 265       else
 266         r = acosr5625 + p / q;
 267       return r;
 268     }
 269   else
 270     {                           /* |x| >= .625 */
 271       z = (one - u.value) * 0.5;
 272       s = sqrtq (z);
 273       /* Compute an extended precision square root from
 274          the Newton iteration  s -> 0.5 * (s + z / s).
 275          The change w from s to the improved value is
 276             w = 0.5 * (s + z / s) - s  = (s^2 + z)/2s - s = (z - s^2)/2s.
 277           Express s = f1 + f2 where f1 * f1 is exactly representable.
 278           w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
 279           s + w has extended precision.  */
 280       u.value = s;
 281       u.words32.w2 = 0;
 282       u.words32.w3 = 0;
 283       f2 = s - u.value;
 284       w = z - u.value * u.value;
 285       w = w - 2.0 * u.value * f2;
 286       w = w - f2 * f2;
 287       w = w / (2.0 * s);
 288       /* Arcsine of s.  */
 289       p = (((((((((pS9 * z
 290                    + pS8) * z
 291                   + pS7) * z
 292                  + pS6) * z
 293                 + pS5) * z
 294                + pS4) * z
 295               + pS3) * z
 296              + pS2) * z
 297             + pS1) * z
 298            + pS0) * z;
 299       q = (((((((( z
 300                    + qS8) * z
 301                  + qS7) * z
 302                 + qS6) * z
 303                + qS5) * z
 304               + qS4) * z
 305              + qS3) * z
 306             + qS2) * z
 307            + qS1) * z
 308         + qS0;
 309       r = s + (w + s * p / q);
 310
 311       if (sign & 0x80000000)
 312         w = pio2_hi + (pio2_lo - r);
 313       else
 314         w = r;
 315       return 2.0 * w;
 316     }
 317 }