libgcc/config/libbid/bid128_sqrt.c

   1 /* Copyright (C) 2007  Free Software Foundation, Inc.
   2
   3 This file is part of GCC.
   4
   5 GCC is free software; you can redistribute it and/or modify it under
   6 the terms of the GNU General Public License as published by the Free
   7 Software Foundation; either version 2, or (at your option) any later
   8 version.
   9
  10 In addition to the permissions in the GNU General Public License, the
  11 Free Software Foundation gives you unlimited permission to link the
  12 compiled version of this file into combinations with other programs,
  13 and to distribute those combinations without any restriction coming
  14 from the use of this file.  (The General Public License restrictions
  15 do apply in other respects; for example, they cover modification of
  16 the file, and distribution when not linked into a combine
  17 executable.)
  18
  19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  21 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  22 for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GCC; see the file COPYING.  If not, write to the Free
  26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  27 02110-1301, USA.  */
  28
  29 #define BID_128RES
  30 #include "sqrt_macros.h"
  31
  32 BID128_FUNCTION_ARG1(__bid128_sqrt, x)
  33
  34   UINT256 M256, C256, C4, C8;
  35   UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
  36   UINT64 sign_x, Carry;
  37   SINT64 D;
  38   int_float fx, f64;
  39   int exponent_x = 0, bin_expon_cx;
  40   int digits, scale, exponent_q;
  41
  42   // unpack arguments, check for NaN or Infinity
  43   if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
  44     res.w[1] = x.w[1];
  45     res.w[0] = x.w[0];
  46     // NaN ?
  47     if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
  48 #ifdef SET_STATUS_FLAGS
  49       if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)    // sNaN
  50         __set_status_flags (pfpsf, INVALID_EXCEPTION);
  51 #endif
  52           res.w[1] &= QUIET_MASK64;
  53       BID_RETURN (res);
  54     }
  55     // x is Infinity?
  56     if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
  57       if (sign_x) {
  58         // -Inf, return NaN
  59         res.w[1] = 0x7c00000000000000ull;
  60 #ifdef SET_STATUS_FLAGS
  61         __set_status_flags (pfpsf, INVALID_EXCEPTION);
  62 #endif
  63       }
  64       BID_RETURN (res);
  65     }
  66     // x is 0 otherwise
  67
  68     res.w[1] =
  69       sign_x |
  70       ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) <<
  71        49);
  72     BID_RETURN (res);
  73   }
  74   if (sign_x) {
  75     res.w[1] = 0x7c00000000000000ull;
  76     res.w[0] = 0;
  77 #ifdef SET_STATUS_FLAGS
  78     __set_status_flags (pfpsf, INVALID_EXCEPTION);
  79 #endif
  80     BID_RETURN (res);
  81   }
  82   // 2^64
  83   f64.i = 0x5f800000;
  84
  85   // fx ~ CX
  86   fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
  87   bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
  88   digits = __bid_estimate_decimal_digits[bin_expon_cx];
  89
  90   A10 = CX;
  91   if (exponent_x & 1) {
  92     A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
  93     A10.w[0] = CX.w[0] << 3;
  94     CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
  95     CX2.w[0] = CX.w[0] << 1;
  96     __add_128_128 (A10, A10, CX2);
  97   }
  98
  99   CS.w[0] = short_sqrt128 (A10);
 100   CS.w[1] = 0;
 101   // check for exact result
 102   if (CS.w[0] * CS.w[0] == A10.w[0]) {
 103     __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
 104     if (S2.w[1] == A10.w[1])    // && S2.w[0]==A10.w[0])
 105     {
 106       get_BID128_very_fast (&res, 0,
 107                             (exponent_x +
 108                              DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
 109       BID_RETURN (res);
 110     }
 111   }
 112   // get number of digits in CX
 113   D = CX.w[1] - __bid_power10_index_binexp_128[bin_expon_cx].w[1];
 114   if (D > 0
 115       || (!D && CX.w[0] >= __bid_power10_index_binexp_128[bin_expon_cx].w[0]))
 116     digits++;
 117
 118   // if exponent is odd, scale coefficient by 10
 119   scale = 67 - digits;
 120   exponent_q = exponent_x - scale;
 121   scale += (exponent_q & 1);    // exp. bias is even
 122
 123   if (scale > 38) {
 124     T128 = __bid_power10_table_128[scale - 37];
 125     __mul_128x128_low (CX1, CX, T128);
 126
 127     TP128 = __bid_power10_table_128[37];
 128     __mul_128x128_to_256 (C256, CX1, TP128);
 129   } else {
 130     T128 = __bid_power10_table_128[scale];
 131     __mul_128x128_to_256 (C256, CX, T128);
 132   }
 133
 134
 135   // 4*C256
 136   C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
 137   C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
 138   C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
 139   C4.w[0] = C256.w[0] << 2;
 140
 141   long_sqrt128 (&CS, C256);
 142
 143 #ifndef IEEE_ROUND_NEAREST
 144 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 145   if (!((rnd_mode) & 3)) {
 146 #endif
 147 #endif
 148     // compare to midpoints
 149     CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
 150     CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
 151     // CSM^2
 152     //__mul_128x128_to_256(M256, CSM, CSM);
 153     __sqr128_to_256 (M256, CSM);
 154
 155     if (C4.w[3] > M256.w[3]
 156         || (C4.w[3] == M256.w[3]
 157             && (C4.w[2] > M256.w[2]
 158                 || (C4.w[2] == M256.w[2]
 159                     && (C4.w[1] > M256.w[1]
 160                         || (C4.w[1] == M256.w[1]
 161                             && C4.w[0] > M256.w[0])))))) {
 162       // round up
 163       CS.w[0]++;
 164       if (!CS.w[0])
 165         CS.w[1]++;
 166     } else {
 167       C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
 168       C8.w[0] = CS.w[0] << 3;
 169       // M256 - 8*CSM
 170       __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
 171       __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
 172       __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
 173       M256.w[3] = M256.w[3] - Carry;
 174
 175       // if CSM' > C256, round up
 176       if (M256.w[3] > C4.w[3]
 177           || (M256.w[3] == C4.w[3]
 178               && (M256.w[2] > C4.w[2]
 179                   || (M256.w[2] == C4.w[2]
 180                       && (M256.w[1] > C4.w[1]
 181                           || (M256.w[1] == C4.w[1]
 182                               && M256.w[0] > C4.w[0])))))) {
 183         // round down
 184         if (!CS.w[0])
 185           CS.w[1]--;
 186         CS.w[0]--;
 187       }
 188     }
 189 #ifndef IEEE_ROUND_NEAREST
 190 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 191   } else {
 192     __sqr128_to_256 (M256, CS);
 193     C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
 194     C8.w[0] = CS.w[0] << 1;
 195     if (M256.w[3] > C256.w[3]
 196         || (M256.w[3] == C256.w[3]
 197             && (M256.w[2] > C256.w[2]
 198                 || (M256.w[2] == C256.w[2]
 199                     && (M256.w[1] > C256.w[1]
 200                         || (M256.w[1] == C256.w[1]
 201                             && M256.w[0] > C256.w[0])))))) {
 202       __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
 203       __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
 204       __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
 205       M256.w[3] = M256.w[3] - Carry;
 206       M256.w[0]++;
 207       if (!M256.w[0]) {
 208         M256.w[1]++;
 209         if (!M256.w[1]) {
 210           M256.w[2]++;
 211           if (!M256.w[2])
 212             M256.w[3]++;
 213         }
 214       }
 215
 216       if (!CS.w[0])
 217         CS.w[1]--;
 218       CS.w[0]--;
 219
 220       if (M256.w[3] > C256.w[3]
 221           || (M256.w[3] == C256.w[3]
 222               && (M256.w[2] > C256.w[2]
 223                   || (M256.w[2] == C256.w[2]
 224                       && (M256.w[1] > C256.w[1]
 225                           || (M256.w[1] == C256.w[1]
 226                               && M256.w[0] > C256.w[0])))))) {
 227
 228         if (!CS.w[0])
 229           CS.w[1]--;
 230         CS.w[0]--;
 231       }
 232     }
 233
 234     else {
 235       __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
 236       __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
 237       __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
 238       M256.w[3] = M256.w[3] + Carry;
 239       M256.w[0]++;
 240       if (!M256.w[0]) {
 241         M256.w[1]++;
 242         if (!M256.w[1]) {
 243           M256.w[2]++;
 244           if (!M256.w[2])
 245             M256.w[3]++;
 246         }
 247       }
 248       if (M256.w[3] < C256.w[3]
 249           || (M256.w[3] == C256.w[3]
 250               && (M256.w[2] < C256.w[2]
 251                   || (M256.w[2] == C256.w[2]
 252                       && (M256.w[1] < C256.w[1]
 253                           || (M256.w[1] == C256.w[1]
 254                               && M256.w[0] <= C256.w[0])))))) {
 255
 256         CS.w[0]++;
 257         if (!CS.w[0])
 258           CS.w[1]++;
 259       }
 260     }
 261     // RU?
 262     if ((rnd_mode) == ROUNDING_UP) {
 263       CS.w[0]++;
 264       if (!CS.w[0])
 265         CS.w[1]++;
 266     }
 267
 268   }
 269 #endif
 270 #endif
 271
 272 #ifdef SET_STATUS_FLAGS
 273   __set_status_flags (pfpsf, INEXACT_EXCEPTION);
 274 #endif
 275   get_BID128_fast (&res, 0,
 276                    (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
 277   BID_RETURN (res);
 278 }