libgcc/config/libbid/bid64_fma.c

   1 /* Copyright (C) 2007, 2009  Free Software Foundation, Inc.
   2
   3 This file is part of GCC.
   4
   5 GCC is free software; you can redistribute it and/or modify it under
   6 the terms of the GNU General Public License as published by the Free
   7 Software Foundation; either version 3, or (at your option) any later
   8 version.
   9
  10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 for more details.
  14
  15 Under Section 7 of GPL version 3, you are granted additional
  16 permissions described in the GCC Runtime Library Exception, version
  17 3.1, as published by the Free Software Foundation.
  18
  19 You should have received a copy of the GNU General Public License and
  20 a copy of the GCC Runtime Library Exception along with this program;
  21 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24 /*****************************************************************************
  25  *    BID64 fma
  26  *****************************************************************************
  27  *
  28  *  Algorithm description:
  29  *
  30  *  if multiplication is guranteed exact (short coefficients)
  31  *     call the unpacked arg. equivalent of bid64_add(x*y, z)
  32  *  else
  33  *     get full coefficient_x*coefficient_y product
  34  *     call subroutine to perform addition of 64-bit argument
  35  *                                         to 128-bit product
  36  *
  37  ****************************************************************************/
  38
  39 #include "bid_inline_add.h"
  40
  41 #if DECIMAL_CALL_BY_REFERENCE
  42 extern void bid64_mul (UINT64 * pres, UINT64 * px,
  43                        UINT64 *
  44                        py _RND_MODE_PARAM _EXC_FLAGS_PARAM
  45                        _EXC_MASKS_PARAM _EXC_INFO_PARAM);
  46 #else
  47
  48 extern UINT64 bid64_mul (UINT64 x,
  49                          UINT64 y _RND_MODE_PARAM
  50                          _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
  51                          _EXC_INFO_PARAM);
  52 #endif
  53
  54 #if DECIMAL_CALL_BY_REFERENCE
  55
  56 void
  57 bid64_fma (UINT64 * pres, UINT64 * px, UINT64 * py,
  58            UINT64 *
  59            pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
  60            _EXC_INFO_PARAM) {
  61   UINT64 x, y, z;
  62 #else
  63
  64 UINT64
  65 bid64_fma (UINT64 x, UINT64 y,
  66            UINT64 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
  67            _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
  68 #endif
  69   UINT128 P, PU, CT, CZ;
  70   UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z,
  71     coefficient_z;
  72   UINT64 C64, remainder_y, res;
  73   UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z;
  74   int_double tempx, tempy;
  75   int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy,
  76     bin_expon_product, rmode;
  77   int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey,
  78     scale_z, uf_status;
  79
  80 #if DECIMAL_CALL_BY_REFERENCE
  81 #if !DECIMAL_GLOBAL_ROUNDING
  82   _IDEC_round rnd_mode = *prnd_mode;
  83 #endif
  84   x = *px;
  85   y = *py;
  86   z = *pz;
  87 #endif
  88
  89   valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
  90   valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
  91   valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z);
  92
  93   // unpack arguments, check for NaN, Infinity, or 0
  94   if (!valid_x || !valid_y || !valid_z) {
  95
  96     if ((y & MASK_NAN) == MASK_NAN) {   // y is NAN
  97       // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
  98       // check first for non-canonical NaN payload
  99       y = y & 0xfe03ffffffffffffull;    // clear G6-G12
 100       if ((y & 0x0003ffffffffffffull) > 999999999999999ull) {
 101         y = y & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
 102       }
 103       if ((y & MASK_SNAN) == MASK_SNAN) {       // y is SNAN
 104         // set invalid flag
 105         *pfpsf |= INVALID_EXCEPTION;
 106         // return quiet (y)
 107         res = y & 0xfdffffffffffffffull;
 108       } else {  // y is QNaN
 109         // return y
 110         res = y;
 111         // if z = SNaN or x = SNaN signal invalid exception
 112         if ((z & MASK_SNAN) == MASK_SNAN
 113             || (x & MASK_SNAN) == MASK_SNAN) {
 114           // set invalid flag
 115           *pfpsf |= INVALID_EXCEPTION;
 116         }
 117       }
 118       BID_RETURN (res)
 119     } else if ((z & MASK_NAN) == MASK_NAN) {    // z is NAN
 120       // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
 121       // check first for non-canonical NaN payload
 122       z = z & 0xfe03ffffffffffffull;    // clear G6-G12
 123       if ((z & 0x0003ffffffffffffull) > 999999999999999ull) {
 124         z = z & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
 125       }
 126       if ((z & MASK_SNAN) == MASK_SNAN) {       // z is SNAN
 127         // set invalid flag
 128         *pfpsf |= INVALID_EXCEPTION;
 129         // return quiet (z)
 130         res = z & 0xfdffffffffffffffull;
 131       } else {  // z is QNaN
 132         // return z
 133         res = z;
 134         // if x = SNaN signal invalid exception
 135         if ((x & MASK_SNAN) == MASK_SNAN) {
 136           // set invalid flag
 137           *pfpsf |= INVALID_EXCEPTION;
 138         }
 139       }
 140       BID_RETURN (res)
 141     } else if ((x & MASK_NAN) == MASK_NAN) {    // x is NAN
 142       // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
 143       // check first for non-canonical NaN payload
 144       x = x & 0xfe03ffffffffffffull;    // clear G6-G12
 145       if ((x & 0x0003ffffffffffffull) > 999999999999999ull) {
 146         x = x & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
 147       }
 148       if ((x & MASK_SNAN) == MASK_SNAN) {       // x is SNAN
 149         // set invalid flag
 150         *pfpsf |= INVALID_EXCEPTION;
 151         // return quiet (x)
 152         res = x & 0xfdffffffffffffffull;
 153       } else {  // x is QNaN
 154         // return x
 155         res = x;        // clear out G[6]-G[16]
 156       }
 157       BID_RETURN (res)
 158     }
 159
 160     if (!valid_x) {
 161       // x is Inf. or 0
 162
 163       // x is Infinity?
 164       if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
 165         // check if y is 0
 166         if (!coefficient_y) {
 167           // y==0, return NaN
 168 #ifdef SET_STATUS_FLAGS
 169           if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull)
 170             __set_status_flags (pfpsf, INVALID_EXCEPTION);
 171 #endif
 172           BID_RETURN (0x7c00000000000000ull);
 173         }
 174         // test if z is Inf of oposite sign
 175         if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
 176             && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
 177           // return NaN
 178 #ifdef SET_STATUS_FLAGS
 179           __set_status_flags (pfpsf, INVALID_EXCEPTION);
 180 #endif
 181           BID_RETURN (0x7c00000000000000ull);
 182         }
 183         // otherwise return +/-Inf
 184         BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
 185                     0x7800000000000000ull);
 186       }
 187       // x is 0
 188       if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)
 189           && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
 190
 191         if (coefficient_z) {
 192           exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y;
 193
 194           sign_z = z & 0x8000000000000000ull;
 195
 196           if (exponent_y >= exponent_z)
 197             BID_RETURN (z);
 198           res =
 199             add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
 200                         &rnd_mode, pfpsf);
 201           BID_RETURN (res);
 202         }
 203       }
 204     }
 205     if (!valid_y) {
 206       // y is Inf. or 0
 207
 208       // y is Infinity?
 209       if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) {
 210         // check if x is 0
 211         if (!coefficient_x) {
 212           // y==0, return NaN
 213 #ifdef SET_STATUS_FLAGS
 214           __set_status_flags (pfpsf, INVALID_EXCEPTION);
 215 #endif
 216           BID_RETURN (0x7c00000000000000ull);
 217         }
 218         // test if z is Inf of oposite sign
 219         if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
 220             && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
 221 #ifdef SET_STATUS_FLAGS
 222           __set_status_flags (pfpsf, INVALID_EXCEPTION);
 223 #endif
 224           // return NaN
 225           BID_RETURN (0x7c00000000000000ull);
 226         }
 227         // otherwise return +/-Inf
 228         BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
 229                     0x7800000000000000ull);
 230       }
 231       // y is 0
 232       if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
 233
 234         if (coefficient_z) {
 235           exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS;
 236
 237           sign_z = z & 0x8000000000000000ull;
 238
 239           if (exponent_y >= exponent_z)
 240             BID_RETURN (z);
 241           res =
 242             add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
 243                         &rnd_mode, pfpsf);
 244           BID_RETURN (res);
 245         }
 246       }
 247     }
 248
 249     if (!valid_z) {
 250       // y is Inf. or 0
 251
 252       // test if y is NaN/Inf
 253       if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) {
 254         BID_RETURN (coefficient_z & QUIET_MASK64);
 255       }
 256       // z is 0, return x*y
 257       if ((!coefficient_x) || (!coefficient_y)) {
 258         //0+/-0
 259         exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS;
 260         if (exponent_x > DECIMAL_MAX_EXPON_64)
 261           exponent_x = DECIMAL_MAX_EXPON_64;
 262         else if (exponent_x < 0)
 263           exponent_x = 0;
 264         if (exponent_x <= exponent_z)
 265           res = ((UINT64) exponent_x) << 53;
 266         else
 267           res = ((UINT64) exponent_z) << 53;
 268         if ((sign_x ^ sign_y) == sign_z)
 269           res |= sign_z;
 270 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 271 #ifndef IEEE_ROUND_NEAREST
 272         else if (rnd_mode == ROUNDING_DOWN)
 273           res |= 0x8000000000000000ull;
 274 #endif
 275 #endif
 276         BID_RETURN (res);
 277       }
 278     }
 279   }
 280
 281   /* get binary coefficients of x and y */
 282
 283   //--- get number of bits in the coefficients of x and y ---
 284   // version 2 (original)
 285   tempx.d = (double) coefficient_x;
 286   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52);
 287
 288   tempy.d = (double) coefficient_y;
 289   bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52);
 290
 291   // magnitude estimate for coefficient_x*coefficient_y is
 292   //        2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx)
 293   bin_expon_product = bin_expon_cx + bin_expon_cy;
 294
 295   // check if coefficient_x*coefficient_y<2^(10*k+3)
 296   // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1
 297   if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) {
 298     //  easy multiply
 299     C64 = coefficient_x * coefficient_y;
 300     final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS;
 301     if ((final_exponent > 0) || (!coefficient_z)) {
 302       res =
 303         get_add64 (sign_x ^ sign_y,
 304                    final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf);
 305       BID_RETURN (res);
 306     } else {
 307       P.w[0] = C64;
 308       P.w[1] = 0;
 309       extra_digits = 0;
 310     }
 311   } else {
 312     if (!coefficient_z) {
 313 #if DECIMAL_CALL_BY_REFERENCE
 314       bid64_mul (&res, px,
 315                  py _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
 316                  _EXC_INFO_ARG);
 317 #else
 318       res =
 319         bid64_mul (x,
 320                    y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
 321                    _EXC_INFO_ARG);
 322 #endif
 323       BID_RETURN (res);
 324     }
 325     // get 128-bit product: coefficient_x*coefficient_y
 326     __mul_64x64_to_128 (P, coefficient_x, coefficient_y);
 327
 328     // tighten binary range of P:  leading bit is 2^bp
 329     // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1
 330     bin_expon_product -= 2 * BINARY_EXPONENT_BIAS;
 331     __tight_bin_range_128 (bp, P, bin_expon_product);
 332
 333     // get number of decimal digits in the product
 334     digits_p = estimate_decimal_digits[bp];
 335     if (!(__unsigned_compare_gt_128 (power10_table_128[digits_p], P)))
 336       digits_p++;       // if power10_table_128[digits_p] <= P
 337
 338     // determine number of decimal digits to be rounded out
 339     extra_digits = digits_p - MAX_FORMAT_DIGITS;
 340     final_exponent =
 341       exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS;
 342   }
 343
 344   if (((unsigned) final_exponent) >= 3 * 256) {
 345     if (final_exponent < 0) {
 346       //--- get number of bits in the coefficients of z  ---
 347       tempx.d = (double) coefficient_z;
 348       bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
 349       // get number of decimal digits in the coeff_x
 350       digits_z = estimate_decimal_digits[bin_expon_cx];
 351       if (coefficient_z >= power10_table_128[digits_z].w[0])
 352         digits_z++;
 353       // underflow
 354       if ((final_exponent + 16 < 0)
 355           || (exponent_z + digits_z > 33 + final_exponent)) {
 356         res =
 357           BID_normalize (sign_z, exponent_z, coefficient_z,
 358                          sign_x ^ sign_y, 1, rnd_mode, pfpsf);
 359         BID_RETURN (res);
 360       }
 361
 362       ez = exponent_z + digits_z - 16;
 363       if (ez < 0)
 364         ez = 0;
 365       scale_z = exponent_z - ez;
 366       coefficient_z *= power10_table_128[scale_z].w[0];
 367       ey = final_exponent - extra_digits;
 368       extra_digits = ez - ey;
 369       if (extra_digits > 33) {
 370         res =
 371           BID_normalize (sign_z, exponent_z, coefficient_z,
 372                          sign_x ^ sign_y, 1, rnd_mode, pfpsf);
 373         BID_RETURN (res);
 374       }
 375       //else  // extra_digits<=32
 376
 377       if (extra_digits > 17) {
 378         CYh = __truncate (P, 16);
 379         // get remainder
 380         T = power10_table_128[16].w[0];
 381         __mul_64x64_to_64 (CY0L, CYh, T);
 382         remainder_y = P.w[0] - CY0L;
 383
 384         extra_digits -= 16;
 385         P.w[0] = CYh;
 386         P.w[1] = 0;
 387       } else
 388         remainder_y = 0;
 389
 390       // align coeff_x, CYh
 391       __mul_64x64_to_128 (CZ, coefficient_z,
 392                           power10_table_128[extra_digits].w[0]);
 393
 394       if (sign_z == (sign_y ^ sign_x)) {
 395         __add_128_128 (CT, CZ, P);
 396         if (__unsigned_compare_ge_128
 397             (CT, power10_table_128[16 + extra_digits])) {
 398           extra_digits++;
 399           ez++;
 400         }
 401       } else {
 402         if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) {
 403           P.w[0]++;
 404           if (!P.w[0])
 405             P.w[1]++;
 406         }
 407         __sub_128_128 (CT, CZ, P);
 408         if (((SINT64) CT.w[1]) < 0) {
 409           sign_z = sign_y ^ sign_x;
 410           CT.w[0] = 0 - CT.w[0];
 411           CT.w[1] = 0 - CT.w[1];
 412           if (CT.w[0])
 413             CT.w[1]--;
 414         } else if(!(CT.w[1]|CT.w[0]))
 415                 sign_z = (rnd_mode!=ROUNDING_DOWN)? 0: 0x8000000000000000ull;
 416         if (ez
 417             &&
 418             (__unsigned_compare_gt_128
 419              (power10_table_128[15 + extra_digits], CT))) {
 420           extra_digits--;
 421           ez--;
 422         }
 423       }
 424
 425 #ifdef SET_STATUS_FLAGS
 426       uf_status = 0;
 427       if ((!ez)
 428           &&
 429           __unsigned_compare_gt_128 (power10_table_128
 430                                      [extra_digits + 15], CT)) {
 431         rmode = rnd_mode;
 432         if (sign_z && (unsigned) (rmode - 1) < 2)
 433           rmode = 3 - rmode;
 434         //__add_128_64(PU, CT, round_const_table[rmode][extra_digits]);
 435         PU = power10_table_128[extra_digits + 15];
 436         PU.w[0]--;
 437         if (__unsigned_compare_gt_128 (PU, CT)
 438             || (rmode == ROUNDING_DOWN)
 439             || (rmode == ROUNDING_TO_ZERO))
 440           uf_status = UNDERFLOW_EXCEPTION;
 441         else if (extra_digits < 2) {
 442           if ((rmode == ROUNDING_UP)) {
 443             if (!extra_digits)
 444               uf_status = UNDERFLOW_EXCEPTION;
 445             else {
 446               if (remainder_y && (sign_z != (sign_y ^ sign_x)))
 447                 remainder_y = power10_table_128[16].w[0] - remainder_y;
 448
 449               if (power10_table_128[15].w[0] > remainder_y)
 450                 uf_status = UNDERFLOW_EXCEPTION;
 451             }
 452           } else        // RN or RN_away
 453           {
 454             if (remainder_y && (sign_z != (sign_y ^ sign_x)))
 455               remainder_y = power10_table_128[16].w[0] - remainder_y;
 456
 457             if (!extra_digits) {
 458               remainder_y += round_const_table[rmode][15];
 459               if (remainder_y < power10_table_128[16].w[0])
 460                 uf_status = UNDERFLOW_EXCEPTION;
 461             } else {
 462               if (remainder_y < round_const_table[rmode][16])
 463                 uf_status = UNDERFLOW_EXCEPTION;
 464             }
 465           }
 466           //__set_status_flags (pfpsf, uf_status);
 467         }
 468       }
 469 #endif
 470       res =
 471         __bid_full_round64_remainder (sign_z, ez - extra_digits, CT,
 472                                       extra_digits, remainder_y,
 473                                       rnd_mode, pfpsf, uf_status);
 474       BID_RETURN (res);
 475
 476     } else {
 477       if ((sign_z == (sign_x ^ sign_y))
 478           || (final_exponent > 3 * 256 + 15)) {
 479         res =
 480           fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent,
 481                                    1000000000000000ull, rnd_mode,
 482                                    pfpsf);
 483         BID_RETURN (res);
 484       }
 485     }
 486   }
 487
 488
 489   if (extra_digits > 0) {
 490     res =
 491       get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y,
 492                   final_exponent, P, extra_digits, rnd_mode, pfpsf);
 493     BID_RETURN (res);
 494   }
 495   // go to convert_format and exit
 496   else {
 497     C64 = __low_64 (P);
 498
 499     res =
 500       get_add64 (sign_x ^ sign_y,
 501                  exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64,
 502                  sign_z, exponent_z, coefficient_z,
 503                  rnd_mode, pfpsf);
 504     BID_RETURN (res);
 505   }
 506 }