libgcc/config/libbid/inline_bid_add.h

   1 /* Copyright (C) 2007  Free Software Foundation, Inc.
   2
   3 This file is part of GCC.
   4
   5 GCC is free software; you can redistribute it and/or modify it under
   6 the terms of the GNU General Public License as published by the Free
   7 Software Foundation; either version 2, or (at your option) any later
   8 version.
   9
  10 In addition to the permissions in the GNU General Public License, the
  11 Free Software Foundation gives you unlimited permission to link the
  12 compiled version of this file into combinations with other programs,
  13 and to distribute those combinations without any restriction coming
  14 from the use of this file.  (The General Public License restrictions
  15 do apply in other respects; for example, they cover modification of
  16 the file, and distribution when not linked into a combine
  17 executable.)
  18
  19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  21 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  22 for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GCC; see the file COPYING.  If not, write to the Free
  26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  27 02110-1301, USA.  */
  28
  29 /*****************************************************************************
  30  *
  31  *    Helper add functions (for fma)
  32  *
  33  *    __BID_INLINE__ UINT64 get_add64(
  34  *        UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  35  *        UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
  36  *                                       int rounding_mode)
  37  *
  38  *   __BID_INLINE__ UINT64 get_add128(
  39  *                       UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  40  *                       UINT64 sign_y, int final_exponent_y, UINT128 CY,
  41  *                       int extra_digits, int rounding_mode)
  42  *
  43  *****************************************************************************
  44  *
  45  *  Algorithm description:
  46  *
  47  *  get_add64:  same as BID64 add, but arguments are unpacked and there
  48  *                                 are no special case checks
  49  *
  50  *  get_add128: add 64-bit coefficient to 128-bit product (which contains
  51  *                                        16+extra_digits decimal digits),
  52  *                         return BID64 result
  53  *              - the exponents are compared and the two coefficients are
  54  *                properly aligned for addition/subtraction
  55  *              - multiple paths are needed
  56  *              - final result exponent is calculated and the lower term is
  57  *                      rounded first if necessary, to avoid manipulating
  58  *                      coefficients longer than 128 bits
  59  *
  60  ****************************************************************************/
  61
  62 #ifndef _INLINE_BID_ADD_H_
  63 #define _INLINE_BID_ADD_H_
  64
  65 #include "bid_internal.h"
  66
  67 #define MAX_FORMAT_DIGITS     16
  68 #define DECIMAL_EXPONENT_BIAS 398
  69 #define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
  70 #define BINARY_EXPONENT_BIAS  0x3ff
  71 #define UPPER_EXPON_LIMIT     51
  72
  73 ///////////////////////////////////////////////////////////////////////
  74 //
  75 // get_add64() is essentially the same as bid_add(), except that
  76 //             the arguments are unpacked
  77 //
  78 //////////////////////////////////////////////////////////////////////
  79 __BID_INLINE__ UINT64
  80 get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  81            UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
  82            int rounding_mode, unsigned *fpsc) {
  83   UINT128 CA, CT, CT_new;
  84   UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
  85     rem_a;
  86   UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp,
  87     C64_new;
  88   int_double tempx;
  89   int exponent_a, exponent_b, diff_dec_expon;
  90   int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
  91   unsigned rmode, status;
  92
  93   // sort arguments by exponent
  94   if (exponent_x <= exponent_y) {
  95     sign_a = sign_y;
  96     exponent_a = exponent_y;
  97     coefficient_a = coefficient_y;
  98     sign_b = sign_x;
  99     exponent_b = exponent_x;
 100     coefficient_b = coefficient_x;
 101   } else {
 102     sign_a = sign_x;
 103     exponent_a = exponent_x;
 104     coefficient_a = coefficient_x;
 105     sign_b = sign_y;
 106     exponent_b = exponent_y;
 107     coefficient_b = coefficient_y;
 108   }
 109
 110   // exponent difference
 111   diff_dec_expon = exponent_a - exponent_b;
 112
 113   /* get binary coefficients of x and y */
 114
 115   //--- get number of bits in the coefficients of x and y ---
 116
 117   tempx.d = (double) coefficient_a;
 118   bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
 119
 120   if (!coefficient_a) {
 121     return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode,
 122                       fpsc);
 123   }
 124   if (diff_dec_expon > MAX_FORMAT_DIGITS) {
 125     // normalize a to a 16-digit coefficient
 126
 127     scale_ca = __bid_estimate_decimal_digits[bin_expon_ca];
 128     if (coefficient_a >= __bid_power10_table_128[scale_ca].w[0])
 129       scale_ca++;
 130
 131     scale_k = 16 - scale_ca;
 132
 133     coefficient_a *= __bid_power10_table_128[scale_k].w[0];
 134
 135     diff_dec_expon -= scale_k;
 136     exponent_a -= scale_k;
 137
 138     /* get binary coefficients of x and y */
 139
 140     //--- get number of bits in the coefficients of x and y ---
 141     tempx.d = (double) coefficient_a;
 142     bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
 143
 144     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
 145 #ifdef SET_STATUS_FLAGS
 146       if (coefficient_b) {
 147         __set_status_flags (fpsc, INEXACT_EXCEPTION);
 148       }
 149 #endif
 150
 151 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 152 #ifndef IEEE_ROUND_NEAREST
 153       if (((rounding_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST
 154       {
 155         switch (rounding_mode) {
 156         case ROUNDING_DOWN:
 157           if (sign_b) {
 158             coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
 159             if (coefficient_a < 1000000000000000ull) {
 160               exponent_a--;
 161               coefficient_a = 9999999999999999ull;
 162             } else if (coefficient_a >= 10000000000000000ull) {
 163               exponent_a++;
 164               coefficient_a = 1000000000000000ull;
 165             }
 166           }
 167           break;
 168         case ROUNDING_UP:
 169           if (!sign_b) {
 170             coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
 171             if (coefficient_a < 1000000000000000ull) {
 172               exponent_a--;
 173               coefficient_a = 9999999999999999ull;
 174             } else if (coefficient_a >= 10000000000000000ull) {
 175               exponent_a++;
 176               coefficient_a = 1000000000000000ull;
 177             }
 178           }
 179           break;
 180         default:        // RZ
 181           if (sign_a != sign_b) {
 182             coefficient_a--;
 183             if (coefficient_a < 1000000000000000ull) {
 184               exponent_a--;
 185               coefficient_a = 9999999999999999ull;
 186             }
 187           }
 188           break;
 189         }
 190       } else
 191 #endif
 192 #endif
 193         // check special case here
 194         if ((coefficient_a == 1000000000000000ull)
 195             && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
 196             && (sign_a ^ sign_b) && (coefficient_b > 5000000000000000ull)) {
 197         coefficient_a = 9999999999999999ull;
 198         exponent_a--;
 199       }
 200
 201       return get_BID64 (sign_a, exponent_a, coefficient_a,
 202                         rounding_mode, fpsc);
 203     }
 204   }
 205   // test whether coefficient_a*10^(exponent_a-exponent_b)  may exceed 2^62
 206   if (bin_expon_ca + __bid_estimate_bin_expon[diff_dec_expon] < 60) {
 207     // coefficient_a*10^(exponent_a-exponent_b)<2^63
 208
 209     // multiply by 10^(exponent_a-exponent_b)
 210     coefficient_a *= __bid_power10_table_128[diff_dec_expon].w[0];
 211
 212     // sign mask
 213     sign_b = ((SINT64) sign_b) >> 63;
 214     // apply sign to coeff. of b
 215     coefficient_b = (coefficient_b + sign_b) ^ sign_b;
 216
 217     // apply sign to coefficient a
 218     sign_a = ((SINT64) sign_a) >> 63;
 219     coefficient_a = (coefficient_a + sign_a) ^ sign_a;
 220
 221     coefficient_a += coefficient_b;
 222     // get sign
 223     sign_s = ((SINT64) coefficient_a) >> 63;
 224     coefficient_a = (coefficient_a + sign_s) ^ sign_s;
 225     sign_s &= 0x8000000000000000ull;
 226
 227     // coefficient_a < 10^16 ?
 228     if (coefficient_a < __bid_power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
 229 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 230 #ifndef IEEE_ROUND_NEAREST
 231       if (rounding_mode == ROUNDING_DOWN && (!coefficient_a)
 232           && sign_a != sign_b)
 233         sign_s = 0x8000000000000000ull;
 234 #endif
 235 #endif
 236       return get_BID64 (sign_s, exponent_b, coefficient_a,
 237                         rounding_mode, fpsc);
 238     }
 239     // otherwise rounding is necessary
 240
 241     // already know coefficient_a<10^19
 242     // coefficient_a < 10^17 ?
 243     if (coefficient_a < __bid_power10_table_128[17].w[0])
 244       extra_digits = 1;
 245     else if (coefficient_a < __bid_power10_table_128[18].w[0])
 246       extra_digits = 2;
 247     else
 248       extra_digits = 3;
 249
 250 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 251 #ifndef IEEE_ROUND_NEAREST
 252     rmode = rounding_mode;
 253     if (sign_s && (unsigned) (rmode - 1) < 2)
 254       rmode = 3 - rmode;
 255 #else
 256     rmode = 0;
 257 #endif
 258 #else
 259     rmode = 0;
 260 #endif
 261     coefficient_a += __bid_round_const_table[rmode][extra_digits];
 262
 263     // get P*(2^M[extra_digits])/10^extra_digits
 264     __mul_64x64_to_128 (CT, coefficient_a,
 265                         __bid_reciprocals10_64[extra_digits]);
 266
 267     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 268     amount = __bid_short_recip_scale[extra_digits];
 269     C64 = CT.w[1] >> amount;
 270
 271   } else {
 272     // coefficient_a*10^(exponent_a-exponent_b) is large
 273     sign_s = sign_a;
 274
 275 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 276 #ifndef IEEE_ROUND_NEAREST
 277     rmode = rounding_mode;
 278     if (sign_s && (unsigned) (rmode - 1) < 2)
 279       rmode = 3 - rmode;
 280 #else
 281     rmode = 0;
 282 #endif
 283 #else
 284     rmode = 0;
 285 #endif
 286
 287     // check whether we can take faster path
 288     scale_ca = __bid_estimate_decimal_digits[bin_expon_ca];
 289
 290     sign_ab = sign_a ^ sign_b;
 291     sign_ab = ((SINT64) sign_ab) >> 63;
 292
 293     // T1 = 10^(16-diff_dec_expon)
 294     T1 = __bid_power10_table_128[16 - diff_dec_expon].w[0];
 295
 296     // get number of digits in coefficient_a
 297     //P_ca = __bid_power10_table_128[scale_ca].w[0];
 298     //P_ca_m1 = __bid_power10_table_128[scale_ca-1].w[0];
 299     if (coefficient_a >= __bid_power10_table_128[scale_ca].w[0]) {
 300       scale_ca++;
 301       //P_ca_m1 = P_ca;
 302       //P_ca = __bid_power10_table_128[scale_ca].w[0];
 303     }
 304
 305     scale_k = 16 - scale_ca;
 306
 307     // apply sign
 308     //Ts = (T1 + sign_ab) ^ sign_ab;
 309
 310     // test range of ca
 311     //X = coefficient_a + Ts - P_ca_m1;
 312
 313     // addition
 314     saved_ca = coefficient_a - T1;
 315     coefficient_a =
 316       (SINT64) saved_ca *(SINT64) __bid_power10_table_128[scale_k].w[0];
 317     extra_digits = diff_dec_expon - scale_k;
 318
 319     // apply sign
 320     saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
 321     // add 10^16 and rounding constant
 322     coefficient_b =
 323       saved_cb + 10000000000000000ull +
 324       __bid_round_const_table[rmode][extra_digits];
 325
 326     // get P*(2^M[extra_digits])/10^extra_digits
 327     __mul_64x64_to_128 (CT, coefficient_b,
 328                         __bid_reciprocals10_64[extra_digits]);
 329
 330     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 331     amount = __bid_short_recip_scale[extra_digits];
 332     C0_64 = CT.w[1] >> amount;
 333
 334     // result coefficient
 335     C64 = C0_64 + coefficient_a;
 336     // filter out difficult (corner) cases
 337     // the following test is equivalent to
 338     // ( (initial_coefficient_a + Ts) < P_ca &&
 339     //     (initial_coefficient_a + Ts) > P_ca_m1 ),
 340     // which ensures the number of digits in coefficient_a does not change
 341     // after adding (the appropriately scaled and rounded) coefficient_b
 342     if ((UINT64) (C64 - 1000000000000000ull - 1) > 9000000000000000ull - 2) {
 343       if (C64 >= 10000000000000000ull) {
 344         // result has more than 16 digits
 345         if (!scale_k) {
 346           // must divide coeff_a by 10
 347           saved_ca = saved_ca + T1;
 348           __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
 349               //__bid_reciprocals10_64[1]);
 350           coefficient_a = CA.w[1] >> 1;
 351           rem_a =
 352             saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
 353           coefficient_a = coefficient_a - T1;
 354
 355           saved_cb +=
 356             /*90000000000000000 */ +rem_a *
 357             __bid_power10_table_128[diff_dec_expon].w[0];
 358         } else
 359           coefficient_a =
 360             (SINT64) (saved_ca - T1 -
 361                       (T1 << 3)) * (SINT64) __bid_power10_table_128[scale_k -
 362                                                               1].w[0];
 363
 364         extra_digits++;
 365         coefficient_b =
 366           saved_cb + 100000000000000000ull +
 367           __bid_round_const_table[rmode][extra_digits];
 368
 369         // get P*(2^M[extra_digits])/10^extra_digits
 370         __mul_64x64_to_128 (CT, coefficient_b,
 371                             __bid_reciprocals10_64[extra_digits]);
 372
 373         // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 374         amount = __bid_short_recip_scale[extra_digits];
 375         C0_64 = CT.w[1] >> amount;
 376
 377         // result coefficient
 378         C64 = C0_64 + coefficient_a;
 379       } else if (C64 <= 1000000000000000ull) {
 380         // less than 16 digits in result
 381         coefficient_a =
 382           (SINT64) saved_ca *(SINT64) __bid_power10_table_128[scale_k +
 383                                                         1].w[0];
 384         //extra_digits --;
 385         exponent_b--;
 386         coefficient_b =
 387           (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
 388           __bid_round_const_table[rmode][extra_digits];
 389
 390         // get P*(2^M[extra_digits])/10^extra_digits
 391         __mul_64x64_to_128 (CT_new, coefficient_b,
 392                             __bid_reciprocals10_64[extra_digits]);
 393
 394         // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 395         amount = __bid_short_recip_scale[extra_digits];
 396         C0_64 = CT_new.w[1] >> amount;
 397
 398         // result coefficient
 399         C64_new = C0_64 + coefficient_a;
 400         if (C64_new < 10000000000000000ull) {
 401           C64 = C64_new;
 402 #ifdef SET_STATUS_FLAGS
 403           CT = CT_new;
 404 #endif
 405         } else
 406           exponent_b++;
 407       }
 408
 409     }
 410
 411   }
 412
 413 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 414 #ifndef IEEE_ROUND_NEAREST
 415   if (rmode == 0) //ROUNDING_TO_NEAREST
 416 #endif
 417     if (C64 & 1) {
 418       // check whether fractional part of initial_P/10^extra_digits
 419       // is exactly .5
 420       // this is the same as fractional part of
 421       //      (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
 422
 423       // get remainder
 424       remainder_h = CT.w[1] << (64 - amount);
 425
 426       // test whether fractional part is 0
 427       if (!remainder_h && (CT.w[0] < __bid_reciprocals10_64[extra_digits])) {
 428         C64--;
 429       }
 430     }
 431 #endif
 432
 433 #ifdef SET_STATUS_FLAGS
 434   status = INEXACT_EXCEPTION;
 435
 436   // get remainder
 437   remainder_h = CT.w[1] << (64 - amount);
 438
 439   switch (rmode) {
 440   case ROUNDING_TO_NEAREST:
 441   case ROUNDING_TIES_AWAY:
 442     // test whether fractional part is 0
 443     if ((remainder_h == 0x8000000000000000ull)
 444         && (CT.w[0] < __bid_reciprocals10_64[extra_digits]))
 445       status = EXACT_STATUS;
 446     break;
 447   case ROUNDING_DOWN:
 448   case ROUNDING_TO_ZERO:
 449     if (!remainder_h && (CT.w[0] < __bid_reciprocals10_64[extra_digits]))
 450       status = EXACT_STATUS;
 451     break;
 452   default:
 453     // round up
 454     __add_carry_out (tmp, carry, CT.w[0],
 455                      __bid_reciprocals10_64[extra_digits]);
 456     if ((remainder_h >> (64 - amount)) + carry >=
 457         (((UINT64) 1) << amount))
 458       status = EXACT_STATUS;
 459     break;
 460   }
 461   __set_status_flags (fpsc, status);
 462
 463 #endif
 464
 465   return get_BID64 (sign_s, exponent_b + extra_digits, C64,
 466                     rounding_mode, fpsc);
 467 }
 468
 469
 470 ///////////////////////////////////////////////////////////////////
 471 // round 128-bit coefficient and return result in BID64 format
 472 // do not worry about midpoint cases
 473 //////////////////////////////////////////////////////////////////
 474 static UINT64
 475 __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P,
 476                              int extra_digits, int rounding_mode,
 477                              unsigned *fpsc) {
 478   UINT128 Q_high, Q_low, C128;
 479   UINT64 C64;
 480   int amount, rmode;
 481
 482   rmode = rounding_mode;
 483 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 484 #ifndef IEEE_ROUND_NEAREST
 485   if (sign && (unsigned) (rmode - 1) < 2)
 486     rmode = 3 - rmode;
 487 #endif
 488 #endif
 489   __add_128_64 (P, P, __bid_round_const_table[rmode][extra_digits]);
 490
 491   // get P*(2^M[extra_digits])/10^extra_digits
 492   __mul_128x128_full (Q_high, Q_low, P,
 493                       __bid_reciprocals10_128[extra_digits]);
 494
 495   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
 496   amount = __bid_recip_scale[extra_digits];
 497   __shr_128 (C128, Q_high, amount);
 498
 499   C64 = __low_64 (C128);
 500
 501 #ifdef SET_STATUS_FLAGS
 502
 503   __set_status_flags (fpsc, INEXACT_EXCEPTION);
 504
 505 #endif
 506
 507   return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
 508 }
 509
 510 ///////////////////////////////////////////////////////////////////
 511 // round 128-bit coefficient and return result in BID64 format
 512 ///////////////////////////////////////////////////////////////////
 513 static UINT64
 514 __bid_full_round64 (UINT64 sign, int exponent, UINT128 P,
 515                     int extra_digits, int rounding_mode,
 516                     unsigned *fpsc) {
 517   UINT128 Q_high, Q_low, C128, Stemp, PU;
 518   UINT64 remainder_h, C64, carry, CY;
 519   int amount, amount2, rmode, status = 0;
 520
 521   if (exponent < 0) {
 522     if (exponent >= -16 && (extra_digits + exponent < 0)) {
 523       extra_digits = -exponent;
 524 #ifdef SET_STATUS_FLAGS
 525       if (extra_digits > 0) {
 526         rmode = rounding_mode;
 527         if (sign && (unsigned) (rmode - 1) < 2)
 528           rmode = 3 - rmode;
 529         __add_128_128 (PU, P,
 530                        __bid_round_const_table_128[rmode][extra_digits]);
 531         if (__unsigned_compare_gt_128
 532             (__bid_power10_table_128[extra_digits + 15], PU))
 533           status = UNDERFLOW_EXCEPTION;
 534       }
 535 #endif
 536     }
 537   }
 538
 539   if (extra_digits > 0) {
 540     exponent += extra_digits;
 541     rmode = rounding_mode;
 542     if (sign && (unsigned) (rmode - 1) < 2)
 543       rmode = 3 - rmode;
 544     __add_128_128 (P, P, __bid_round_const_table_128[rmode][extra_digits]);
 545
 546     // get P*(2^M[extra_digits])/10^extra_digits
 547     __mul_128x128_full (Q_high, Q_low, P,
 548                         __bid_reciprocals10_128[extra_digits]);
 549
 550     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
 551     amount = __bid_recip_scale[extra_digits];
 552     __shr_128_long (C128, Q_high, amount);
 553
 554     C64 = __low_64 (C128);
 555
 556 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 557 #ifndef IEEE_ROUND_NEAREST
 558     if (rmode == 0) //ROUNDING_TO_NEAREST
 559 #endif
 560       if (C64 & 1) {
 561         // check whether fractional part of initial_P/10^extra_digits
 562         // is exactly .5
 563
 564         // get remainder
 565         amount2 = 64 - amount;
 566         remainder_h = 0;
 567         remainder_h--;
 568         remainder_h >>= amount2;
 569         remainder_h = remainder_h & Q_high.w[0];
 570
 571         if (!remainder_h
 572             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
 573                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
 574                     && Q_low.w[0] <
 575                     __bid_reciprocals10_128[extra_digits].w[0]))) {
 576           C64--;
 577         }
 578       }
 579 #endif
 580
 581 #ifdef SET_STATUS_FLAGS
 582     status |= INEXACT_EXCEPTION;
 583
 584     // get remainder
 585     remainder_h = Q_high.w[0] << (64 - amount);
 586
 587     switch (rmode) {
 588     case ROUNDING_TO_NEAREST:
 589     case ROUNDING_TIES_AWAY:
 590       // test whether fractional part is 0
 591       if (remainder_h == 0x8000000000000000ull
 592           && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
 593               || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
 594                   && Q_low.w[0] <
 595                   __bid_reciprocals10_128[extra_digits].w[0])))
 596         status = EXACT_STATUS;
 597       break;
 598     case ROUNDING_DOWN:
 599     case ROUNDING_TO_ZERO:
 600       if (!remainder_h
 601           && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
 602               || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
 603                   && Q_low.w[0] <
 604                   __bid_reciprocals10_128[extra_digits].w[0])))
 605         status = EXACT_STATUS;
 606       break;
 607     default:
 608       // round up
 609       __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
 610                        __bid_reciprocals10_128[extra_digits].w[0]);
 611       __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
 612                           __bid_reciprocals10_128[extra_digits].w[1], CY);
 613       if ((remainder_h >> (64 - amount)) + carry >=
 614           (((UINT64) 1) << amount))
 615         status = EXACT_STATUS;
 616     }
 617
 618     __set_status_flags (fpsc, status);
 619
 620 #endif
 621   } else {
 622     C64 = P.w[0];
 623     if (!C64) {
 624       sign = 0;
 625       if (rounding_mode == ROUNDING_DOWN)
 626         sign = 0x8000000000000000ull;
 627     }
 628   }
 629   return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
 630 }
 631
 632 /////////////////////////////////////////////////////////////////////////////////
 633 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
 634 // the lowest 64 bits (remainder_P) are used for midpoint checking only
 635 ////////////////////////////////////////////////////////////////////////////////
 636 UINT64
 637 __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P,
 638                               int extra_digits, UINT64 remainder_P,
 639                               int rounding_mode, unsigned *fpsc,
 640                               unsigned uf_status) {
 641   UINT128 Q_high, Q_low, C128, Stemp;
 642   UINT64 remainder_h, C64, carry, CY;
 643   int amount, amount2, rmode, status = uf_status;
 644
 645   rmode = rounding_mode;
 646   if (sign && (unsigned) (rmode - 1) < 2)
 647     rmode = 3 - rmode;
 648   if (rmode == ROUNDING_UP && remainder_P) {
 649     P.w[0]++;
 650     if (!P.w[0])
 651       P.w[1]++;
 652   }
 653
 654   if (extra_digits) {
 655     __add_128_64 (P, P, __bid_round_const_table[rmode][extra_digits]);
 656
 657     // get P*(2^M[extra_digits])/10^extra_digits
 658     __mul_128x128_full (Q_high, Q_low, P,
 659                         __bid_reciprocals10_128[extra_digits]);
 660
 661     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
 662     amount = __bid_recip_scale[extra_digits];
 663     __shr_128 (C128, Q_high, amount);
 664
 665     C64 = __low_64 (C128);
 666
 667 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 668 #ifndef IEEE_ROUND_NEAREST
 669     if (rmode == 0) //ROUNDING_TO_NEAREST
 670 #endif
 671       if (!remainder_P && (C64 & 1)) {
 672         // check whether fractional part of initial_P/10^extra_digits
 673         // is exactly .5
 674
 675         // get remainder
 676         amount2 = 64 - amount;
 677         remainder_h = 0;
 678         remainder_h--;
 679         remainder_h >>= amount2;
 680         remainder_h = remainder_h & Q_high.w[0];
 681
 682         if (!remainder_h
 683             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
 684                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
 685                     && Q_low.w[0] <
 686                     __bid_reciprocals10_128[extra_digits].w[0]))) {
 687           C64--;
 688         }
 689       }
 690 #endif
 691
 692 #ifdef SET_STATUS_FLAGS
 693     status |= INEXACT_EXCEPTION;
 694
 695     if (!remainder_P) {
 696       // get remainder
 697       remainder_h = Q_high.w[0] << (64 - amount);
 698
 699       switch (rmode) {
 700       case ROUNDING_TO_NEAREST:
 701       case ROUNDING_TIES_AWAY:
 702         // test whether fractional part is 0
 703         if (remainder_h == 0x8000000000000000ull
 704             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
 705                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
 706                     && Q_low.w[0] <
 707                     __bid_reciprocals10_128[extra_digits].w[0])))
 708           status = EXACT_STATUS;
 709         break;
 710       case ROUNDING_DOWN:
 711       case ROUNDING_TO_ZERO:
 712         if (!remainder_h
 713             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
 714                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
 715                     && Q_low.w[0] <
 716                     __bid_reciprocals10_128[extra_digits].w[0])))
 717           status = EXACT_STATUS;
 718         break;
 719       default:
 720         // round up
 721         __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
 722                          __bid_reciprocals10_128[extra_digits].w[0]);
 723         __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
 724                             __bid_reciprocals10_128[extra_digits].w[1], CY);
 725         if ((remainder_h >> (64 - amount)) + carry >=
 726             (((UINT64) 1) << amount))
 727           status = EXACT_STATUS;
 728       }
 729     }
 730     __set_status_flags (fpsc, status);
 731
 732 #endif
 733   } else {
 734     C64 = P.w[0];
 735 #ifdef SET_STATUS_FLAGS
 736     if (remainder_P) {
 737       __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION);
 738     }
 739 #endif
 740   }
 741
 742   return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode,
 743                     fpsc);
 744 }
 745
 746
 747 ///////////////////////////////////////////////////////////////////
 748 // get P/10^extra_digits
 749 // result fits in 64 bits
 750 ///////////////////////////////////////////////////////////////////
 751 __BID_INLINE__ UINT64
 752 __truncate (UINT128 P, int extra_digits)
 753 // extra_digits <= 16
 754 {
 755   UINT128 Q_high, Q_low, C128;
 756   UINT64 C64;
 757   int amount;
 758
 759   // get P*(2^M[extra_digits])/10^extra_digits
 760   __mul_128x128_full (Q_high, Q_low, P,
 761                       __bid_reciprocals10_128[extra_digits]);
 762
 763   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
 764   amount = __bid_recip_scale[extra_digits];
 765   __shr_128 (C128, Q_high, amount);
 766
 767   C64 = __low_64 (C128);
 768
 769   return C64;
 770 }
 771
 772
 773 ///////////////////////////////////////////////////////////////////
 774 // return number of decimal digits in 128-bit value X
 775 ///////////////////////////////////////////////////////////////////
 776 __BID_INLINE__ int
 777 __get_dec_digits64 (UINT128 X) {
 778   int_double tempx;
 779   int digits_x, bin_expon_cx;
 780
 781   if (!X.w[1]) {
 782     //--- get number of bits in the coefficients of x and y ---
 783     tempx.d = (double) X.w[0];
 784     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
 785     // get number of decimal digits in the coeff_x
 786     digits_x = __bid_estimate_decimal_digits[bin_expon_cx];
 787     if (X.w[0] >= __bid_power10_table_128[digits_x].w[0])
 788       digits_x++;
 789     return digits_x;
 790   }
 791   tempx.d = (double) X.w[1];
 792   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
 793   // get number of decimal digits in the coeff_x
 794   digits_x = __bid_estimate_decimal_digits[bin_expon_cx + 64];
 795   if (__unsigned_compare_ge_128 (X, __bid_power10_table_128[digits_x]))
 796     digits_x++;
 797
 798   return digits_x;
 799 }
 800
 801
 802 ////////////////////////////////////////////////////////////////////////////////
 803 //
 804 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
 805 //
 806 ////////////////////////////////////////////////////////////////////////////////
 807 __BID_INLINE__ UINT64
 808 get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
 809             UINT64 sign_y, int final_exponent_y, UINT128 CY,
 810             int extra_digits, int rounding_mode, unsigned *fpsc) {
 811   UINT128 CY_L, CX, FS, F, CT, ST, T2;
 812   UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y;
 813   SINT64 D = 0;
 814   int_double tempx;
 815   int diff_dec_expon, extra_digits2, exponent_y, status;
 816   int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode;
 817
 818   // CY has more than 16 decimal digits
 819
 820   exponent_y = final_exponent_y - extra_digits;
 821
 822 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
 823   rounding_mode = 0;
 824 #endif
 825 #ifdef IEEE_ROUND_NEAREST
 826   rounding_mode = 0;
 827 #endif
 828
 829   if (exponent_x > exponent_y) {
 830     // normalize x
 831     //--- get number of bits in the coefficients of x and y ---
 832     tempx.d = (double) coefficient_x;
 833     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
 834     // get number of decimal digits in the coeff_x
 835     digits_x = __bid_estimate_decimal_digits[bin_expon_cx];
 836     if (coefficient_x >= __bid_power10_table_128[digits_x].w[0])
 837       digits_x++;
 838
 839     extra_dx = 16 - digits_x;
 840     coefficient_x *= __bid_power10_table_128[extra_dx].w[0];
 841     if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) {
 842       extra_dx++;
 843       coefficient_x = 10000000000000000ull;
 844     }
 845     exponent_x -= extra_dx;
 846
 847     if (exponent_x > exponent_y) {
 848
 849       // exponent_x > exponent_y
 850       diff_dec_expon = exponent_x - exponent_y;
 851
 852       if (exponent_x <= final_exponent_y + 1) {
 853         __mul_64x64_to_128 (CX, coefficient_x,
 854                             __bid_power10_table_128[diff_dec_expon].w[0]);
 855
 856         if (sign_x == sign_y) {
 857           __add_128_128 (CT, CY, CX);
 858           if ((exponent_x >
 859                final_exponent_y) /*&& (final_exponent_y>0) */ )
 860             extra_digits++;
 861           if (__unsigned_compare_ge_128
 862               (CT, __bid_power10_table_128[16 + extra_digits]))
 863             extra_digits++;
 864         } else {
 865           __sub_128_128 (CT, CY, CX);
 866           if (((SINT64) CT.w[1]) < 0) {
 867             CT.w[0] = 0 - CT.w[0];
 868             CT.w[1] = 0 - CT.w[1];
 869             if (CT.w[0])
 870               CT.w[1]--;
 871             sign_y = sign_x;
 872           } else if (!(CT.w[1] | CT.w[0])) {
 873             sign_y =
 874               (rounding_mode !=
 875                ROUNDING_DOWN) ? 0 : 0x8000000000000000ull;
 876           }
 877           if ((exponent_x + 1 >=
 878                final_exponent_y) /*&& (final_exponent_y>=0) */ ) {
 879             extra_digits = __get_dec_digits64 (CT) - 16;
 880             if (extra_digits <= 0) {
 881               if (!CT.w[0] && rounding_mode == ROUNDING_DOWN)
 882                 sign_y = 0x8000000000000000ull;
 883               return get_BID64 (sign_y, exponent_y, CT.w[0],
 884                                 rounding_mode, fpsc);
 885             }
 886           } else
 887             if (__unsigned_compare_gt_128
 888                 (__bid_power10_table_128[15 + extra_digits], CT))
 889             extra_digits--;
 890         }
 891
 892         return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits,
 893                                    rounding_mode, fpsc);
 894       }
 895       // diff_dec2+extra_digits is the number of digits to eliminate from
 896       //                           argument CY
 897       diff_dec2 = exponent_x - final_exponent_y;
 898
 899       if (diff_dec2 >= 17) {
 900 #ifndef IEEE_ROUND_NEAREST
 901 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 902         if ((rounding_mode) & 3) {
 903           switch (rounding_mode) {
 904           case ROUNDING_UP:
 905             if (!sign_y) {
 906               D = ((SINT64) (sign_x ^ sign_y)) >> 63;
 907               D = D + D + 1;
 908               coefficient_x += D;
 909             }
 910             break;
 911           case ROUNDING_DOWN:
 912             if (sign_y) {
 913               D = ((SINT64) (sign_x ^ sign_y)) >> 63;
 914               D = D + D + 1;
 915               coefficient_x += D;
 916             }
 917             break;
 918           case ROUNDING_TO_ZERO:
 919             if (sign_y != sign_x) {
 920               D = 0 - 1;
 921               coefficient_x += D;
 922             }
 923             break;
 924           }
 925           if (coefficient_x < 1000000000000000ull) {
 926             coefficient_x -= D;
 927             coefficient_x =
 928               D + (coefficient_x << 1) + (coefficient_x << 3);
 929             exponent_x--;
 930           }
 931         }
 932 #endif
 933 #endif
 934 #ifdef SET_STATUS_FLAGS
 935         if (CY.w[1] | CY.w[0])
 936           __set_status_flags (fpsc, INEXACT_EXCEPTION);
 937 #endif
 938         return get_BID64 (sign_x, exponent_x, coefficient_x,
 939                           rounding_mode, fpsc);
 940       }
 941       // here exponent_x <= 16+final_exponent_y
 942
 943       // truncate CY to 16 dec. digits
 944       CYh = __truncate (CY, extra_digits);
 945
 946       // get remainder
 947       T = __bid_power10_table_128[extra_digits].w[0];
 948       __mul_64x64_to_64 (CY0L, CYh, T);
 949
 950       remainder_y = CY.w[0] - CY0L;
 951
 952       // align coeff_x, CYh
 953       __mul_64x64_to_128 (CX, coefficient_x,
 954                           __bid_power10_table_128[diff_dec2].w[0]);
 955
 956       if (sign_x == sign_y) {
 957         __add_128_64 (CT, CX, CYh);
 958         if (__unsigned_compare_ge_128
 959             (CT, __bid_power10_table_128[16 + diff_dec2]))
 960           diff_dec2++;
 961       } else {
 962         if (remainder_y)
 963           CYh++;
 964         __sub_128_64 (CT, CX, CYh);
 965         if (__unsigned_compare_gt_128
 966             (__bid_power10_table_128[15 + diff_dec2], CT))
 967           diff_dec2--;
 968       }
 969
 970       return __bid_full_round64_remainder (sign_x, final_exponent_y, CT,
 971                                            diff_dec2, remainder_y,
 972                                            rounding_mode, fpsc, 0);
 973     }
 974   }
 975   // Here (exponent_x <= exponent_y)
 976   {
 977     diff_dec_expon = exponent_y - exponent_x;
 978
 979     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
 980       rmode = rounding_mode;
 981
 982       if ((sign_x ^ sign_y)) {
 983         if (!CY.w[0])
 984           CY.w[1]--;
 985         CY.w[0]--;
 986         if (__unsigned_compare_gt_128
 987             (__bid_power10_table_128[15 + extra_digits], CY)) {
 988           if (rmode & 3) {
 989             extra_digits--;
 990             final_exponent_y--;
 991           } else {
 992             CY.w[0] = 1000000000000000ull;
 993             CY.w[1] = 0;
 994             extra_digits = 0;
 995           }
 996         }
 997       }
 998       __scale128_10 (CY, CY);
 999       extra_digits++;
1000       CY.w[0] |= 1;
1001
1002       return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY,
1003                                           extra_digits, rmode, fpsc);
1004     }
1005     // apply sign to coeff_x
1006     sign_x ^= sign_y;
1007     sign_x = ((SINT64) sign_x) >> 63;
1008     CX.w[0] = (coefficient_x + sign_x) ^ sign_x;
1009     CX.w[1] = sign_x;
1010
1011     // check whether CY (rounded to 16 digits) and CX have
1012     //                     any digits in the same position
1013     diff_dec2 = final_exponent_y - exponent_x;
1014
1015     if (diff_dec2 <= 17) {
1016       // align CY to 10^ex
1017       S = __bid_power10_table_128[diff_dec_expon].w[0];
1018       __mul_64x128_short (CY_L, S, CY);
1019
1020       __add_128_128 (ST, CY_L, CX);
1021       extra_digits2 = __get_dec_digits64 (ST) - 16;
1022       return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2,
1023                                  rounding_mode, fpsc);
1024     }
1025     // truncate CY to 16 dec. digits
1026     CYh = __truncate (CY, extra_digits);
1027
1028     // get remainder
1029     T = __bid_power10_table_128[extra_digits].w[0];
1030     __mul_64x64_to_64 (CY0L, CYh, T);
1031
1032     coefficient_y = CY.w[0] - CY0L;
1033     // add rounding constant
1034     rmode = rounding_mode;
1035     if (sign_y && (unsigned) (rmode - 1) < 2)
1036       rmode = 3 - rmode;
1037 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1038 #ifndef IEEE_ROUND_NEAREST
1039     if (!(rmode & 3)) //ROUNDING_TO_NEAREST
1040 #endif
1041 #endif
1042     {
1043       coefficient_y += __bid_round_const_table[rmode][extra_digits];
1044     }
1045     // align coefficient_y,  coefficient_x
1046     S = __bid_power10_table_128[diff_dec_expon].w[0];
1047     __mul_64x64_to_128 (F, coefficient_y, S);
1048
1049     // fraction
1050     __add_128_128 (FS, F, CX);
1051
1052 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1053 #ifndef IEEE_ROUND_NEAREST
1054     if (rmode == 0) //ROUNDING_TO_NEAREST
1055 #endif
1056     {
1057       // rounding code, here RN_EVEN
1058       // 10^(extra_digits+diff_dec_expon)
1059       T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1060       if (__unsigned_compare_gt_128 (FS, T2)
1061           || ((CYh & 1) && __test_equal_128 (FS, T2))) {
1062         CYh++;
1063         __sub_128_128 (FS, FS, T2);
1064       }
1065     }
1066 #endif
1067 #ifndef IEEE_ROUND_NEAREST
1068 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1069     if (rmode == 4) //ROUNDING_TO_NEAREST
1070 #endif
1071     {
1072       // rounding code, here RN_AWAY
1073       // 10^(extra_digits+diff_dec_expon)
1074       T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1075       if (__unsigned_compare_ge_128 (FS, T2)) {
1076         CYh++;
1077         __sub_128_128 (FS, FS, T2);
1078       }
1079     }
1080 #endif
1081 #ifndef IEEE_ROUND_NEAREST
1082 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1083     switch (rmode) {
1084     case ROUNDING_DOWN:
1085     case ROUNDING_TO_ZERO:
1086       if ((SINT64) FS.w[1] < 0) {
1087         CYh--;
1088         if (CYh < 1000000000000000ull) {
1089           CYh = 9999999999999999ull;
1090           final_exponent_y--;
1091         }
1092       } else {
1093         T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1094         if (__unsigned_compare_ge_128 (FS, T2)) {
1095           CYh++;
1096           __sub_128_128 (FS, FS, T2);
1097         }
1098       }
1099       break;
1100     case ROUNDING_UP:
1101       if ((SINT64) FS.w[1] < 0)
1102         break;
1103       T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1104       if (__unsigned_compare_gt_128 (FS, T2)) {
1105         CYh += 2;
1106         __sub_128_128 (FS, FS, T2);
1107       } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) {
1108         CYh++;
1109         FS.w[1] = FS.w[0] = 0;
1110       } else if (FS.w[1] | FS.w[0])
1111         CYh++;
1112       break;
1113     }
1114 #endif
1115 #endif
1116
1117 #ifdef SET_STATUS_FLAGS
1118     status = INEXACT_EXCEPTION;
1119 #ifndef IEEE_ROUND_NEAREST
1120 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1121     if (!(rmode & 3))
1122 #endif
1123 #endif
1124     {
1125       // RN modes
1126       if ((FS.w[1] ==
1127            __bid_round_const_table_128[0][diff_dec_expon + extra_digits].w[1])
1128           && (FS.w[0] ==
1129               __bid_round_const_table_128[0][diff_dec_expon +
1130                                        extra_digits].w[0]))
1131         status = EXACT_STATUS;
1132     }
1133 #ifndef IEEE_ROUND_NEAREST
1134 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1135     else if (!FS.w[1] && !FS.w[0])
1136       status = EXACT_STATUS;
1137 #endif
1138 #endif
1139
1140     __set_status_flags (fpsc, status);
1141 #endif
1142
1143     return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode,
1144                       fpsc);
1145   }
1146
1147 }
1148
1149
1150 //////////////////////////////////////////////////////////////////////////
1151 //
1152 //    0*10^ey + cz*10^ez,   ey<ez
1153 //
1154 //////////////////////////////////////////////////////////////////////////
1155
1156 __BID_INLINE__ UINT64
1157 add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z,
1158             UINT64 coefficient_z, unsigned *prounding_mode,
1159             unsigned *fpsc) {
1160   int_double tempx;
1161   int bin_expon, scale_k, scale_cz;
1162   int diff_expon;
1163
1164   diff_expon = exponent_z - exponent_y;
1165
1166   tempx.d = (double) coefficient_z;
1167   bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
1168   scale_cz = __bid_estimate_decimal_digits[bin_expon];
1169   if (coefficient_z >= __bid_power10_table_128[scale_cz].w[0])
1170     scale_cz++;
1171
1172   scale_k = 16 - scale_cz;
1173   if (diff_expon < scale_k)
1174     scale_k = diff_expon;
1175   coefficient_z *= __bid_power10_table_128[scale_k].w[0];
1176
1177   return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z,
1178                     *prounding_mode, fpsc);
1179 }
1180 #endif