libgcc/config/libbid/bid128_fma.c

   1 /* Copyright (C) 2007  Free Software Foundation, Inc.
   2
   3 This file is part of GCC.
   4
   5 GCC is free software; you can redistribute it and/or modify it under
   6 the terms of the GNU General Public License as published by the Free
   7 Software Foundation; either version 2, or (at your option) any later
   8 version.
   9
  10 In addition to the permissions in the GNU General Public License, the
  11 Free Software Foundation gives you unlimited permission to link the
  12 compiled version of this file into combinations with other programs,
  13 and to distribute those combinations without any restriction coming
  14 from the use of this file.  (The General Public License restrictions
  15 do apply in other respects; for example, they cover modification of
  16 the file, and distribution when not linked into a combine
  17 executable.)
  18
  19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  21 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  22 for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GCC; see the file COPYING.  If not, write to the Free
  26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  27 02110-1301, USA.  */
  28
  29 /*****************************************************************************
  30  *
  31  *  BID128 fma   x * y + z
  32  *
  33  ****************************************************************************/
  34
  35 #include "bid_internal.h"
  36
  37 static void
  38 rounding_correction (unsigned int rnd_mode,
  39                      unsigned int is_inexact_lt_midpoint,
  40                      unsigned int is_inexact_gt_midpoint,
  41                      unsigned int is_midpoint_lt_even,
  42                      unsigned int is_midpoint_gt_even,
  43                      int unbexp,
  44                      UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
  45   // unbiased true exponent unbexp may be larger than emax
  46
  47   UINT128 res = *ptrres; // expected to have the correct sign and coefficient
  48   // (the exponent field is ignored, as unbexp is used instead)
  49   UINT64 sign, exp;
  50   UINT64 C_hi, C_lo;
  51
  52   // general correction from RN to RA, RM, RP, RZ
  53   // Note: if the result is negative, then is_inexact_lt_midpoint,
  54   // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
  55   // have to be considered as if determined for the absolute value of the
  56   // result (so they seem to be reversed)
  57
  58   if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
  59       is_midpoint_lt_even || is_midpoint_gt_even) {
  60     *ptrfpsf |= INEXACT_EXCEPTION;
  61   }
  62   // apply correction to result calculated with unbounded exponent
  63   sign = res.w[1] & MASK_SIGN;
  64   exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
  65   C_hi = res.w[1] & MASK_COEFF;
  66   C_lo = res.w[0];
  67   if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
  68       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
  69       is_midpoint_gt_even))) ||
  70       (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
  71       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
  72       is_midpoint_gt_even)))) {
  73     // C = C + 1
  74     C_lo = C_lo + 1;
  75     if (C_lo == 0)
  76       C_hi = C_hi + 1;
  77     if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
  78       // C = 10^34 => rounding overflow
  79       C_hi = 0x0000314dc6448d93ull;
  80       C_lo = 0x38c15b0a00000000ull; // 10^33
  81       // exp = exp + EXP_P1;
  82       unbexp = unbexp + 1;
  83       exp = (UINT64) (unbexp + 6176) << 49;
  84     }
  85   } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
  86       ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
  87       (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
  88     // C = C - 1
  89     C_lo = C_lo - 1;
  90     if (C_lo == 0xffffffffffffffffull)
  91       C_hi--;
  92     // check if we crossed into the lower decade
  93     if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
  94       // C = 10^33 - 1
  95       if (exp > 0) {
  96         C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
  97         C_lo = 0x378d8e63ffffffffull;
  98         // exp = exp - EXP_P1;
  99         unbexp = unbexp - 1;
 100         exp = (UINT64) (unbexp + 6176) << 49;
 101       } else { // if exp = 0
 102         if (is_midpoint_lt_even || is_midpoint_lt_even ||
 103             is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
 104           *ptrfpsf |= UNDERFLOW_EXCEPTION;
 105       }
 106     }
 107   } else {
 108     ; // the result is already correct
 109   }
 110   if (unbexp > expmax) { // 6111
 111     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
 112     exp = 0;
 113     if (!sign) { // result is positive
 114       if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
 115         C_hi = 0x7800000000000000ull;
 116         C_lo = 0x0000000000000000ull;
 117       } else { // res = +MAXFP = (10^34-1) * 10^emax
 118         C_hi = 0x5fffed09bead87c0ull;
 119         C_lo = 0x378d8e63ffffffffull;
 120       }
 121     } else { // result is negative
 122       if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
 123         C_hi = 0xf800000000000000ull;
 124         C_lo = 0x0000000000000000ull;
 125       } else { // res = -MAXFP = -(10^34-1) * 10^emax
 126         C_hi = 0xdfffed09bead87c0ull;
 127         C_lo = 0x378d8e63ffffffffull;
 128       }
 129     }
 130   }
 131   // assemble the result
 132   res.w[1] = sign | exp | C_hi;
 133   res.w[0] = C_lo;
 134   *ptrres = res;
 135 }
 136
 137 static void
 138 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
 139   // *z = x + yl assume the sum fits in 256 bits
 140   UINT256 z;
 141   z.w[0] = x.w[0] + y.w[0];
 142   if (z.w[0] < x.w[0]) {
 143     x.w[1]++;
 144     if (x.w[1] == 0x0000000000000000ull) {
 145       x.w[2]++;
 146       if (x.w[2] == 0x0000000000000000ull) {
 147         x.w[3]++;
 148       }
 149     }
 150   }
 151   z.w[1] = x.w[1] + y.w[1];
 152   if (z.w[1] < x.w[1]) {
 153     x.w[2]++;
 154     if (x.w[2] == 0x0000000000000000ull) {
 155       x.w[3]++;
 156     }
 157   }
 158   z.w[2] = x.w[2] + y.w[2];
 159   if (z.w[2] < x.w[2]) {
 160     x.w[3]++;
 161   }
 162   z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
 163   *pz = z;
 164 }
 165
 166 static void
 167 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
 168   // *z = x - y; assume x >= y
 169   UINT256 z;
 170   z.w[0] = x.w[0] - y.w[0];
 171   if (z.w[0] > x.w[0]) {
 172     x.w[1]--;
 173     if (x.w[1] == 0xffffffffffffffffull) {
 174       x.w[2]--;
 175       if (x.w[2] == 0xffffffffffffffffull) {
 176         x.w[3]--;
 177       }
 178     }
 179   }
 180   z.w[1] = x.w[1] - y.w[1];
 181   if (z.w[1] > x.w[1]) {
 182     x.w[2]--;
 183     if (x.w[2] == 0xffffffffffffffffull) {
 184       x.w[3]--;
 185     }
 186   }
 187   z.w[2] = x.w[2] - y.w[2];
 188   if (z.w[2] > x.w[2]) {
 189     x.w[3]--;
 190   }
 191   z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
 192   *pz = z;
 193 }
 194
 195
 196 static int
 197 nr_digits256 (UINT256 R256) {
 198   int ind;
 199   // determine the number of decimal digits in R256
 200   if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
 201     // between 1 and 19 digits
 202     for (ind = 1; ind <= 19; ind++) {
 203       if (R256.w[0] < ten2k64[ind]) {
 204         break;
 205       }
 206     }
 207     // ind digits
 208   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
 209              (R256.w[1] < ten2k128[0].w[1] ||
 210               (R256.w[1] == ten2k128[0].w[1]
 211                && R256.w[0] < ten2k128[0].w[0]))) {
 212     // 20 digits
 213     ind = 20;
 214   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
 215     // between 21 and 38 digits
 216     for (ind = 1; ind <= 18; ind++) {
 217       if (R256.w[1] < ten2k128[ind].w[1] ||
 218           (R256.w[1] == ten2k128[ind].w[1] &&
 219            R256.w[0] < ten2k128[ind].w[0])) {
 220         break;
 221       }
 222     }
 223     // ind + 20 digits
 224     ind = ind + 20;
 225   } else if (R256.w[3] == 0x0 &&
 226              (R256.w[2] < ten2k256[0].w[2] ||
 227               (R256.w[2] == ten2k256[0].w[2] &&
 228                R256.w[1] < ten2k256[0].w[1]) ||
 229               (R256.w[2] == ten2k256[0].w[2] &&
 230                R256.w[1] == ten2k256[0].w[1] &&
 231                R256.w[0] < ten2k256[0].w[0]))) {
 232     // 39 digits
 233     ind = 39;
 234   } else {
 235     // between 40 and 68 digits
 236     for (ind = 1; ind <= 29; ind++) {
 237       if (R256.w[3] < ten2k256[ind].w[3] ||
 238           (R256.w[3] == ten2k256[ind].w[3] &&
 239            R256.w[2] < ten2k256[ind].w[2]) ||
 240           (R256.w[3] == ten2k256[ind].w[3] &&
 241            R256.w[2] == ten2k256[ind].w[2] &&
 242            R256.w[1] < ten2k256[ind].w[1]) ||
 243           (R256.w[3] == ten2k256[ind].w[3] &&
 244            R256.w[2] == ten2k256[ind].w[2] &&
 245            R256.w[1] == ten2k256[ind].w[1] &&
 246            R256.w[0] < ten2k256[ind].w[0])) {
 247         break;
 248       }
 249     }
 250     // ind + 39 digits
 251     ind = ind + 39;
 252   }
 253   return (ind);
 254 }
 255
 256 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
 257 // use the rounding information from ptr_is_* to avoid a double rounding error
 258 static void
 259 add_and_round (int q3,
 260                int q4,
 261                int e4,
 262                int delta,
 263                int p34,
 264                UINT64 z_sign,
 265                UINT64 p_sign,
 266                UINT128 C3,
 267                UINT256 C4,
 268                int rnd_mode,
 269                int *ptr_is_midpoint_lt_even,
 270                int *ptr_is_midpoint_gt_even,
 271                int *ptr_is_inexact_lt_midpoint,
 272                int *ptr_is_inexact_gt_midpoint,
 273                _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
 274
 275   int scale;
 276   int x0;
 277   int ind;
 278   UINT64 R64;
 279   UINT128 P128, R128;
 280   UINT192 P192, R192;
 281   UINT256 R256;
 282   int is_midpoint_lt_even = 0;
 283   int is_midpoint_gt_even = 0;
 284   int is_inexact_lt_midpoint = 0;
 285   int is_inexact_gt_midpoint = 0;
 286   int is_midpoint_lt_even0 = 0;
 287   int is_midpoint_gt_even0 = 0;
 288   int is_inexact_lt_midpoint0 = 0;
 289   int is_inexact_gt_midpoint0 = 0;
 290   int incr_exp = 0;
 291   int is_tiny = 0;
 292   int lt_half_ulp = 0;
 293   int eq_half_ulp = 0;
 294   // int gt_half_ulp = 0;
 295   UINT128 res = *ptrres;
 296
 297   // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
 298   scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
 299   // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
 300
 301   // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
 302   // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
 303   if (scale == 0) {
 304     R256.w[3] = 0x0ull;
 305     R256.w[2] = 0x0ull;
 306     R256.w[1] = C3.w[1];
 307     R256.w[0] = C3.w[0];
 308   } else if (scale <= 19) { // 10^scale fits in 64 bits
 309     P128.w[1] = 0;
 310     P128.w[0] = ten2k64[scale];
 311     __mul_128x128_to_256 (R256, P128, C3);
 312   } else if (scale <= 38) { // 10^scale fits in 128 bits
 313     __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
 314   } else if (scale <= 57) { // 39 <= scale <= 57
 315     // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
 316     // (10^67 has 223 bits; 10^69 has 230 bits);
 317     // must split the computation:
 318     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
 319     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
 320     // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
 321     __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
 322     // now multiply R128 by 10^38
 323     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
 324   } else { // 58 <= scale <= 66
 325     // 10^scale takes between 193 and 220 bits,
 326     // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
 327     // must split the computation:
 328     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
 329     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
 330     // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
 331     // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
 332     // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
 333     __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
 334     // now calculate 10*38 * 10^(scale-38) * C3
 335     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
 336   }
 337   // C3 * 10^scale is now in R256
 338
 339   // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
 340   // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
 341   // possible
 342   // add/subtract C4 and C3 * 10^scale; the exponent is e4
 343   if (p_sign == z_sign) { // R256 = C4 + R256
 344     // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
 345     // but may require rounding
 346     add256 (C4, R256, &R256);
 347   } else { // if (p_sign != z_sign) { // R256 = C4 - R256
 348     // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
 349     // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
 350     // but may require rounding
 351
 352     // compare first R256 = C3 * 10^scale and C4
 353     if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
 354         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
 355         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
 356         R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
 357       // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
 358       // but may require rounding
 359       sub256 (R256, C4, &R256);
 360       // flip p_sign too, because the result has the sign of z
 361       p_sign = z_sign;
 362     } else { // if C4 > C3 * 10^scale
 363       // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
 364       // but may require rounding
 365       sub256 (C4, R256, &R256);
 366     }
 367     // if the result is pure zero, the sign depends on the rounding mode
 368     // (x*y and z had opposite signs)
 369     if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
 370         R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
 371       if (rnd_mode != ROUNDING_DOWN)
 372         p_sign = 0x0000000000000000ull;
 373       else
 374         p_sign = 0x8000000000000000ull;
 375       // the exponent is max (e4, expmin)
 376       if (e4 < -6176)
 377         e4 = expmin;
 378       // assemble result
 379       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
 380       res.w[0] = 0x0;
 381       *ptrres = res;
 382       return;
 383     }
 384   }
 385
 386   // determine the number of decimal digits in R256
 387   ind = nr_digits256 (R256);
 388
 389   // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
 390   // round to the destination precision, with unbounded exponent
 391
 392   if (ind <= p34) {
 393     // result rounded to the destination precision with unbounded exponent
 394     // is exact
 395     if (ind + e4 < p34 + expmin) {
 396       is_tiny = 1; // applies to all rounding modes
 397     }
 398     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
 399     res.w[0] = R256.w[0];
 400     // Note: res is correct only if expmin <= e4 <= expmax
 401   } else { // if (ind > p34)
 402     // if more than P digits, round to nearest to P digits
 403     // round R256 to p34 digits
 404     x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
 405     if (ind <= 38) {
 406       P128.w[1] = R256.w[1];
 407       P128.w[0] = R256.w[0];
 408       round128_19_38 (ind, x0, P128, &R128, &incr_exp,
 409                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 410                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 411     } else if (ind <= 57) {
 412       P192.w[2] = R256.w[2];
 413       P192.w[1] = R256.w[1];
 414       P192.w[0] = R256.w[0];
 415       round192_39_57 (ind, x0, P192, &R192, &incr_exp,
 416                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 417                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 418       R128.w[1] = R192.w[1];
 419       R128.w[0] = R192.w[0];
 420     } else { // if (ind <= 68)
 421       round256_58_76 (ind, x0, R256, &R256, &incr_exp,
 422                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 423                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 424       R128.w[1] = R256.w[1];
 425       R128.w[0] = R256.w[0];
 426     }
 427     // the rounded result has p34 = 34 digits
 428     e4 = e4 + x0 + incr_exp;
 429     if (rnd_mode == ROUNDING_TO_NEAREST) {
 430       if (e4 < expmin) {
 431         is_tiny = 1; // for other rounding modes apply correction
 432       }
 433     } else {
 434       // for RM, RP, RZ, RA apply correction in order to determine tininess
 435       // but do not save the result; apply the correction to
 436       // (-1)^p_sign * significand * 10^0
 437       P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
 438       P128.w[0] = R128.w[0];
 439       rounding_correction (rnd_mode,
 440                            is_inexact_lt_midpoint,
 441                            is_inexact_gt_midpoint, is_midpoint_lt_even,
 442                            is_midpoint_gt_even, 0, &P128, ptrfpsf);
 443       scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
 444       // the number of digits in the significand is p34 = 34
 445       if (e4 + scale < expmin) {
 446         is_tiny = 1;
 447       }
 448     }
 449     ind = p34; // the number of decimal digits in the signifcand of res
 450     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
 451     res.w[0] = R128.w[0];
 452     // Note: res is correct only if expmin <= e4 <= expmax
 453     // set the inexact flag after rounding with bounded exponent, if any
 454   }
 455   // at this point we have the result rounded with unbounded exponent in
 456   // res and we know its tininess:
 457   // res = (-1)^p_sign * significand * 10^e4,
 458   // where q (significand) = ind <= p34
 459   // Note: res is correct only if expmin <= e4 <= expmax
 460
 461   // check for overflow if RN
 462   if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
 463     res.w[1] = p_sign | 0x7800000000000000ull;
 464     res.w[0] = 0x0000000000000000ull;
 465     *ptrres = res;
 466     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
 467     return; // BID_RETURN (res)
 468   } // else not overflow or not RN, so continue
 469
 470   // if (e4 >= expmin) we have the result rounded with bounded exponent
 471   if (e4 < expmin) {
 472     x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
 473     // where the result rounded [at most] once is
 474     //   (-1)^p_sign * significand_res * 10^e4
 475
 476     // avoid double rounding error
 477     is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
 478     is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
 479     is_midpoint_lt_even0 = is_midpoint_lt_even;
 480     is_midpoint_gt_even0 = is_midpoint_gt_even;
 481     is_inexact_lt_midpoint = 0;
 482     is_inexact_gt_midpoint = 0;
 483     is_midpoint_lt_even = 0;
 484     is_midpoint_gt_even = 0;
 485
 486     if (x0 > ind) {
 487       // nothing is left of res when moving the decimal point left x0 digits
 488       is_inexact_lt_midpoint = 1;
 489       res.w[1] = p_sign | 0x0000000000000000ull;
 490       res.w[0] = 0x0000000000000000ull;
 491       e4 = expmin;
 492     } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
 493       // this is <, =, or > 1/2 ulp
 494       // compare the ind-digit value in the significand of res with
 495       // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
 496       // less than, equal to, or greater than 1/2 ulp (significand of res)
 497       R128.w[1] = res.w[1] & MASK_COEFF;
 498       R128.w[0] = res.w[0];
 499       if (ind <= 19) {
 500         if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
 501           lt_half_ulp = 1;
 502           is_inexact_lt_midpoint = 1;
 503         } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
 504           eq_half_ulp = 1;
 505           is_midpoint_gt_even = 1;
 506         } else { // > 1/2 ulp
 507           // gt_half_ulp = 1;
 508           is_inexact_gt_midpoint = 1;
 509         }
 510       } else { // if (ind <= 38) {
 511         if (R128.w[1] < midpoint128[ind - 20].w[1] ||
 512             (R128.w[1] == midpoint128[ind - 20].w[1] &&
 513             R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
 514           lt_half_ulp = 1;
 515           is_inexact_lt_midpoint = 1;
 516         } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
 517             R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
 518           eq_half_ulp = 1;
 519           is_midpoint_gt_even = 1;
 520         } else { // > 1/2 ulp
 521           // gt_half_ulp = 1;
 522           is_inexact_gt_midpoint = 1;
 523         }
 524       }
 525       if (lt_half_ulp || eq_half_ulp) {
 526         // res = +0.0 * 10^expmin
 527         res.w[1] = 0x0000000000000000ull;
 528         res.w[0] = 0x0000000000000000ull;
 529       } else { // if (gt_half_ulp)
 530         // res = +1 * 10^expmin
 531         res.w[1] = 0x0000000000000000ull;
 532         res.w[0] = 0x0000000000000001ull;
 533       }
 534       res.w[1] = p_sign | res.w[1];
 535       e4 = expmin;
 536     } else { // if (1 <= x0 <= ind - 1 <= 33)
 537       // round the ind-digit result to ind - x0 digits
 538
 539       if (ind <= 18) { // 2 <= ind <= 18
 540         round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
 541                       &is_midpoint_lt_even, &is_midpoint_gt_even,
 542                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
 543         res.w[1] = 0x0;
 544         res.w[0] = R64;
 545       } else if (ind <= 38) {
 546         P128.w[1] = res.w[1] & MASK_COEFF;
 547         P128.w[0] = res.w[0];
 548         round128_19_38 (ind, x0, P128, &res, &incr_exp,
 549                         &is_midpoint_lt_even, &is_midpoint_gt_even,
 550                         &is_inexact_lt_midpoint,
 551                         &is_inexact_gt_midpoint);
 552       }
 553       e4 = e4 + x0; // expmin
 554       // we want the exponent to be expmin, so if incr_exp = 1 then
 555       // multiply the rounded result by 10 - it will still fit in 113 bits
 556       if (incr_exp) {
 557         // 64 x 128 -> 128
 558         P128.w[1] = res.w[1] & MASK_COEFF;
 559         P128.w[0] = res.w[0];
 560         __mul_64x128_to_128 (res, ten2k64[1], P128);
 561       }
 562       res.w[1] =
 563         p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
 564       // avoid a double rounding error
 565       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
 566           is_midpoint_lt_even) { // double rounding error upward
 567         // res = res - 1
 568         res.w[0]--;
 569         if (res.w[0] == 0xffffffffffffffffull)
 570           res.w[1]--;
 571         // Note: a double rounding error upward is not possible; for this
 572         // the result after the first rounding would have to be 99...95
 573         // (35 digits in all), possibly followed by a number of zeros; this
 574         // is not possible in Cases (2)-(6) or (15)-(17) which may get here
 575         is_midpoint_lt_even = 0;
 576         is_inexact_lt_midpoint = 1;
 577       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
 578           is_midpoint_gt_even) { // double rounding error downward
 579         // res = res + 1
 580         res.w[0]++;
 581         if (res.w[0] == 0)
 582           res.w[1]++;
 583         is_midpoint_gt_even = 0;
 584         is_inexact_gt_midpoint = 1;
 585       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
 586                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
 587         // if this second rounding was exact the result may still be
 588         // inexact because of the first rounding
 589         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
 590           is_inexact_gt_midpoint = 1;
 591         }
 592         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
 593           is_inexact_lt_midpoint = 1;
 594         }
 595       } else if (is_midpoint_gt_even &&
 596                  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
 597         // pulled up to a midpoint
 598         is_inexact_lt_midpoint = 1;
 599         is_inexact_gt_midpoint = 0;
 600         is_midpoint_lt_even = 0;
 601         is_midpoint_gt_even = 0;
 602       } else if (is_midpoint_lt_even &&
 603                  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
 604         // pulled down to a midpoint
 605         is_inexact_lt_midpoint = 0;
 606         is_inexact_gt_midpoint = 1;
 607         is_midpoint_lt_even = 0;
 608         is_midpoint_gt_even = 0;
 609       } else {
 610         ;
 611       }
 612     }
 613   }
 614   // res contains the correct result
 615   // apply correction if not rounding to nearest
 616   if (rnd_mode != ROUNDING_TO_NEAREST) {
 617     rounding_correction (rnd_mode,
 618                          is_inexact_lt_midpoint, is_inexact_gt_midpoint,
 619                          is_midpoint_lt_even, is_midpoint_gt_even,
 620                          e4, &res, ptrfpsf);
 621   }
 622   if (is_midpoint_lt_even || is_midpoint_gt_even ||
 623       is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
 624     // set the inexact flag
 625     *ptrfpsf |= INEXACT_EXCEPTION;
 626     if (is_tiny)
 627       *ptrfpsf |= UNDERFLOW_EXCEPTION;
 628   }
 629
 630   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 631   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 632   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 633   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 634   *ptrres = res;
 635   return;
 636 }
 637
 638
 639 #if DECIMAL_CALL_BY_REFERENCE
 640 static void
 641 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
 642                 int *ptr_is_midpoint_gt_even,
 643                 int *ptr_is_inexact_lt_midpoint,
 644                 int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
 645                 UINT128 * px, UINT128 * py,
 646                 UINT128 *
 647                 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
 648                 _EXC_INFO_PARAM) {
 649   UINT128 x = *px, y = *py, z = *pz;
 650 #if !DECIMAL_GLOBAL_ROUNDING
 651   unsigned int rnd_mode = *prnd_mode;
 652 #endif
 653 #else
 654 static UINT128
 655 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
 656                 int *ptr_is_midpoint_gt_even,
 657                 int *ptr_is_inexact_lt_midpoint,
 658                 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
 659                 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
 660                 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
 661 #endif
 662
 663   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
 664   UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
 665   UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
 666   int true_p_exp;
 667   UINT128 C1, C2, C3;
 668   UINT256 C4;
 669   int q1 = 0, q2 = 0, q3 = 0, q4;
 670   int e1, e2, e3, e4;
 671   int scale, ind, delta, x0;
 672   int p34 = P34; // used to modify the limit on the number of digits
 673   BID_UI64DOUBLE tmp;
 674   int x_nr_bits, y_nr_bits, z_nr_bits;
 675   unsigned int save_fpsf;
 676   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
 677   int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
 678   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
 679   int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
 680   int incr_exp = 0;
 681   int lsb;
 682   int lt_half_ulp = 0;
 683   int eq_half_ulp = 0;
 684   int gt_half_ulp = 0;
 685   int is_tiny = 0;
 686   UINT64 R64, tmp64;
 687   UINT128 P128, R128;
 688   UINT192 P192, R192;
 689   UINT256 R256;
 690
 691   // the following are based on the table of special cases for fma; the NaN
 692   // behavior is similar to that of the IA-64 Architecture fma
 693
 694   // identify cases where at least one operand is NaN
 695
 696   BID_SWAP128 (x);
 697   BID_SWAP128 (y);
 698   BID_SWAP128 (z);
 699   if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
 700     // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
 701     // check first for non-canonical NaN payload
 702     if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
 703         (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
 704          (y.w[0] > 0x38c15b09ffffffffull))) {
 705       y.w[1] = y.w[1] & 0xffffc00000000000ull;
 706       y.w[0] = 0x0ull;
 707     }
 708     if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
 709       // set invalid flag
 710       *pfpsf |= INVALID_EXCEPTION;
 711       // return quiet (y)
 712       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
 713       res.w[0] = y.w[0];
 714     } else { // y is QNaN
 715       // return y
 716       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
 717       res.w[0] = y.w[0];
 718       // if z = SNaN or x = SNaN signal invalid exception
 719       if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
 720           (x.w[1] & MASK_SNAN) == MASK_SNAN) {
 721         // set invalid flag
 722         *pfpsf |= INVALID_EXCEPTION;
 723       }
 724     }
 725     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 726     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 727     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 728     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 729     BID_SWAP128 (res);
 730     BID_RETURN (res)
 731   } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
 732     // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
 733     // check first for non-canonical NaN payload
 734     if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
 735         (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
 736          (z.w[0] > 0x38c15b09ffffffffull))) {
 737       z.w[1] = z.w[1] & 0xffffc00000000000ull;
 738       z.w[0] = 0x0ull;
 739     }
 740     if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
 741       // set invalid flag
 742       *pfpsf |= INVALID_EXCEPTION;
 743       // return quiet (z)
 744       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
 745       res.w[0] = z.w[0];
 746     } else { // z is QNaN
 747       // return z
 748       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
 749       res.w[0] = z.w[0];
 750       // if x = SNaN signal invalid exception
 751       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
 752         // set invalid flag
 753         *pfpsf |= INVALID_EXCEPTION;
 754       }
 755     }
 756     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 757     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 758     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 759     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 760     BID_SWAP128 (res);
 761     BID_RETURN (res)
 762   } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
 763     // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
 764     // check first for non-canonical NaN payload
 765     if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
 766         (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
 767          (x.w[0] > 0x38c15b09ffffffffull))) {
 768       x.w[1] = x.w[1] & 0xffffc00000000000ull;
 769       x.w[0] = 0x0ull;
 770     }
 771     if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
 772       // set invalid flag
 773       *pfpsf |= INVALID_EXCEPTION;
 774       // return quiet (x)
 775       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
 776       res.w[0] = x.w[0];
 777     } else { // x is QNaN
 778       // return x
 779       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
 780       res.w[0] = x.w[0];
 781     }
 782     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 783     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 784     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 785     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 786     BID_SWAP128 (res);
 787     BID_RETURN (res)
 788   }
 789   // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
 790   // for non-canonical values
 791
 792   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
 793   C1.w[1] = x.w[1] & MASK_COEFF;
 794   C1.w[0] = x.w[0];
 795   if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
 796     // if x is not infinity check for non-canonical values - treated as zero
 797     if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
 798       // non-canonical
 799       x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
 800       C1.w[1] = 0; // significand high
 801       C1.w[0] = 0; // significand low
 802     } else { // G0_G1 != 11
 803       x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
 804       if (C1.w[1] > 0x0001ed09bead87c0ull ||
 805           (C1.w[1] == 0x0001ed09bead87c0ull &&
 806            C1.w[0] > 0x378d8e63ffffffffull)) {
 807         // x is non-canonical if coefficient is larger than 10^34 -1
 808         C1.w[1] = 0;
 809         C1.w[0] = 0;
 810       } else { // canonical
 811         ;
 812       }
 813     }
 814   }
 815   y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
 816   C2.w[1] = y.w[1] & MASK_COEFF;
 817   C2.w[0] = y.w[0];
 818   if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
 819     // if y is not infinity check for non-canonical values - treated as zero
 820     if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
 821       // non-canonical
 822       y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
 823       C2.w[1] = 0; // significand high
 824       C2.w[0] = 0; // significand low
 825     } else { // G0_G1 != 11
 826       y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
 827       if (C2.w[1] > 0x0001ed09bead87c0ull ||
 828           (C2.w[1] == 0x0001ed09bead87c0ull &&
 829            C2.w[0] > 0x378d8e63ffffffffull)) {
 830         // y is non-canonical if coefficient is larger than 10^34 -1
 831         C2.w[1] = 0;
 832         C2.w[0] = 0;
 833       } else { // canonical
 834         ;
 835       }
 836     }
 837   }
 838   z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
 839   C3.w[1] = z.w[1] & MASK_COEFF;
 840   C3.w[0] = z.w[0];
 841   if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
 842     // if z is not infinity check for non-canonical values - treated as zero
 843     if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
 844       // non-canonical
 845       z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
 846       C3.w[1] = 0; // significand high
 847       C3.w[0] = 0; // significand low
 848     } else { // G0_G1 != 11
 849       z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
 850       if (C3.w[1] > 0x0001ed09bead87c0ull ||
 851           (C3.w[1] == 0x0001ed09bead87c0ull &&
 852            C3.w[0] > 0x378d8e63ffffffffull)) {
 853         // z is non-canonical if coefficient is larger than 10^34 -1
 854         C3.w[1] = 0;
 855         C3.w[0] = 0;
 856       } else { // canonical
 857         ;
 858       }
 859     }
 860   }
 861
 862   p_sign = x_sign ^ y_sign; // sign of the product
 863
 864   // identify cases where at least one operand is infinity
 865
 866   if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
 867     if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
 868       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 869         if (p_sign == z_sign) {
 870           res.w[1] = z_sign | MASK_INF;
 871           res.w[0] = 0x0;
 872         } else {
 873           // return QNaN Indefinite
 874           res.w[1] = 0x7c00000000000000ull;
 875           res.w[0] = 0x0000000000000000ull;
 876           // set invalid flag
 877           *pfpsf |= INVALID_EXCEPTION;
 878         }
 879       } else { // z = 0 or z = f
 880         res.w[1] = p_sign | MASK_INF;
 881         res.w[0] = 0x0;
 882       }
 883     } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
 884       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 885         if (p_sign == z_sign) {
 886           res.w[1] = z_sign | MASK_INF;
 887           res.w[0] = 0x0;
 888         } else {
 889           // return QNaN Indefinite
 890           res.w[1] = 0x7c00000000000000ull;
 891           res.w[0] = 0x0000000000000000ull;
 892           // set invalid flag
 893           *pfpsf |= INVALID_EXCEPTION;
 894         }
 895       } else { // z = 0 or z = f
 896         res.w[1] = p_sign | MASK_INF;
 897         res.w[0] = 0x0;
 898       }
 899     } else { // y = 0
 900       // return QNaN Indefinite
 901       res.w[1] = 0x7c00000000000000ull;
 902       res.w[0] = 0x0000000000000000ull;
 903       // set invalid flag
 904       *pfpsf |= INVALID_EXCEPTION;
 905     }
 906     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 907     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 908     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 909     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 910     BID_SWAP128 (res);
 911     BID_RETURN (res)
 912   } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
 913     if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 914       // x = f, necessarily
 915       if ((p_sign != z_sign)
 916           || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
 917         // return QNaN Indefinite
 918         res.w[1] = 0x7c00000000000000ull;
 919         res.w[0] = 0x0000000000000000ull;
 920         // set invalid flag
 921         *pfpsf |= INVALID_EXCEPTION;
 922       } else {
 923         res.w[1] = z_sign | MASK_INF;
 924         res.w[0] = 0x0;
 925       }
 926     } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
 927       // z = 0, f, inf
 928       // return QNaN Indefinite
 929       res.w[1] = 0x7c00000000000000ull;
 930       res.w[0] = 0x0000000000000000ull;
 931       // set invalid flag
 932       *pfpsf |= INVALID_EXCEPTION;
 933     } else {
 934       // x = f and z = 0, f, necessarily
 935       res.w[1] = p_sign | MASK_INF;
 936       res.w[0] = 0x0;
 937     }
 938     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 939     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 940     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 941     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 942     BID_SWAP128 (res);
 943     BID_RETURN (res)
 944   } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
 945     // x = 0, f and y = 0, f, necessarily
 946     res.w[1] = z_sign | MASK_INF;
 947     res.w[0] = 0x0;
 948     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 949     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 950     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 951     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 952     BID_SWAP128 (res);
 953     BID_RETURN (res)
 954   }
 955
 956   true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
 957   if (true_p_exp < -6176)
 958     p_exp = 0; // cannot be less than EXP_MIN
 959   else
 960     p_exp = (UINT64) (true_p_exp + 6176) << 49;
 961
 962   if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
 963     // the result is 0
 964     if (p_exp < z_exp)
 965       res.w[1] = p_exp; // preferred exponent
 966     else
 967       res.w[1] = z_exp; // preferred exponent
 968     if (p_sign == z_sign) {
 969       res.w[1] |= z_sign;
 970       res.w[0] = 0x0;
 971     } else { // x * y and z have opposite signs
 972       if (rnd_mode == ROUNDING_DOWN) {
 973         // res = -0.0
 974         res.w[1] |= MASK_SIGN;
 975         res.w[0] = 0x0;
 976       } else {
 977         // res = +0.0
 978         // res.w[1] |= 0x0;
 979         res.w[0] = 0x0;
 980       }
 981     }
 982     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
 983     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
 984     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
 985     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
 986     BID_SWAP128 (res);
 987     BID_RETURN (res)
 988   }
 989   // from this point on, we may need to know the number of decimal digits
 990   // in the significands of x, y, z when x, y, z != 0
 991
 992   if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
 993     // q1 = nr. of decimal digits in x
 994     // determine first the nr. of bits in x
 995     if (C1.w[1] == 0) {
 996       if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
 997         // split the 64-bit value in two 32-bit halves to avoid rounding errors
 998         if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
 999           tmp.d = (double) (C1.w[0] >> 32); // exact conversion
1000           x_nr_bits =
1001             33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1002         } else { // x < 2^32
1003           tmp.d = (double) (C1.w[0]); // exact conversion
1004           x_nr_bits =
1005             1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1006         }
1007       } else { // if x < 2^53
1008         tmp.d = (double) C1.w[0]; // exact conversion
1009         x_nr_bits =
1010           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1011       }
1012     } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1013       tmp.d = (double) C1.w[1]; // exact conversion
1014       x_nr_bits =
1015         65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1016     }
1017     q1 = nr_digits[x_nr_bits - 1].digits;
1018     if (q1 == 0) {
1019       q1 = nr_digits[x_nr_bits - 1].digits1;
1020       if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1021           (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1022            C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1023         q1++;
1024     }
1025   }
1026
1027   if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1028     if (C2.w[1] == 0) {
1029       if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1030         // split the 64-bit value in two 32-bit halves to avoid rounding errors
1031         if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1032           tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1033           y_nr_bits =
1034             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1035         } else { // y < 2^32
1036           tmp.d = (double) C2.w[0]; // exact conversion
1037           y_nr_bits =
1038             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1039         }
1040       } else { // if y < 2^53
1041         tmp.d = (double) C2.w[0]; // exact conversion
1042         y_nr_bits =
1043           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1044       }
1045     } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1046       tmp.d = (double) C2.w[1]; // exact conversion
1047       y_nr_bits =
1048         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1049     }
1050
1051     q2 = nr_digits[y_nr_bits].digits;
1052     if (q2 == 0) {
1053       q2 = nr_digits[y_nr_bits].digits1;
1054       if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1055           (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1056            C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1057         q2++;
1058     }
1059   }
1060
1061   if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1062     if (C3.w[1] == 0) {
1063       if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1064         // split the 64-bit value in two 32-bit halves to avoid rounding errors
1065         if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1066           tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1067           z_nr_bits =
1068             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1069         } else { // z < 2^32
1070           tmp.d = (double) C3.w[0]; // exact conversion
1071           z_nr_bits =
1072             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1073         }
1074       } else { // if z < 2^53
1075         tmp.d = (double) C3.w[0]; // exact conversion
1076         z_nr_bits =
1077           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1078       }
1079     } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1080       tmp.d = (double) C3.w[1]; // exact conversion
1081       z_nr_bits =
1082         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1083     }
1084
1085     q3 = nr_digits[z_nr_bits].digits;
1086     if (q3 == 0) {
1087       q3 = nr_digits[z_nr_bits].digits1;
1088       if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1089           (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1090            C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1091         q3++;
1092     }
1093   }
1094
1095   if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1096       (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1097     // x = 0 or y = 0
1098     // z = f, necessarily; for 0 + z return z, with the preferred exponent
1099     // the result is z, but need to get the preferred exponent
1100     if (z_exp <= p_exp) { // the preferred exponent is z_exp
1101       res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1102       res.w[0] = C3.w[0];
1103     } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1104       // return (C3 * 10^scale) * 10^(z_exp - scale)
1105       // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1106       scale = p34 - q3;
1107       ind = (z_exp - p_exp) >> 49;
1108       if (ind < scale)
1109         scale = ind;
1110       if (scale == 0) {
1111         res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1112         res.w[0] = z.w[0];
1113       } else if (q3 <= 19) { // z fits in 64 bits
1114         if (scale <= 19) { // 10^scale fits in 64 bits
1115           // 64 x 64 C3.w[0] * ten2k64[scale]
1116           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1117         } else { // 10^scale fits in 128 bits
1118           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1119           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1120         }
1121       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1122         // 64 x 128 ten2k64[scale] * C3
1123         __mul_128x64_to_128 (res, ten2k64[scale], C3);
1124       }
1125       // subtract scale from the exponent
1126       z_exp = z_exp - ((UINT64) scale << 49);
1127       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1128     }
1129     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1130     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1131     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1132     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1133     BID_SWAP128 (res);
1134     BID_RETURN (res)
1135   } else {
1136     ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1137   }
1138
1139   e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1140   e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1141   e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1142   e4 = e1 + e2; // unbiased exponent of the exact x * y
1143
1144   // calculate C1 * C2 and its number of decimal digits, q4
1145
1146   // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1147   // where 2 <= q1 + q2 <= 68
1148   // calculate C4 = C1 * C2 and determine q
1149   C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1150   if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1151     C4.w[0] = C1.w[0] * C2.w[0];
1152     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1153     if (C4.w[0] < ten2k64[q1 + q2 - 1])
1154       q4 = q1 + q2 - 1; // q4 in [1, 18]
1155     else
1156       q4 = q1 + q2; // q4 in [2, 19]
1157     // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1158   } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1159     // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1160     __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1161     // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1162     if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1163       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1164       q4 = 19; // 19 = q1 + q2 - 1
1165     } else {
1166       // if (C4.w[1] == 0)
1167       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1168       // else
1169       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1170       q4 = 20; // 20 = q1 + q2
1171     }
1172   } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1173     // C4 = C1 * C2 fits in 64 or 128 bits
1174     // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1175     // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1176     if (q1 <= 19) {
1177       __mul_128x64_to_128 (C4, C1.w[0], C2);
1178     } else { // q2 <= 19
1179       __mul_128x64_to_128 (C4, C2.w[0], C1);
1180     }
1181     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1182     if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1183         (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1184          C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1185       // if (C4.w[1] == 0) // q4 = 20, necessarily
1186       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1187       // else
1188       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1189       q4 = q1 + q2 - 1; // q4 in [20, 37]
1190     } else {
1191       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1192       q4 = q1 + q2; // q4 in [21, 38]
1193     }
1194   } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1195     // both C1 and C2 fit in 128 bits (actually in 113 bits)
1196     // may replace this by 128x128_to192
1197     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1198     // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1199     if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1200                          (C4.w[1] == ten2k128[18].w[1]
1201                           && C4.w[0] < ten2k128[18].w[0]))) {
1202       // 18 = 38 - 20 = q1+q2-1 - 20
1203       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1204       q4 = 38; // 38 = q1 + q2 - 1
1205     } else {
1206       // if (C4.w[2] == 0)
1207       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1208       // else
1209       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1210       q4 = 39; // 39 = q1 + q2
1211     }
1212   } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1213     // C4 = C1 * C2 fits in 128 or 192 bits
1214     // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1215     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1216     // may fit in 64 bits
1217     if (C1.w[1] == 0) { // C1 fits in 64 bits
1218       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1219       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1220     } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1221       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1222       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1223     } else { // both C1 and C2 require 128 bits
1224       // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1225       __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1226     }
1227     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1228     if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1229         (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1230          (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1231           (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1232            C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1233       // if (C4.w[2] == 0) // q4 = 39, necessarily
1234       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1235       // else
1236       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1237       q4 = q1 + q2 - 1; // q4 in [39, 56]
1238     } else {
1239       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1240       q4 = q1 + q2; // q4 in [40, 57]
1241     }
1242   } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1243     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1244     // may fit in 64 bits
1245     if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1246       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1247     } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1248       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1249     } else { // C1 * C2 will fit in 192 bits or in 256 bits
1250       __mul_128x128_to_256 (C4, C1, C2);
1251     }
1252     // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1253     if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1254                          (C4.w[2] == ten2k256[18].w[2]
1255                           && (C4.w[1] < ten2k256[18].w[1]
1256                               || (C4.w[1] == ten2k256[18].w[1]
1257                                   && C4.w[0] < ten2k256[18].w[0]))))) {
1258       // 18 = 57 - 39 = q1+q2-1 - 39
1259       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1260       q4 = 57; // 57 = q1 + q2 - 1
1261     } else {
1262       // if (C4.w[3] == 0)
1263       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1264       // else
1265       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1266       q4 = 58; // 58 = q1 + q2
1267     }
1268   } else { // if 59 <= q1 + q2 <= 68
1269     // C4 = C1 * C2 fits in 192 or 256 bits
1270     // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1271     // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1272     // 64 bits
1273     // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1274     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1275     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1276     if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1277         (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1278          (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1279           (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1280            (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1281             (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1282              C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1283       // if (C4.w[3] == 0) // q4 = 58, necessarily
1284       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1285       // else
1286       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1287       q4 = q1 + q2 - 1; // q4 in [58, 67]
1288     } else {
1289       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1290       q4 = q1 + q2; // q4 in [59, 68]
1291     }
1292   }
1293
1294   if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1295     save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1296     *pfpsf = 0;
1297
1298     if (q4 > p34) {
1299
1300       // truncate C4 to p34 digits into res
1301       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1302       x0 = q4 - p34;
1303       if (q4 <= 38) {
1304         P128.w[1] = C4.w[1];
1305         P128.w[0] = C4.w[0];
1306         round128_19_38 (q4, x0, P128, &res, &incr_exp,
1307                         &is_midpoint_lt_even, &is_midpoint_gt_even,
1308                         &is_inexact_lt_midpoint,
1309                         &is_inexact_gt_midpoint);
1310       } else if (q4 <= 57) { // 35 <= q4 <= 57
1311         P192.w[2] = C4.w[2];
1312         P192.w[1] = C4.w[1];
1313         P192.w[0] = C4.w[0];
1314         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1315                         &is_midpoint_lt_even, &is_midpoint_gt_even,
1316                         &is_inexact_lt_midpoint,
1317                         &is_inexact_gt_midpoint);
1318         res.w[0] = R192.w[0];
1319         res.w[1] = R192.w[1];
1320       } else { // if (q4 <= 68)
1321         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1322                         &is_midpoint_lt_even, &is_midpoint_gt_even,
1323                         &is_inexact_lt_midpoint,
1324                         &is_inexact_gt_midpoint);
1325         res.w[0] = R256.w[0];
1326         res.w[1] = R256.w[1];
1327       }
1328       e4 = e4 + x0;
1329       if (incr_exp) {
1330         e4 = e4 + 1;
1331       }
1332       q4 = p34;
1333       // res is now the coefficient of the result rounded to the destination
1334       // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1335     } else { // if (q4 <= p34)
1336       // C4 * 10^e4 is the result rounded to the destination precision, with
1337       // unbounded exponent (which is exact)
1338
1339       if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1340         // e4 is too large, but can be brought within range by scaling up C4
1341         scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1342         // res = (C4 * 10^scale) * 10^expmax
1343         if (q4 <= 19) { // C4 fits in 64 bits
1344           if (scale <= 19) { // 10^scale fits in 64 bits
1345             // 64 x 64 C4.w[0] * ten2k64[scale]
1346             __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1347           } else { // 10^scale fits in 128 bits
1348             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1349             __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1350           }
1351         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1352           // 64 x 128 ten2k64[scale] * CC43
1353           __mul_128x64_to_128 (res, ten2k64[scale], C4);
1354         }
1355         e4 = e4 - scale; // expmax
1356         q4 = q4 + scale;
1357       } else {
1358         res.w[1] = C4.w[1];
1359         res.w[0] = C4.w[0];
1360       }
1361       // res is the coefficient of the result rounded to the destination
1362       // precision, with unbounded exponent (it has q4 digits); the exponent
1363       // is e4 (exact result)
1364     }
1365
1366     // check for overflow
1367     if (q4 + e4 > p34 + expmax) {
1368       if (rnd_mode == ROUNDING_TO_NEAREST) {
1369         res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1370         res.w[0] = 0x0000000000000000ull;
1371         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1372       } else {
1373         res.w[1] = p_sign | res.w[1];
1374         rounding_correction (rnd_mode,
1375                              is_inexact_lt_midpoint,
1376                              is_inexact_gt_midpoint,
1377                              is_midpoint_lt_even, is_midpoint_gt_even,
1378                              e4, &res, pfpsf);
1379       }
1380       *pfpsf |= save_fpsf;
1381       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1382       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1383       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1384       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1385       BID_SWAP128 (res);
1386       BID_RETURN (res)
1387     }
1388     // check for underflow
1389     if (q4 + e4 < expmin + P34) {
1390       is_tiny = 1; // the result is tiny
1391       if (e4 < expmin) {
1392         // if e4 < expmin, we must truncate more of res
1393         x0 = expmin - e4; // x0 >= 1
1394         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1395         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1396         is_midpoint_lt_even0 = is_midpoint_lt_even;
1397         is_midpoint_gt_even0 = is_midpoint_gt_even;
1398         is_inexact_lt_midpoint = 0;
1399         is_inexact_gt_midpoint = 0;
1400         is_midpoint_lt_even = 0;
1401         is_midpoint_gt_even = 0;
1402         // the number of decimal digits in res is q4
1403         if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1404           if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1405             round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1406                           &is_midpoint_lt_even, &is_midpoint_gt_even,
1407                           &is_inexact_lt_midpoint,
1408                           &is_inexact_gt_midpoint);
1409             if (incr_exp) {
1410               // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1411               R64 = ten2k64[q4 - x0];
1412             }
1413             // res.w[1] = 0; (from above)
1414             res.w[0] = R64;
1415           } else { // if (q4 <= 34)
1416             // 19 <= q4 <= 38
1417             P128.w[1] = res.w[1];
1418             P128.w[0] = res.w[0];
1419             round128_19_38 (q4, x0, P128, &res, &incr_exp,
1420                             &is_midpoint_lt_even, &is_midpoint_gt_even,
1421                             &is_inexact_lt_midpoint,
1422                             &is_inexact_gt_midpoint);
1423             if (incr_exp) {
1424               // increase coefficient by a factor of 10; this will be <= 10^33
1425               // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1426               if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1427                 // res.w[1] = 0;
1428                 res.w[0] = ten2k64[q4 - x0];
1429               } else { // 20 <= q4 - x0 <= 37
1430                 res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1431                 res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1432               }
1433             }
1434           }
1435           e4 = e4 + x0; // expmin
1436         } else if (x0 == q4) {
1437           // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1438           // determine relationship with 1/2 ulp
1439           if (q4 <= 19) {
1440             if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1441               lt_half_ulp = 1;
1442               is_inexact_lt_midpoint = 1;
1443             } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1444               eq_half_ulp = 1;
1445               is_midpoint_gt_even = 1;
1446             } else { // > 1/2 ulp
1447               // gt_half_ulp = 1;
1448               is_inexact_gt_midpoint = 1;
1449             }
1450           } else { // if (q4 <= 34)
1451             if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1452                 (res.w[1] == midpoint128[q4 - 20].w[1] &&
1453                 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1454               lt_half_ulp = 1;
1455               is_inexact_lt_midpoint = 1;
1456             } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1457                 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1458               eq_half_ulp = 1;
1459               is_midpoint_gt_even = 1;
1460             } else { // > 1/2 ulp
1461               // gt_half_ulp = 1;
1462               is_inexact_gt_midpoint = 1;
1463             }
1464           }
1465           if (lt_half_ulp || eq_half_ulp) {
1466             // res = +0.0 * 10^expmin
1467             res.w[1] = 0x0000000000000000ull;
1468             res.w[0] = 0x0000000000000000ull;
1469           } else { // if (gt_half_ulp)
1470             // res = +1 * 10^expmin
1471             res.w[1] = 0x0000000000000000ull;
1472             res.w[0] = 0x0000000000000001ull;
1473           }
1474           e4 = expmin;
1475         } else { // if (x0 > q4)
1476           // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1477           res.w[1] = 0;
1478           res.w[0] = 0;
1479           e4 = expmin;
1480           is_inexact_lt_midpoint = 1;
1481         }
1482         // avoid a double rounding error
1483         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1484             is_midpoint_lt_even) { // double rounding error upward
1485           // res = res - 1
1486           res.w[0]--;
1487           if (res.w[0] == 0xffffffffffffffffull)
1488             res.w[1]--;
1489           // Note: a double rounding error upward is not possible; for this
1490           // the result after the first rounding would have to be 99...95
1491           // (35 digits in all), possibly followed by a number of zeros; this
1492           // not possible for f * f + 0
1493           is_midpoint_lt_even = 0;
1494           is_inexact_lt_midpoint = 1;
1495         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1496             is_midpoint_gt_even) { // double rounding error downward
1497           // res = res + 1
1498           res.w[0]++;
1499           if (res.w[0] == 0)
1500             res.w[1]++;
1501           is_midpoint_gt_even = 0;
1502           is_inexact_gt_midpoint = 1;
1503         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1504                    !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1505           // if this second rounding was exact the result may still be
1506           // inexact because of the first rounding
1507           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1508             is_inexact_gt_midpoint = 1;
1509           }
1510           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1511             is_inexact_lt_midpoint = 1;
1512           }
1513         } else if (is_midpoint_gt_even &&
1514                    (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1515           // pulled up to a midpoint
1516           is_inexact_lt_midpoint = 1;
1517           is_inexact_gt_midpoint = 0;
1518           is_midpoint_lt_even = 0;
1519           is_midpoint_gt_even = 0;
1520         } else if (is_midpoint_lt_even &&
1521                    (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1522           // pulled down to a midpoint
1523           is_inexact_lt_midpoint = 0;
1524           is_inexact_gt_midpoint = 1;
1525           is_midpoint_lt_even = 0;
1526           is_midpoint_gt_even = 0;
1527         } else {
1528           ;
1529         }
1530       } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1531         if (e3 < e4) {
1532           // if (e3 < e4) the preferred exponent is e3
1533           // return (C4 * 10^scale) * 10^(e4 - scale)
1534           // where scale = min (p34-q4, (e4 - e3))
1535           scale = p34 - q4;
1536           ind = e4 - e3;
1537           if (ind < scale)
1538             scale = ind;
1539           if (scale == 0) {
1540             ; // res and e4 are unchanged
1541           } else if (q4 <= 19) { // C4 fits in 64 bits
1542             if (scale <= 19) { // 10^scale fits in 64 bits
1543               // 64 x 64 res.w[0] * ten2k64[scale]
1544               __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1545             } else { // 10^scale fits in 128 bits
1546               // 64 x 128 res.w[0] * ten2k128[scale - 20]
1547               __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1548             }
1549           } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1550             // 64 x 128 ten2k64[scale] * C3
1551             __mul_128x64_to_128 (res, ten2k64[scale], res);
1552           }
1553           // subtract scale from the exponent
1554           e4 = e4 - scale;
1555         }
1556       }
1557
1558       // check for inexact result
1559       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1560           is_midpoint_lt_even || is_midpoint_gt_even) {
1561         // set the inexact flag and the underflow flag
1562         *pfpsf |= INEXACT_EXCEPTION;
1563         *pfpsf |= UNDERFLOW_EXCEPTION;
1564       }
1565       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1566       if (rnd_mode != ROUNDING_TO_NEAREST) {
1567         rounding_correction (rnd_mode,
1568                              is_inexact_lt_midpoint,
1569                              is_inexact_gt_midpoint,
1570                              is_midpoint_lt_even, is_midpoint_gt_even,
1571                              e4, &res, pfpsf);
1572       }
1573       *pfpsf |= save_fpsf;
1574       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1575       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1576       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1577       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1578       BID_SWAP128 (res);
1579       BID_RETURN (res)
1580     }
1581     // no overflow, and no underflow for rounding to nearest
1582     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1583
1584     if (rnd_mode != ROUNDING_TO_NEAREST) {
1585       rounding_correction (rnd_mode,
1586                            is_inexact_lt_midpoint,
1587                            is_inexact_gt_midpoint,
1588                            is_midpoint_lt_even, is_midpoint_gt_even,
1589                            e4, &res, pfpsf);
1590       // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1591       if (e4 == expmin) {
1592         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1593             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1594              res.w[0] < 0x38c15b0a00000000ull)) {
1595           is_tiny = 1;
1596         }
1597       }
1598     }
1599
1600     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1601         is_midpoint_lt_even || is_midpoint_gt_even) {
1602       // set the inexact flag
1603       *pfpsf |= INEXACT_EXCEPTION;
1604       if (is_tiny)
1605         *pfpsf |= UNDERFLOW_EXCEPTION;
1606     }
1607
1608     if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1609       // need to ensure that the result has the preferred exponent
1610       p_exp = res.w[1] & MASK_EXP;
1611       if (z_exp < p_exp) { // the preferred exponent is z_exp
1612         // signficand of res in C3
1613         C3.w[1] = res.w[1] & MASK_COEFF;
1614         C3.w[0] = res.w[0];
1615         // the number of decimal digits of x * y is q4 <= 34
1616         // Note: the coefficient fits in 128 bits
1617
1618         // return (C3 * 10^scale) * 10^(p_exp - scale)
1619         // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1620         scale = p34 - q4;
1621         ind = (p_exp - z_exp) >> 49;
1622         if (ind < scale)
1623           scale = ind;
1624         // subtract scale from the exponent
1625         p_exp = p_exp - ((UINT64) scale << 49);
1626         if (scale == 0) {
1627           ; // leave res unchanged
1628         } else if (q4 <= 19) { // x * y fits in 64 bits
1629           if (scale <= 19) { // 10^scale fits in 64 bits
1630             // 64 x 64 C3.w[0] * ten2k64[scale]
1631             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1632           } else { // 10^scale fits in 128 bits
1633             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1634             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1635           }
1636           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1637         } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1638           // 64 x 128 ten2k64[scale] * C3
1639           __mul_128x64_to_128 (res, ten2k64[scale], C3);
1640           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1641         }
1642       } // else leave the result as it is, because p_exp <= z_exp
1643     }
1644     *pfpsf |= save_fpsf;
1645     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1646     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1647     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1648     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1649     BID_SWAP128 (res);
1650     BID_RETURN (res)
1651   } // else we have f * f + f
1652
1653   // continue with x = f, y = f, z = f
1654
1655   delta = q3 + e3 - q4 - e4;
1656 delta_ge_zero:
1657   if (delta >= 0) {
1658
1659     if (p34 <= delta - 1 ||     // Case (1')
1660         (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1661       // check for overflow, which can occur only in Case (1')
1662       if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1663         // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1664         // condition for (q3 + e3) > (p34 + expmax)
1665         if (rnd_mode == ROUNDING_TO_NEAREST) {
1666           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1667           res.w[0] = 0x0000000000000000ull;
1668           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1669         } else {
1670           if (p_sign == z_sign) {
1671             is_inexact_lt_midpoint = 1;
1672           } else {
1673             is_inexact_gt_midpoint = 1;
1674           }
1675           // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1676           scale = p34 - q3;
1677           if (scale == 0) {
1678             res.w[1] = z_sign | C3.w[1];
1679             res.w[0] = C3.w[0];
1680           } else {
1681             if (q3 <= 19) { // C3 fits in 64 bits
1682               if (scale <= 19) { // 10^scale fits in 64 bits
1683                 // 64 x 64 C3.w[0] * ten2k64[scale]
1684                 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1685               } else { // 10^scale fits in 128 bits
1686                 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1687                 __mul_128x64_to_128 (res, C3.w[0],
1688                                      ten2k128[scale - 20]);
1689               }
1690             } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1691               // 64 x 128 ten2k64[scale] * C3
1692               __mul_128x64_to_128 (res, ten2k64[scale], C3);
1693             }
1694             // the coefficient in res has q3 + scale = p34 digits
1695           }
1696           e3 = e3 - scale;
1697           res.w[1] = z_sign | res.w[1];
1698           rounding_correction (rnd_mode,
1699                                is_inexact_lt_midpoint,
1700                                is_inexact_gt_midpoint,
1701                                is_midpoint_lt_even, is_midpoint_gt_even,
1702                                e3, &res, pfpsf);
1703         }
1704         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1705         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1706         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1707         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1708         BID_SWAP128 (res);
1709         BID_RETURN (res)
1710       }
1711       // res = z
1712       if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1713         // return (C3 * 10^scale) * 10^(z_exp - scale)
1714         // where scale = min (p34-q3, z_exp-EMIN)
1715         scale = p34 - q3;
1716         ind = e3 + 6176;
1717         if (ind < scale)
1718           scale = ind;
1719         if (scale == 0) {
1720           res.w[1] = C3.w[1];
1721           res.w[0] = C3.w[0];
1722         } else if (q3 <= 19) { // z fits in 64 bits
1723           if (scale <= 19) { // 10^scale fits in 64 bits
1724             // 64 x 64 C3.w[0] * ten2k64[scale]
1725             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1726           } else { // 10^scale fits in 128 bits
1727             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1728             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1729           }
1730         } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1731           // 64 x 128 ten2k64[scale] * C3
1732           __mul_128x64_to_128 (res, ten2k64[scale], C3);
1733         }
1734         // the coefficient in res has q3 + scale digits
1735         // subtract scale from the exponent
1736         z_exp = z_exp - ((UINT64) scale << 49);
1737         e3 = e3 - scale;
1738         res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1739         if (scale + q3 < p34)
1740           *pfpsf |= UNDERFLOW_EXCEPTION;
1741       } else {
1742         scale = 0;
1743         res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1744         res.w[0] = C3.w[0];
1745       }
1746
1747       // use the following to avoid double rounding errors when operating on
1748       // mixed formats in rounding to nearest, and for correcting the result
1749       // if not rounding to nearest
1750       if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1751         // there is a gap of exactly one digit between the scaled C3 and C4
1752         // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1753         if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1754             (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1755             (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1756                           C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1757           // C3 * 10^ scale != 10^(q3-1)
1758           // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1759           // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1760           is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1761         } else { // if C3 * 10^scale = 10^(q3+scale-1)
1762           // ok from above e3 = (z_exp >> 49) - 6176;
1763           // the result is always inexact
1764           if (q4 == 1) {
1765             R64 = C4.w[0];
1766           } else {
1767             // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1768             // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1769             if (q4 <= 18) { // 2 <= q4 <= 18
1770               round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1771                             &is_midpoint_lt_even, &is_midpoint_gt_even,
1772                             &is_inexact_lt_midpoint,
1773                             &is_inexact_gt_midpoint);
1774             } else if (q4 <= 38) {
1775               P128.w[1] = C4.w[1];
1776               P128.w[0] = C4.w[0];
1777               round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1778                               &is_midpoint_lt_even,
1779                               &is_midpoint_gt_even,
1780                               &is_inexact_lt_midpoint,
1781                               &is_inexact_gt_midpoint);
1782               R64 = R128.w[0]; // one decimal digit
1783             } else if (q4 <= 57) {
1784               P192.w[2] = C4.w[2];
1785               P192.w[1] = C4.w[1];
1786               P192.w[0] = C4.w[0];
1787               round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1788                               &is_midpoint_lt_even,
1789                               &is_midpoint_gt_even,
1790                               &is_inexact_lt_midpoint,
1791                               &is_inexact_gt_midpoint);
1792               R64 = R192.w[0]; // one decimal digit
1793             } else { // if (q4 <= 68)
1794               round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1795                               &is_midpoint_lt_even,
1796                               &is_midpoint_gt_even,
1797                               &is_inexact_lt_midpoint,
1798                               &is_inexact_gt_midpoint);
1799               R64 = R256.w[0]; // one decimal digit
1800             }
1801             if (incr_exp) {
1802               R64 = 10;
1803             }
1804           }
1805           if (q4 == 1 && C4.w[0] == 5) {
1806             is_inexact_lt_midpoint = 0;
1807             is_inexact_gt_midpoint = 0;
1808             is_midpoint_lt_even = 1;
1809             is_midpoint_gt_even = 0;
1810           } else if ((e3 == expmin) ||
1811                      R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1812             // result does not change
1813             is_inexact_lt_midpoint = 0;
1814             is_inexact_gt_midpoint = 1;
1815             is_midpoint_lt_even = 0;
1816             is_midpoint_gt_even = 0;
1817           } else {
1818             is_inexact_lt_midpoint = 1;
1819             is_inexact_gt_midpoint = 0;
1820             is_midpoint_lt_even = 0;
1821             is_midpoint_gt_even = 0;
1822             // result decremented is 10^(q3+scale) - 1
1823             if ((q3 + scale) <= 19) {
1824               res.w[1] = 0;
1825               res.w[0] = ten2k64[q3 + scale];
1826             } else { // if ((q3 + scale + 1) <= 35)
1827               res.w[1] = ten2k128[q3 + scale - 20].w[1];
1828               res.w[0] = ten2k128[q3 + scale - 20].w[0];
1829             }
1830             res.w[0] = res.w[0] - 1; // borrow never occurs
1831             z_exp = z_exp - EXP_P1;
1832             e3 = e3 - 1;
1833             res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1834           }
1835           if (e3 == expmin) {
1836             if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1837               ; // result not tiny (in round-to-nearest mode)
1838             } else {
1839               *pfpsf |= UNDERFLOW_EXCEPTION;
1840             }
1841           }
1842         } // end 10^(q3+scale-1)
1843         // set the inexact flag
1844         *pfpsf |= INEXACT_EXCEPTION;
1845       } else {
1846         if (p_sign == z_sign) {
1847           // if (z_sign), set as if for absolute value
1848           is_inexact_lt_midpoint = 1;
1849         } else { // if (p_sign != z_sign)
1850           // if (z_sign), set as if for absolute value
1851           is_inexact_gt_midpoint = 1;
1852         }
1853         *pfpsf |= INEXACT_EXCEPTION;
1854       }
1855       // the result is always inexact => set the inexact flag
1856       // Determine tininess:
1857       //    if (exp > expmin)
1858       //      the result is not tiny
1859       //    else // if exp = emin
1860       //      if (q3 + scale < p34)
1861       //        the result is tiny
1862       //      else // if (q3 + scale = p34)
1863       //        if (C3 * 10^scale > 10^33)
1864       //          the result is not tiny
1865       //        else // if C3 * 10^scale = 10^33
1866       //          if (xy * z > 0)
1867       //            the result is not tiny
1868       //          else // if (xy * z < 0)
1869       //            if (z > 0)
1870       //              if rnd_mode != RP
1871       //                the result is tiny
1872       //              else // if RP
1873       //                the result is not tiny
1874       //            else // if (z < 0)
1875       //              if rnd_mode != RM
1876       //                the result is tiny
1877       //              else // if RM
1878       //                the result is not tiny
1879       //              endif
1880       //            endif
1881       //          endif
1882       //        endif
1883       //      endif
1884       //    endif
1885       if ((e3 == expmin && (q3 + scale) < p34) ||
1886           (e3 == expmin && (q3 + scale) == p34 &&
1887           (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&   // 10^33_high
1888           res.w[0] == 0x38c15b0a00000000ull &&  // 10^33_low
1889           z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1890           (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1891         *pfpsf |= UNDERFLOW_EXCEPTION;
1892       }
1893       if (rnd_mode != ROUNDING_TO_NEAREST) {
1894         rounding_correction (rnd_mode,
1895                              is_inexact_lt_midpoint,
1896                              is_inexact_gt_midpoint,
1897                              is_midpoint_lt_even, is_midpoint_gt_even,
1898                              e3, &res, pfpsf);
1899       }
1900       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1901       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1902       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1903       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1904       BID_SWAP128 (res);
1905       BID_RETURN (res)
1906
1907     } else if (p34 == delta) { // Case (1''B)
1908
1909       // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1910       // and C3 can be scaled up to p34 digits if needed
1911
1912       // scale C3 to p34 digits if needed
1913       scale = p34 - q3; // 0 <= scale <= p34 - 1
1914       if (scale == 0) {
1915         res.w[1] = C3.w[1];
1916         res.w[0] = C3.w[0];
1917       } else if (q3 <= 19) { // z fits in 64 bits
1918         if (scale <= 19) { // 10^scale fits in 64 bits
1919           // 64 x 64 C3.w[0] * ten2k64[scale]
1920           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1921         } else { // 10^scale fits in 128 bits
1922           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1923           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1924         }
1925       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1926         // 64 x 128 ten2k64[scale] * C3
1927         __mul_128x64_to_128 (res, ten2k64[scale], C3);
1928       }
1929       // subtract scale from the exponent
1930       z_exp = z_exp - ((UINT64) scale << 49);
1931       e3 = e3 - scale;
1932       // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1933
1934       // determine whether x * y is less than, equal to, or greater than
1935       // 1/2 ulp (z)
1936       if (q4 <= 19) {
1937         if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1938           lt_half_ulp = 1;
1939         } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1940           eq_half_ulp = 1;
1941         } else { // > 1/2 ulp
1942           gt_half_ulp = 1;
1943         }
1944       } else if (q4 <= 38) {
1945         if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1946             (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1947             C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1948           lt_half_ulp = 1;
1949         } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1950             C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1951           eq_half_ulp = 1;
1952         } else { // > 1/2 ulp
1953           gt_half_ulp = 1;
1954         }
1955       } else if (q4 <= 58) {
1956         if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1957             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1958             C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1959             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1960             C4.w[1] == midpoint192[q4 - 39].w[1] &&
1961             C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1962           lt_half_ulp = 1;
1963         } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1964             C4.w[1] == midpoint192[q4 - 39].w[1] &&
1965             C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1966           eq_half_ulp = 1;
1967         } else { // > 1/2 ulp
1968           gt_half_ulp = 1;
1969         }
1970       } else {
1971         if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1972             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1973             C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1974             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1975             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1976             C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1977             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1978             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1979             C4.w[1] == midpoint256[q4 - 59].w[1] &&
1980             C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1981           lt_half_ulp = 1;
1982         } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1983             C4.w[2] == midpoint256[q4 - 59].w[2] &&
1984             C4.w[1] == midpoint256[q4 - 59].w[1] &&
1985             C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1986           eq_half_ulp = 1;
1987         } else { // > 1/2 ulp
1988           gt_half_ulp = 1;
1989         }
1990       }
1991
1992       if (p_sign == z_sign) {
1993         if (lt_half_ulp) {
1994           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1995           // use the following to avoid double rounding errors when operating on
1996           // mixed formats in rounding to nearest
1997           is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1998         } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1999           // add 1 ulp to the significand
2000           res.w[0]++;
2001           if (res.w[0] == 0x0ull)
2002             res.w[1]++;
2003           // check for rounding overflow, when coeff == 10^34
2004           if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
2005               res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
2006             e3 = e3 + 1;
2007             // coeff = 10^33
2008             z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2009             res.w[1] = 0x0000314dc6448d93ull;
2010             res.w[0] = 0x38c15b0a00000000ull;
2011           }
2012           // end add 1 ulp
2013           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2014           if (eq_half_ulp) {
2015             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2016           } else {
2017             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2018           }
2019         } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2020           // leave unchanged
2021           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2022           is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2023         }
2024         // the result is always inexact, and never tiny
2025         // set the inexact flag
2026         *pfpsf |= INEXACT_EXCEPTION;
2027         // check for overflow
2028         if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2029           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2030           res.w[0] = 0x0000000000000000ull;
2031           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2032           *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2033           *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2034           *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2035           *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2036           BID_SWAP128 (res);
2037           BID_RETURN (res)
2038         }
2039         if (rnd_mode != ROUNDING_TO_NEAREST) {
2040           rounding_correction (rnd_mode,
2041                                is_inexact_lt_midpoint,
2042                                is_inexact_gt_midpoint,
2043                                is_midpoint_lt_even, is_midpoint_gt_even,
2044                                e3, &res, pfpsf);
2045           z_exp = res.w[1] & MASK_EXP;
2046         }
2047       } else { // if (p_sign != z_sign)
2048         // consider two cases, because C3 * 10^scale = 10^33 is a special case
2049         if (res.w[1] != 0x0000314dc6448d93ull ||
2050             res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2051           if (lt_half_ulp) {
2052             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2053             // use the following to avoid double rounding errors when operating
2054             // on mixed formats in rounding to nearest
2055             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2056           } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2057             // subtract 1 ulp from the significand
2058             res.w[0]--;
2059             if (res.w[0] == 0xffffffffffffffffull)
2060               res.w[1]--;
2061             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2062             if (eq_half_ulp) {
2063               is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2064             } else {
2065               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2066             }
2067           } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2068             // leave unchanged
2069             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2070             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2071           }
2072           // the result is always inexact, and never tiny
2073           // check for overflow for RN
2074           if (e3 > expmax) {
2075             if (rnd_mode == ROUNDING_TO_NEAREST) {
2076               res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2077               res.w[0] = 0x0000000000000000ull;
2078               *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2079             } else {
2080               rounding_correction (rnd_mode,
2081                                    is_inexact_lt_midpoint,
2082                                    is_inexact_gt_midpoint,
2083                                    is_midpoint_lt_even,
2084                                    is_midpoint_gt_even, e3, &res,
2085                                    pfpsf);
2086             }
2087             *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2088             *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2089             *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2090             *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2091             BID_SWAP128 (res);
2092             BID_RETURN (res)
2093           }
2094           // set the inexact flag
2095           *pfpsf |= INEXACT_EXCEPTION;
2096           if (rnd_mode != ROUNDING_TO_NEAREST) {
2097             rounding_correction (rnd_mode,
2098                                  is_inexact_lt_midpoint,
2099                                  is_inexact_gt_midpoint,
2100                                  is_midpoint_lt_even,
2101                                  is_midpoint_gt_even, e3, &res, pfpsf);
2102           }
2103           z_exp = res.w[1] & MASK_EXP;
2104         } else { // if C3 * 10^scale = 10^33
2105           e3 = (z_exp >> 49) - 6176;
2106           if (e3 > expmin) {
2107             // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2108             // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2109             if (q4 == 1) {
2110               // if q4 = 1 the result is exact
2111               // result coefficient = 10^34 - C4
2112               res.w[1] = 0x0001ed09bead87c0ull;
2113               res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2114               z_exp = z_exp - EXP_P1;
2115               e3 = e3 - 1;
2116               res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2117             } else {
2118               // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2119               // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2120               if (q4 <= 18) { // 2 <= q4 <= 18
2121                 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2122                               &is_midpoint_lt_even,
2123                               &is_midpoint_gt_even,
2124                               &is_inexact_lt_midpoint,
2125                               &is_inexact_gt_midpoint);
2126               } else if (q4 <= 38) {
2127                 P128.w[1] = C4.w[1];
2128                 P128.w[0] = C4.w[0];
2129                 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2130                                 &is_midpoint_lt_even,
2131                                 &is_midpoint_gt_even,
2132                                 &is_inexact_lt_midpoint,
2133                                 &is_inexact_gt_midpoint);
2134                 R64 = R128.w[0]; // one decimal digit
2135               } else if (q4 <= 57) {
2136                 P192.w[2] = C4.w[2];
2137                 P192.w[1] = C4.w[1];
2138                 P192.w[0] = C4.w[0];
2139                 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2140                                 &is_midpoint_lt_even,
2141                                 &is_midpoint_gt_even,
2142                                 &is_inexact_lt_midpoint,
2143                                 &is_inexact_gt_midpoint);
2144                 R64 = R192.w[0]; // one decimal digit
2145               } else { // if (q4 <= 68)
2146                 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2147                                 &is_midpoint_lt_even,
2148                                 &is_midpoint_gt_even,
2149                                 &is_inexact_lt_midpoint,
2150                                 &is_inexact_gt_midpoint);
2151                 R64 = R256.w[0]; // one decimal digit
2152               }
2153               if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2154                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2155                 // the result is exact: 10^34 - R64
2156                 // incr_exp = 0 with certainty
2157                 z_exp = z_exp - EXP_P1;
2158                 e3 = e3 - 1;
2159                 res.w[1] =
2160                   z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2161                 res.w[0] = 0x378d8e6400000000ull - R64;
2162               } else {
2163                 // We want R64 to be the top digit of C4, but we actually
2164                 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2165                 // because the top digit is (C4 * 10^(-q4+1))RZ
2166                 // however, if incr_exp = 1 then R64 = 10 with certainty
2167                 if (incr_exp) {
2168                   R64 = 10;
2169                 }
2170                 // the result is inexact as C4 has more than 1 significant digit
2171                 // and C3 * 10^scale = 10^33
2172                 // example of case that is treated here:
2173                 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2174                 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2175                 // note that (e3 > expmin}
2176                 // in order to round, subtract R64 from 10^34 and then compare
2177                 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2178                 // calculate 10^34 - R64
2179                 res.w[1] = 0x0001ed09bead87c0ull;
2180                 res.w[0] = 0x378d8e6400000000ull - R64;
2181                 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2182                 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2183                 // R64 is small, 1 <= R64 <= 9
2184                 e3 = e3 - 1;
2185                 if (is_inexact_lt_midpoint) {
2186                   is_inexact_lt_midpoint = 0;
2187                   is_inexact_gt_midpoint = 1;
2188                 } else if (is_inexact_gt_midpoint) {
2189                   is_inexact_gt_midpoint = 0;
2190                   is_inexact_lt_midpoint = 1;
2191                 } else if (is_midpoint_lt_even) {
2192                   is_midpoint_lt_even = 0;
2193                   is_midpoint_gt_even = 1;
2194                 } else if (is_midpoint_gt_even) {
2195                   is_midpoint_gt_even = 0;
2196                   is_midpoint_lt_even = 1;
2197                 } else {
2198                   ;
2199                 }
2200                 // the result is always inexact, and never tiny
2201                 // check for overflow for RN
2202                 if (e3 > expmax) {
2203                   if (rnd_mode == ROUNDING_TO_NEAREST) {
2204                     res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2205                     res.w[0] = 0x0000000000000000ull;
2206                     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2207                   } else {
2208                     rounding_correction (rnd_mode,
2209                                          is_inexact_lt_midpoint,
2210                                          is_inexact_gt_midpoint,
2211                                          is_midpoint_lt_even,
2212                                          is_midpoint_gt_even, e3, &res,
2213                                          pfpsf);
2214                   }
2215                   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2216                   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2217                   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2218                   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2219                   BID_SWAP128 (res);
2220                   BID_RETURN (res)
2221                 }
2222                 // set the inexact flag
2223                 *pfpsf |= INEXACT_EXCEPTION;
2224                 res.w[1] =
2225                   z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2226                 if (rnd_mode != ROUNDING_TO_NEAREST) {
2227                   rounding_correction (rnd_mode,
2228                                        is_inexact_lt_midpoint,
2229                                        is_inexact_gt_midpoint,
2230                                        is_midpoint_lt_even,
2231                                        is_midpoint_gt_even, e3, &res,
2232                                        pfpsf);
2233                 }
2234                 z_exp = res.w[1] & MASK_EXP;
2235               } // end result is inexact
2236             } // end q4 > 1
2237           } else { // if (e3 = emin)
2238             // if e3 = expmin the result is also tiny (the condition for
2239             // tininess is C4 > 050...0 [q4 digits] which is met because
2240             // the msd of C4 is not zero)
2241             // the result is tiny and inexact in all rounding modes;
2242             // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2243             // gt_half_ulp to calculate)
2244             // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2245
2246             // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2247             if (gt_half_ulp) { // res = 10^33 - 1
2248               res.w[1] = 0x0000314dc6448d93ull;
2249               res.w[0] = 0x38c15b09ffffffffull;
2250             } else {
2251               res.w[1] = 0x0000314dc6448d93ull;
2252               res.w[0] = 0x38c15b0a00000000ull;
2253             }
2254             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2255             *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2256
2257             if (eq_half_ulp) {
2258               is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2259             } else if (lt_half_ulp) {
2260               is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2261             } else { // if (gt_half_ulp)
2262               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2263             }
2264
2265             if (rnd_mode != ROUNDING_TO_NEAREST) {
2266               rounding_correction (rnd_mode,
2267                   is_inexact_lt_midpoint,
2268                   is_inexact_gt_midpoint,
2269                   is_midpoint_lt_even,
2270                   is_midpoint_gt_even, e3, &res,
2271                   pfpsf);
2272               z_exp = res.w[1] & MASK_EXP;
2273             }
2274           } // end e3 = emin
2275           // set the inexact flag (if the result was not exact)
2276           if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2277               is_midpoint_lt_even || is_midpoint_gt_even)
2278             *pfpsf |= INEXACT_EXCEPTION;
2279         } // end 10^33
2280       } // end if (p_sign != z_sign)
2281       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2282       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2283       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2284       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2285       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2286       BID_SWAP128 (res);
2287       BID_RETURN (res)
2288
2289     } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2290         (q3 <= delta && delta + q4 <= p34) || // Case (3)
2291         (delta < q3 && p34 < delta + q4) || // Case (4)
2292         (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2293         (delta + q4 < q3)) && // Case (6)
2294         !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2295
2296       // the result has the sign of z
2297
2298       if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2299           (delta < q3 && p34 < delta + q4)) { // Case (4)
2300         // round first the sum x * y + z with unbounded exponent
2301         // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2302         // 1 <= scale <= 33
2303         // calculate res = C3 * 10^scale
2304         scale = p34 - q3;
2305         x0 = delta + q4 - p34;
2306       } else if (delta + q4 < q3) { // Case (6)
2307         // make Case (6) look like Case (3) or Case (5) with scale = 0
2308         // by scaling up C4 by 10^(q3 - delta - q4)
2309         scale = q3 - delta - q4; // 1 <= scale <= 33
2310         if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2311           if (scale <= 19) { // 10^scale fits in 64 bits
2312             // 64 x 64 C4.w[0] * ten2k64[scale]
2313             __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2314           } else { // 10^scale fits in 128 bits
2315             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2316             __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2317           }
2318         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2319           // 64 x 128 ten2k64[scale] * C4
2320           __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2321         }
2322         C4.w[0] = P128.w[0];
2323         C4.w[1] = P128.w[1];
2324         // e4 does not need adjustment, as it is not used from this point on
2325         scale = 0;
2326         x0 = 0;
2327         // now Case (6) looks like Case (3) or Case (5) with scale = 0
2328       } else { // if Case (3) or Case (5)
2329         // Note: Case (3) is similar to Case (2), but scale differs and the
2330         // result is exact, unless it is tiny (so x0 = 0 when calculating the
2331         // result with unbounded exponent)
2332
2333         // calculate first the sum x * y + z with unbounded exponent (exact)
2334         // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2335         // 1 <= scale <= 33
2336         // calculate res = C3 * 10^scale
2337         scale = delta + q4 - q3;
2338         x0 = 0;
2339         // Note: the comments which follow refer [mainly] to Case (2)]
2340       }
2341
2342     case2_repeat:
2343       if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2344         // or in Case (4)
2345         res.w[1] = C3.w[1];
2346         res.w[0] = C3.w[0];
2347       } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2348         if (scale <= 19) { // 10^scale fits in 64 bits
2349           // 64 x 64 C3.w[0] * ten2k64[scale]
2350           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2351         } else { // 10^scale fits in 128 bits
2352           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2353           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2354         }
2355       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2356         // 64 x 128 ten2k64[scale] * C3
2357         __mul_128x64_to_128 (res, ten2k64[scale], C3);
2358       }
2359       // e3 is already calculated
2360       e3 = e3 - scale;
2361       // now res = C3 * 10^scale and e3 = e3 - scale
2362       // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2363       // because the result was too small
2364
2365       // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2366       // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2367       // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2368       // the rounding fits in 128 bits!)
2369       // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2370       // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2371       if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2372         // for Case (3) or Case (6)
2373         R128.w[1] = C4.w[1];
2374         R128.w[0] = C4.w[0];
2375       } else if (q4 <= 18) {
2376         // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2377         round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2378             &is_midpoint_lt_even, &is_midpoint_gt_even,
2379             &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2380         if (incr_exp) {
2381           // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2382           R64 = ten2k64[q4 - x0];
2383         }
2384         R128.w[1] = 0;
2385         R128.w[0] = R64;
2386       } else if (q4 <= 38) {
2387         // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2388         P128.w[1] = C4.w[1];
2389         P128.w[0] = C4.w[0];
2390         round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2391             &is_midpoint_lt_even, &is_midpoint_gt_even,
2392             &is_inexact_lt_midpoint,
2393             &is_inexact_gt_midpoint);
2394         if (incr_exp) {
2395           // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2396           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2397             R128.w[0] = ten2k64[q4 - x0];
2398             // R128.w[1] stays 0
2399           } else { // 20 <= q4 - x0 <= 37
2400             R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2401             R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2402           }
2403         }
2404       } else if (q4 <= 57) {
2405         // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2406         P192.w[2] = C4.w[2];
2407         P192.w[1] = C4.w[1];
2408         P192.w[0] = C4.w[0];
2409         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2410             &is_midpoint_lt_even, &is_midpoint_gt_even,
2411             &is_inexact_lt_midpoint,
2412             &is_inexact_gt_midpoint);
2413         // R192.w[2] is always 0
2414         if (incr_exp) {
2415           // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2416           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2417             R192.w[0] = ten2k64[q4 - x0];
2418             // R192.w[1] stays 0
2419             // R192.w[2] stays 0
2420           } else { // 20 <= q4 - x0 <= 33
2421             R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2422             R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2423             // R192.w[2] stays 0
2424           }
2425         }
2426         R128.w[1] = R192.w[1];
2427         R128.w[0] = R192.w[0];
2428       } else {
2429         // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2430         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2431             &is_midpoint_lt_even, &is_midpoint_gt_even,
2432             &is_inexact_lt_midpoint,
2433             &is_inexact_gt_midpoint);
2434         // R256.w[3] and R256.w[2] are always 0
2435         if (incr_exp) {
2436           // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2437           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2438             R256.w[0] = ten2k64[q4 - x0];
2439             // R256.w[1] stays 0
2440             // R256.w[2] stays 0
2441             // R256.w[3] stays 0
2442           } else { // 20 <= q4 - x0 <= 33
2443             R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2444             R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2445             // R256.w[2] stays 0
2446             // R256.w[3] stays 0
2447           }
2448         }
2449         R128.w[1] = R256.w[1];
2450         R128.w[0] = R256.w[0];
2451       }
2452       // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2453       // rounded to nearest, which were copied into R128
2454       if (z_sign == p_sign) {
2455         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2456         // the sum can result in [up to] p34 or p34 + 1 digits
2457         res.w[0] = res.w[0] + R128.w[0];
2458         res.w[1] = res.w[1] + R128.w[1];
2459         if (res.w[0] < R128.w[0])
2460           res.w[1]++; // carry
2461         // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2462         if (res.w[1] > 0x0001ed09bead87c0ull ||
2463             (res.w[1] == 0x0001ed09bead87c0ull &&
2464              res.w[0] > 0x378d8e63ffffffffull)) {
2465           // avoid double rounding error
2466           is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2467           is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2468           is_midpoint_lt_even0 = is_midpoint_lt_even;
2469           is_midpoint_gt_even0 = is_midpoint_gt_even;
2470           is_inexact_lt_midpoint = 0;
2471           is_inexact_gt_midpoint = 0;
2472           is_midpoint_lt_even = 0;
2473           is_midpoint_gt_even = 0;
2474           P128.w[1] = res.w[1];
2475           P128.w[0] = res.w[0];
2476           round128_19_38 (35, 1, P128, &res, &incr_exp,
2477               &is_midpoint_lt_even, &is_midpoint_gt_even,
2478               &is_inexact_lt_midpoint,
2479               &is_inexact_gt_midpoint);
2480           // incr_exp is 0 with certainty in this case
2481           // avoid a double rounding error
2482           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2483               is_midpoint_lt_even) { // double rounding error upward
2484             // res = res - 1
2485             res.w[0]--;
2486             if (res.w[0] == 0xffffffffffffffffull)
2487               res.w[1]--;
2488             // Note: a double rounding error upward is not possible; for this
2489             // the result after the first rounding would have to be 99...95
2490             // (35 digits in all), possibly followed by a number of zeros; this
2491             // not possible in Cases (2)-(6) or (15)-(17) which may get here
2492             is_midpoint_lt_even = 0;
2493             is_inexact_lt_midpoint = 1;
2494           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2495               is_midpoint_gt_even) { // double rounding error downward
2496             // res = res + 1
2497             res.w[0]++;
2498             if (res.w[0] == 0)
2499               res.w[1]++;
2500             is_midpoint_gt_even = 0;
2501             is_inexact_gt_midpoint = 1;
2502           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2503                      !is_inexact_lt_midpoint
2504                      && !is_inexact_gt_midpoint) {
2505             // if this second rounding was exact the result may still be
2506             // inexact because of the first rounding
2507             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2508               is_inexact_gt_midpoint = 1;
2509             }
2510             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2511               is_inexact_lt_midpoint = 1;
2512             }
2513           } else if (is_midpoint_gt_even &&
2514                      (is_inexact_gt_midpoint0
2515                       || is_midpoint_lt_even0)) {
2516             // pulled up to a midpoint
2517             is_inexact_lt_midpoint = 1;
2518             is_inexact_gt_midpoint = 0;
2519             is_midpoint_lt_even = 0;
2520             is_midpoint_gt_even = 0;
2521           } else if (is_midpoint_lt_even &&
2522                      (is_inexact_lt_midpoint0
2523                       || is_midpoint_gt_even0)) {
2524             // pulled down to a midpoint
2525             is_inexact_lt_midpoint = 0;
2526             is_inexact_gt_midpoint = 1;
2527             is_midpoint_lt_even = 0;
2528             is_midpoint_gt_even = 0;
2529           } else {
2530             ;
2531           }
2532           // adjust exponent
2533           e3 = e3 + 1;
2534           if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2535               !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2536             if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2537                 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2538               is_inexact_lt_midpoint = 1;
2539             }
2540           }
2541         } else {
2542           // this is the result rounded with unbounded exponent, unless a
2543           // correction is needed
2544           res.w[1] = res.w[1] & MASK_COEFF;
2545           if (lsb == 1) {
2546             if (is_midpoint_gt_even) {
2547               // res = res + 1
2548               is_midpoint_gt_even = 0;
2549               is_midpoint_lt_even = 1;
2550               res.w[0]++;
2551               if (res.w[0] == 0x0)
2552                 res.w[1]++;
2553               // check for rounding overflow
2554               if (res.w[1] == 0x0001ed09bead87c0ull &&
2555                   res.w[0] == 0x378d8e6400000000ull) {
2556                 // res = 10^34 => rounding overflow
2557                 res.w[1] = 0x0000314dc6448d93ull;
2558                 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2559                 e3++;
2560               }
2561             } else if (is_midpoint_lt_even) {
2562               // res = res - 1
2563               is_midpoint_lt_even = 0;
2564               is_midpoint_gt_even = 1;
2565               res.w[0]--;
2566               if (res.w[0] == 0xffffffffffffffffull)
2567                 res.w[1]--;
2568               // if the result is pure zero, the sign depends on the rounding
2569               // mode (x*y and z had opposite signs)
2570               if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2571                 if (rnd_mode != ROUNDING_DOWN)
2572                   z_sign = 0x0000000000000000ull;
2573                 else
2574                   z_sign = 0x8000000000000000ull;
2575                 // the exponent is max (e3, expmin)
2576                 res.w[1] = 0x0;
2577                 res.w[0] = 0x0;
2578                 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2579                 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2580                 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2581                 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2582                 BID_SWAP128 (res);
2583                 BID_RETURN (res)
2584               }
2585             } else {
2586               ;
2587             }
2588           }
2589         }
2590       } else { // if (z_sign != p_sign)
2591         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2592         // used to swap rounding indicators if p_sign != z_sign
2593         // the sum can result in [up to] p34 or p34 - 1 digits
2594         tmp64 = res.w[0];
2595         res.w[0] = res.w[0] - R128.w[0];
2596         res.w[1] = res.w[1] - R128.w[1];
2597         if (res.w[0] > tmp64)
2598           res.w[1]--; // borrow
2599         // if res < 10^33 and exp > expmin need to decrease x0 and
2600         // increase scale by 1
2601         if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2602                              (res.w[1] == 0x0000314dc6448d93ull &&
2603                               res.w[0] < 0x38c15b0a00000000ull)) ||
2604                             (is_inexact_lt_midpoint
2605                              && res.w[1] == 0x0000314dc6448d93ull
2606                              && res.w[0] == 0x38c15b0a00000000ull))
2607             && x0 >= 1) {
2608           x0 = x0 - 1;
2609           // first restore e3, otherwise it will be too small
2610           e3 = e3 + scale;
2611           scale = scale + 1;
2612           is_inexact_lt_midpoint = 0;
2613           is_inexact_gt_midpoint = 0;
2614           is_midpoint_lt_even = 0;
2615           is_midpoint_gt_even = 0;
2616           incr_exp = 0;
2617           goto case2_repeat;
2618         }
2619         // else this is the result rounded with unbounded exponent;
2620         // because the result has opposite sign to that of C4 which was
2621         // rounded, need to change the rounding indicators
2622         if (is_inexact_lt_midpoint) {
2623           is_inexact_lt_midpoint = 0;
2624           is_inexact_gt_midpoint = 1;
2625         } else if (is_inexact_gt_midpoint) {
2626           is_inexact_gt_midpoint = 0;
2627           is_inexact_lt_midpoint = 1;
2628         } else if (lsb == 0) {
2629           if (is_midpoint_lt_even) {
2630             is_midpoint_lt_even = 0;
2631             is_midpoint_gt_even = 1;
2632           } else if (is_midpoint_gt_even) {
2633             is_midpoint_gt_even = 0;
2634             is_midpoint_lt_even = 1;
2635           } else {
2636             ;
2637           }
2638         } else if (lsb == 1) {
2639           if (is_midpoint_lt_even) {
2640             // res = res + 1
2641             res.w[0]++;
2642             if (res.w[0] == 0x0)
2643               res.w[1]++;
2644             // check for rounding overflow
2645             if (res.w[1] == 0x0001ed09bead87c0ull &&
2646                 res.w[0] == 0x378d8e6400000000ull) {
2647               // res = 10^34 => rounding overflow
2648               res.w[1] = 0x0000314dc6448d93ull;
2649               res.w[0] = 0x38c15b0a00000000ull; // 10^33
2650               e3++;
2651             }
2652           } else if (is_midpoint_gt_even) {
2653             // res = res - 1
2654             res.w[0]--;
2655             if (res.w[0] == 0xffffffffffffffffull)
2656               res.w[1]--;
2657             // if the result is pure zero, the sign depends on the rounding
2658             // mode (x*y and z had opposite signs)
2659             if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2660               if (rnd_mode != ROUNDING_DOWN)
2661                 z_sign = 0x0000000000000000ull;
2662               else
2663                 z_sign = 0x8000000000000000ull;
2664               // the exponent is max (e3, expmin)
2665               res.w[1] = 0x0;
2666               res.w[0] = 0x0;
2667               *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2668               *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2669               *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2670               *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2671               BID_SWAP128 (res);
2672               BID_RETURN (res)
2673             }
2674           } else {
2675             ;
2676           }
2677         } else {
2678           ;
2679         }
2680       }
2681       // check for underflow
2682       if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2683         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2684             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2685              res.w[0] < 0x38c15b0a00000000ull)) {
2686           is_tiny = 1;
2687         }
2688       } else if (e3 < expmin) {
2689         // the result is tiny, so we must truncate more of res
2690         is_tiny = 1;
2691         x0 = expmin - e3;
2692         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2693         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2694         is_midpoint_lt_even0 = is_midpoint_lt_even;
2695         is_midpoint_gt_even0 = is_midpoint_gt_even;
2696         is_inexact_lt_midpoint = 0;
2697         is_inexact_gt_midpoint = 0;
2698         is_midpoint_lt_even = 0;
2699         is_midpoint_gt_even = 0;
2700         // determine the number of decimal digits in res
2701         if (res.w[1] == 0x0) {
2702           // between 1 and 19 digits
2703           for (ind = 1; ind <= 19; ind++) {
2704             if (res.w[0] < ten2k64[ind]) {
2705               break;
2706             }
2707           }
2708           // ind digits
2709         } else if (res.w[1] < ten2k128[0].w[1] ||
2710                    (res.w[1] == ten2k128[0].w[1]
2711                     && res.w[0] < ten2k128[0].w[0])) {
2712           // 20 digits
2713           ind = 20;
2714         } else { // between 21 and 38 digits
2715           for (ind = 1; ind <= 18; ind++) {
2716             if (res.w[1] < ten2k128[ind].w[1] ||
2717                 (res.w[1] == ten2k128[ind].w[1] &&
2718                  res.w[0] < ten2k128[ind].w[0])) {
2719               break;
2720             }
2721           }
2722           // ind + 20 digits
2723           ind = ind + 20;
2724         }
2725
2726         // at this point ind >= x0; because delta >= 2 on this path, the case
2727         // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2728         // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2729         // the signs of x * y and z are opposite, and through cancellation
2730         // the most significant decimal digit in res has the weight
2731         // 10^(emin-1); however, it is clear that in this case the most
2732         // significant digit is 9, so the result before rounding is
2733         // 0.9... * 10^emin
2734         // Otherwise, ind > x0 because there are non-zero decimal digits in the
2735         // result with weight of at least 10^emin, and correction for underflow
2736         //  can be carried out using the round*_*_2_* () routines
2737         if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2738           res.w[1] = 0x0;
2739           res.w[0] = 0x1;
2740           is_inexact_gt_midpoint = 1;
2741         } else if (ind <= 18) { // check that 2 <= ind
2742           // 2 <= ind <= 18, 1 <= x0 <= 17
2743           round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2744                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2745                         &is_inexact_lt_midpoint,
2746                         &is_inexact_gt_midpoint);
2747           if (incr_exp) {
2748             // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2749             R64 = ten2k64[ind - x0];
2750           }
2751           res.w[1] = 0;
2752           res.w[0] = R64;
2753         } else if (ind <= 38) {
2754           // 19 <= ind <= 38
2755           P128.w[1] = res.w[1];
2756           P128.w[0] = res.w[0];
2757           round128_19_38 (ind, x0, P128, &res, &incr_exp,
2758                           &is_midpoint_lt_even, &is_midpoint_gt_even,
2759                           &is_inexact_lt_midpoint,
2760                           &is_inexact_gt_midpoint);
2761           if (incr_exp) {
2762             // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2763             if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2764               res.w[0] = ten2k64[ind - x0];
2765               // res.w[1] stays 0
2766             } else { // 20 <= ind - x0 <= 37
2767               res.w[0] = ten2k128[ind - x0 - 20].w[0];
2768               res.w[1] = ten2k128[ind - x0 - 20].w[1];
2769             }
2770           }
2771         }
2772         // avoid a double rounding error
2773         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2774             is_midpoint_lt_even) { // double rounding error upward
2775           // res = res - 1
2776           res.w[0]--;
2777           if (res.w[0] == 0xffffffffffffffffull)
2778             res.w[1]--;
2779           // Note: a double rounding error upward is not possible; for this
2780           // the result after the first rounding would have to be 99...95
2781           // (35 digits in all), possibly followed by a number of zeros; this
2782           // not possible in Cases (2)-(6) which may get here
2783           is_midpoint_lt_even = 0;
2784           is_inexact_lt_midpoint = 1;
2785         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2786             is_midpoint_gt_even) { // double rounding error downward
2787           // res = res + 1
2788           res.w[0]++;
2789           if (res.w[0] == 0)
2790             res.w[1]++;
2791           is_midpoint_gt_even = 0;
2792           is_inexact_gt_midpoint = 1;
2793         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2794                    !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2795           // if this second rounding was exact the result may still be
2796           // inexact because of the first rounding
2797           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2798             is_inexact_gt_midpoint = 1;
2799           }
2800           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2801             is_inexact_lt_midpoint = 1;
2802           }
2803         } else if (is_midpoint_gt_even &&
2804                    (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2805           // pulled up to a midpoint
2806           is_inexact_lt_midpoint = 1;
2807           is_inexact_gt_midpoint = 0;
2808           is_midpoint_lt_even = 0;
2809           is_midpoint_gt_even = 0;
2810         } else if (is_midpoint_lt_even &&
2811                    (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2812           // pulled down to a midpoint
2813           is_inexact_lt_midpoint = 0;
2814           is_inexact_gt_midpoint = 1;
2815           is_midpoint_lt_even = 0;
2816           is_midpoint_gt_even = 0;
2817         } else {
2818           ;
2819         }
2820         // adjust exponent
2821         e3 = e3 + x0;
2822         if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2823             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2824           if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2825               is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2826             is_inexact_lt_midpoint = 1;
2827           }
2828         }
2829       } else {
2830         ; // not underflow
2831       }
2832       // check for inexact result
2833       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2834           is_midpoint_lt_even || is_midpoint_gt_even) {
2835         // set the inexact flag
2836         *pfpsf |= INEXACT_EXCEPTION;
2837         if (is_tiny)
2838           *pfpsf |= UNDERFLOW_EXCEPTION;
2839       }
2840       // now check for significand = 10^34 (may have resulted from going
2841       // back to case2_repeat)
2842       if (res.w[1] == 0x0001ed09bead87c0ull &&
2843           res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
2844         res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2845         res.w[0] = 0x38c15b0a00000000ull;
2846         e3 = e3 + 1;
2847       }
2848       res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2849       // check for overflow
2850       if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2851         res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2852         res.w[0] = 0x0000000000000000ull;
2853         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2854       }
2855       if (rnd_mode != ROUNDING_TO_NEAREST) {
2856         rounding_correction (rnd_mode,
2857                              is_inexact_lt_midpoint,
2858                              is_inexact_gt_midpoint,
2859                              is_midpoint_lt_even, is_midpoint_gt_even,
2860                              e3, &res, pfpsf);
2861       }
2862       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2863       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2864       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2865       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2866       BID_SWAP128 (res);
2867       BID_RETURN (res)
2868
2869     } else {
2870
2871       // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2872       // the signs of x*y and z are opposite; in these cases massive
2873       // cancellation can occur, so it is better to scale either C3 or C4 and
2874       // to perform the subtraction before rounding; rounding is performed
2875       // next, depending on the number of decimal digits in the result and on
2876       // the exponent value
2877       // Note: overlow is not possible in this case
2878       // this is similar to Cases (15), (16), and (17)
2879
2880       if (delta + q4 < q3) { // from Case (6)
2881         // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2882         // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2883         // and call add_and_round; delta stays positive
2884         // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2885         P128.w[1] = C3.w[1];
2886         P128.w[0] = C3.w[0];
2887         C3.w[1] = C4.w[1];
2888         C3.w[0] = C4.w[0];
2889         C4.w[1] = P128.w[1];
2890         C4.w[0] = P128.w[0];
2891         ind = q3;
2892         q3 = q4;
2893         q4 = ind;
2894         ind = e3;
2895         e3 = e4;
2896         e4 = ind;
2897         tmp_sign = z_sign;
2898         z_sign = p_sign;
2899         p_sign = tmp_sign;
2900       } else { // from Cases (2), (3), (4), (5)
2901         // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2902         // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2903         // (16), and (17) if we just change the sign of delta
2904         delta = -delta;
2905       }
2906       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2907                      rnd_mode, &is_midpoint_lt_even,
2908                      &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2909                      &is_inexact_gt_midpoint, pfpsf, &res);
2910       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2911       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2912       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2913       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2914       BID_SWAP128 (res);
2915       BID_RETURN (res)
2916
2917     }
2918
2919   } else { // if delta < 0
2920
2921     delta = -delta;
2922
2923     if (p34 < q4 && q4 <= delta) { // Case (7)
2924
2925       // truncate C4 to p34 digits into res
2926       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2927       x0 = q4 - p34;
2928       if (q4 <= 38) {
2929         P128.w[1] = C4.w[1];
2930         P128.w[0] = C4.w[0];
2931         round128_19_38 (q4, x0, P128, &res, &incr_exp,
2932                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2933                         &is_inexact_lt_midpoint,
2934                         &is_inexact_gt_midpoint);
2935       } else if (q4 <= 57) { // 35 <= q4 <= 57
2936         P192.w[2] = C4.w[2];
2937         P192.w[1] = C4.w[1];
2938         P192.w[0] = C4.w[0];
2939         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2940                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2941                         &is_inexact_lt_midpoint,
2942                         &is_inexact_gt_midpoint);
2943         res.w[0] = R192.w[0];
2944         res.w[1] = R192.w[1];
2945       } else { // if (q4 <= 68)
2946         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2947                         &is_midpoint_lt_even, &is_midpoint_gt_even,
2948                         &is_inexact_lt_midpoint,
2949                         &is_inexact_gt_midpoint);
2950         res.w[0] = R256.w[0];
2951         res.w[1] = R256.w[1];
2952       }
2953       e4 = e4 + x0;
2954       if (incr_exp) {
2955         e4 = e4 + 1;
2956       }
2957       if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2958           !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2959         // if C4 rounded to p34 digits is exact then the result is inexact,
2960         // in a way that depends on the signs of x * y and z
2961         if (p_sign == z_sign) {
2962           is_inexact_lt_midpoint = 1;
2963         } else { // if (p_sign != z_sign)
2964           if (res.w[1] != 0x0000314dc6448d93ull ||
2965               res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2966             is_inexact_gt_midpoint = 1;
2967           } else { // res = 10^33 and exact is a special case
2968             // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2969             // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2970             // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2971             // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2972             if (delta > p34 + 1) { // C3 < 1/2
2973               // res = 10^33, unchanged
2974               is_inexact_gt_midpoint = 1;
2975             } else { // if (delta == p34 + 1)
2976               if (q3 <= 19) {
2977                 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2978                   // res = 10^33, unchanged
2979                   is_inexact_gt_midpoint = 1;
2980                 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2981                   // res = 10^33, unchanged
2982                   is_midpoint_lt_even = 1;
2983                 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2984                   res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2985                   res.w[0] = 0x378d8e63ffffffffull;
2986                   e4 = e4 - 1;
2987                   is_inexact_lt_midpoint = 1;
2988                 }
2989               } else { // if (20 <= q3 <=34)
2990                 if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2991                     (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2992                     C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2993                   // res = 10^33, unchanged
2994                   is_inexact_gt_midpoint = 1;
2995                 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2996                     C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2997                   // res = 10^33, unchanged
2998                   is_midpoint_lt_even = 1;
2999                 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
3000                   res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3001                   res.w[0] = 0x378d8e63ffffffffull;
3002                   e4 = e4 - 1;
3003                   is_inexact_lt_midpoint = 1;
3004                 }
3005               }
3006             }
3007           }
3008         }
3009       } else if (is_midpoint_lt_even) {
3010         if (z_sign != p_sign) {
3011           // needs correction: res = res - 1
3012           res.w[0] = res.w[0] - 1;
3013           if (res.w[0] == 0xffffffffffffffffull)
3014             res.w[1]--;
3015           // if it is (10^33-1)*10^e4 then the corect result is
3016           // (10^34-1)*10(e4-1)
3017           if (res.w[1] == 0x0000314dc6448d93ull &&
3018               res.w[0] == 0x38c15b09ffffffffull) {
3019             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3020             res.w[0] = 0x378d8e63ffffffffull;
3021             e4 = e4 - 1;
3022           }
3023           is_midpoint_lt_even = 0;
3024           is_inexact_lt_midpoint = 1;
3025         } else { // if (z_sign == p_sign)
3026           is_midpoint_lt_even = 0;
3027           is_inexact_gt_midpoint = 1;
3028         }
3029       } else if (is_midpoint_gt_even) {
3030         if (z_sign == p_sign) {
3031           // needs correction: res = res + 1 (cannot cross in the next binade)
3032           res.w[0] = res.w[0] + 1;
3033           if (res.w[0] == 0x0000000000000000ull)
3034             res.w[1]++;
3035           is_midpoint_gt_even = 0;
3036           is_inexact_gt_midpoint = 1;
3037         } else { // if (z_sign != p_sign)
3038           is_midpoint_gt_even = 0;
3039           is_inexact_lt_midpoint = 1;
3040         }
3041       } else {
3042         ; // the rounded result is already correct
3043       }
3044       // check for overflow
3045       if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3046         res.w[1] = p_sign | 0x7800000000000000ull;
3047         res.w[0] = 0x0000000000000000ull;
3048         *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3049       } else { // no overflow or not RN
3050         p_exp = ((UINT64) (e4 + 6176) << 49);
3051         res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3052       }
3053       if (rnd_mode != ROUNDING_TO_NEAREST) {
3054         rounding_correction (rnd_mode,
3055                              is_inexact_lt_midpoint,
3056                              is_inexact_gt_midpoint,
3057                              is_midpoint_lt_even, is_midpoint_gt_even,
3058                              e4, &res, pfpsf);
3059       }
3060       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3061           is_midpoint_lt_even || is_midpoint_gt_even) {
3062         // set the inexact flag
3063         *pfpsf |= INEXACT_EXCEPTION;
3064       }
3065       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3066       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3067       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3068       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3069       BID_SWAP128 (res);
3070       BID_RETURN (res)
3071
3072     } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3073         (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3074         (q4 <= delta && delta + q3 <= p34) || // Case (10)
3075         (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3076         (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3077         (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3078
3079       // Case (8) is similar to Case (1), with C3 and C4 swapped
3080       // Case (9) is similar to Case (2), with C3 and C4 swapped
3081       // Case (10) is similar to Case (3), with C3 and C4 swapped
3082       // Case (13) is similar to Case (4), with C3 and C4 swapped
3083       // Case (14) is similar to Case (5), with C3 and C4 swapped
3084       // Case (18) is similar to Case (6), with C3 and C4 swapped
3085
3086       // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3087       // and go back to delta_ge_zero
3088       // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3089       P128.w[1] = C3.w[1];
3090       P128.w[0] = C3.w[0];
3091       C3.w[1] = C4.w[1];
3092       C3.w[0] = C4.w[0];
3093       C4.w[1] = P128.w[1];
3094       C4.w[0] = P128.w[0];
3095       ind = q3;
3096       q3 = q4;
3097       q4 = ind;
3098       ind = e3;
3099       e3 = e4;
3100       e4 = ind;
3101       tmp_sign = z_sign;
3102       z_sign = p_sign;
3103       p_sign = tmp_sign;
3104       tmp.ui64 = z_exp;
3105       z_exp = p_exp;
3106       p_exp = tmp.ui64;
3107       goto delta_ge_zero;
3108
3109     } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3110                (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3111
3112       // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3113       // 1 <= x0 <= q3 - 1 <= p34 - 1
3114       x0 = e4 - e3; // or x0 = delta + q3 - q4
3115       if (q3 <= 18) { // 2 <= q3 <= 18
3116         round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3117                       &is_midpoint_lt_even, &is_midpoint_gt_even,
3118                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3119         // C3.w[1] = 0;
3120         C3.w[0] = R64;
3121       } else if (q3 <= 38) {
3122         round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3123                         &is_midpoint_lt_even, &is_midpoint_gt_even,
3124                         &is_inexact_lt_midpoint,
3125                         &is_inexact_gt_midpoint);
3126         C3.w[1] = R128.w[1];
3127         C3.w[0] = R128.w[0];
3128       }
3129       // the rounded result has q3 - x0 digits
3130       // we want the exponent to be e4, so if incr_exp = 1 then
3131       // multiply the rounded result by 10 - it will still fit in 113 bits
3132       if (incr_exp) {
3133         // 64 x 128 -> 128
3134         P128.w[1] = C3.w[1];
3135         P128.w[0] = C3.w[0];
3136         __mul_64x128_to_128 (C3, ten2k64[1], P128);
3137       }
3138       e3 = e3 + x0; // this is e4
3139       // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3140       // the result will have the sign of x * y; the exponent is e4
3141       R256.w[3] = 0;
3142       R256.w[2] = 0;
3143       R256.w[1] = C3.w[1];
3144       R256.w[0] = C3.w[0];
3145       if (p_sign == z_sign) { // R256 = C4 + R256
3146         add256 (C4, R256, &R256);
3147       } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3148         sub256 (C4, R256, &R256); // the result cannot be pure zero
3149         // because the result has opposite sign to that of R256 which was
3150         // rounded, need to change the rounding indicators
3151         lsb = C4.w[0] & 0x01;
3152         if (is_inexact_lt_midpoint) {
3153           is_inexact_lt_midpoint = 0;
3154           is_inexact_gt_midpoint = 1;
3155         } else if (is_inexact_gt_midpoint) {
3156           is_inexact_gt_midpoint = 0;
3157           is_inexact_lt_midpoint = 1;
3158         } else if (lsb == 0) {
3159           if (is_midpoint_lt_even) {
3160             is_midpoint_lt_even = 0;
3161             is_midpoint_gt_even = 1;
3162           } else if (is_midpoint_gt_even) {
3163             is_midpoint_gt_even = 0;
3164             is_midpoint_lt_even = 1;
3165           } else {
3166             ;
3167           }
3168         } else if (lsb == 1) {
3169           if (is_midpoint_lt_even) {
3170             // res = res + 1
3171             R256.w[0]++;
3172             if (R256.w[0] == 0x0) {
3173               R256.w[1]++;
3174               if (R256.w[1] == 0x0) {
3175                 R256.w[2]++;
3176                 if (R256.w[2] == 0x0) {
3177                   R256.w[3]++;
3178                 }
3179               }
3180             }
3181             // no check for rounding overflow - R256 was a difference
3182           } else if (is_midpoint_gt_even) {
3183             // res = res - 1
3184             R256.w[0]--;
3185             if (R256.w[0] == 0xffffffffffffffffull) {
3186               R256.w[1]--;
3187               if (R256.w[1] == 0xffffffffffffffffull) {
3188                 R256.w[2]--;
3189                 if (R256.w[2] == 0xffffffffffffffffull) {
3190                   R256.w[3]--;
3191                 }
3192               }
3193             }
3194           } else {
3195             ;
3196           }
3197         } else {
3198           ;
3199         }
3200       }
3201       // determine the number of decimal digits in R256
3202       ind = nr_digits256 (R256); // ind >= p34
3203       // if R256 is sum, then ind > p34; if R256 is a difference, then
3204       // ind >= p34; this means that we can calculate the result rounded to
3205       // the destination precision, with unbounded exponent, starting from R256
3206       // and using the indicators from the rounding of C3 to avoid a double
3207       // rounding error
3208
3209       if (ind < p34) {
3210         ;
3211       } else if (ind == p34) {
3212         // the result rounded to the destination precision with
3213         // unbounded exponent
3214         // is (-1)^p_sign * R256 * 10^e4
3215         res.w[1] = R256.w[1];
3216         res.w[0] = R256.w[0];
3217       } else { // if (ind > p34)
3218         // if more than P digits, round to nearest to P digits
3219         // round R256 to p34 digits
3220         x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3221         // save C3 rounding indicators to help avoid double rounding error
3222         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3223         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3224         is_midpoint_lt_even0 = is_midpoint_lt_even;
3225         is_midpoint_gt_even0 = is_midpoint_gt_even;
3226         // initialize rounding indicators
3227         is_inexact_lt_midpoint = 0;
3228         is_inexact_gt_midpoint = 0;
3229         is_midpoint_lt_even = 0;
3230         is_midpoint_gt_even = 0;
3231         // round to p34 digits; the result fits in 113 bits
3232         if (ind <= 38) {
3233           P128.w[1] = R256.w[1];
3234           P128.w[0] = R256.w[0];
3235           round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3236                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3237                           &is_inexact_lt_midpoint,
3238                           &is_inexact_gt_midpoint);
3239         } else if (ind <= 57) {
3240           P192.w[2] = R256.w[2];
3241           P192.w[1] = R256.w[1];
3242           P192.w[0] = R256.w[0];
3243           round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3244                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3245                           &is_inexact_lt_midpoint,
3246                           &is_inexact_gt_midpoint);
3247           R128.w[1] = R192.w[1];
3248           R128.w[0] = R192.w[0];
3249         } else { // if (ind <= 68)
3250           round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3251                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3252                           &is_inexact_lt_midpoint,
3253                           &is_inexact_gt_midpoint);
3254           R128.w[1] = R256.w[1];
3255           R128.w[0] = R256.w[0];
3256         }
3257         // the rounded result has p34 = 34 digits
3258         e4 = e4 + x0 + incr_exp;
3259
3260         res.w[1] = R128.w[1];
3261         res.w[0] = R128.w[0];
3262
3263         // avoid a double rounding error
3264         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3265             is_midpoint_lt_even) { // double rounding error upward
3266           // res = res - 1
3267           res.w[0]--;
3268           if (res.w[0] == 0xffffffffffffffffull)
3269             res.w[1]--;
3270           is_midpoint_lt_even = 0;
3271           is_inexact_lt_midpoint = 1;
3272           // Note: a double rounding error upward is not possible; for this
3273           // the result after the first rounding would have to be 99...95
3274           // (35 digits in all), possibly followed by a number of zeros; this
3275           // not possible in Cases (2)-(6) or (15)-(17) which may get here
3276           // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3277           if (res.w[1] == 0x0000314dc6448d93ull &&
3278             res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3279             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3280             res.w[0] = 0x378d8e63ffffffffull;
3281             e4--;
3282           }
3283         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3284             is_midpoint_gt_even) { // double rounding error downward
3285           // res = res + 1
3286           res.w[0]++;
3287           if (res.w[0] == 0)
3288             res.w[1]++;
3289           is_midpoint_gt_even = 0;
3290           is_inexact_gt_midpoint = 1;
3291         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3292                    !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3293           // if this second rounding was exact the result may still be
3294           // inexact because of the first rounding
3295           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3296             is_inexact_gt_midpoint = 1;
3297           }
3298           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3299             is_inexact_lt_midpoint = 1;
3300           }
3301         } else if (is_midpoint_gt_even &&
3302                    (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3303           // pulled up to a midpoint
3304           is_inexact_lt_midpoint = 1;
3305           is_inexact_gt_midpoint = 0;
3306           is_midpoint_lt_even = 0;
3307           is_midpoint_gt_even = 0;
3308         } else if (is_midpoint_lt_even &&
3309                    (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3310           // pulled down to a midpoint
3311           is_inexact_lt_midpoint = 0;
3312           is_inexact_gt_midpoint = 1;
3313           is_midpoint_lt_even = 0;
3314           is_midpoint_gt_even = 0;
3315         } else {
3316           ;
3317         }
3318       }
3319
3320       // determine tininess
3321       if (rnd_mode == ROUNDING_TO_NEAREST) {
3322         if (e4 < expmin) {
3323           is_tiny = 1; // for other rounding modes apply correction
3324         }
3325       } else {
3326         // for RM, RP, RZ, RA apply correction in order to determine tininess
3327         // but do not save the result; apply the correction to
3328         // (-1)^p_sign * res * 10^0
3329         P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3330         P128.w[0] = res.w[0];
3331         rounding_correction (rnd_mode,
3332                              is_inexact_lt_midpoint,
3333                              is_inexact_gt_midpoint,
3334                              is_midpoint_lt_even, is_midpoint_gt_even,
3335                              0, &P128, pfpsf);
3336         scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3337         // the number of digits in the significand is p34 = 34
3338         if (e4 + scale < expmin) {
3339           is_tiny = 1;
3340         }
3341       }
3342
3343       // the result rounded to the destination precision with unbounded exponent
3344       // is (-1)^p_sign * res * 10^e4
3345       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3346       // res.w[0] unchanged;
3347       // Note: res is correct only if expmin <= e4 <= expmax
3348       ind = p34; // the number of decimal digits in the signifcand of res
3349
3350       // at this point we have the result rounded with unbounded exponent in
3351       // res and we know its tininess:
3352       // res = (-1)^p_sign * significand * 10^e4,
3353       // where q (significand) = ind = p34
3354       // Note: res is correct only if expmin <= e4 <= expmax
3355
3356       // check for overflow if RN
3357       if (rnd_mode == ROUNDING_TO_NEAREST
3358           && (ind + e4) > (p34 + expmax)) {
3359         res.w[1] = p_sign | 0x7800000000000000ull;
3360         res.w[0] = 0x0000000000000000ull;
3361         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3362         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3363         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3364         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3365         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3366         BID_SWAP128 (res);
3367         BID_RETURN (res)
3368       } // else not overflow or not RN, so continue
3369
3370       // from this point on this is similar to the last part of the computation
3371       // for Cases (15), (16), (17)
3372
3373       // if (e4 >= expmin) we have the result rounded with bounded exponent
3374       if (e4 < expmin) {
3375         x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3376         // where the result rounded [at most] once is
3377         //   (-1)^p_sign * significand_res * 10^e4
3378
3379         // avoid double rounding error
3380         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3381         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3382         is_midpoint_lt_even0 = is_midpoint_lt_even;
3383         is_midpoint_gt_even0 = is_midpoint_gt_even;
3384         is_inexact_lt_midpoint = 0;
3385         is_inexact_gt_midpoint = 0;
3386         is_midpoint_lt_even = 0;
3387         is_midpoint_gt_even = 0;
3388
3389         if (x0 > ind) {
3390           // nothing is left of res when moving the decimal point left x0 digits
3391           is_inexact_lt_midpoint = 1;
3392           res.w[1] = p_sign | 0x0000000000000000ull;
3393           res.w[0] = 0x0000000000000000ull;
3394           e4 = expmin;
3395         } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3396           // this is <, =, or > 1/2 ulp
3397           // compare the ind-digit value in the significand of res with
3398           // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3399           // less than, equal to, or greater than 1/2 ulp (significand of res)
3400           R128.w[1] = res.w[1] & MASK_COEFF;
3401           R128.w[0] = res.w[0];
3402           if (ind <= 19) {
3403             if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3404               lt_half_ulp = 1;
3405               is_inexact_lt_midpoint = 1;
3406             } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3407               eq_half_ulp = 1;
3408               is_midpoint_gt_even = 1;
3409             } else { // > 1/2 ulp
3410               gt_half_ulp = 1;
3411               is_inexact_gt_midpoint = 1;
3412             }
3413           } else { // if (ind <= 38)
3414             if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3415                 (R128.w[1] == midpoint128[ind - 20].w[1] &&
3416                 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3417               lt_half_ulp = 1;
3418               is_inexact_lt_midpoint = 1;
3419             } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3420                 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3421               eq_half_ulp = 1;
3422               is_midpoint_gt_even = 1;
3423             } else { // > 1/2 ulp
3424               gt_half_ulp = 1;
3425               is_inexact_gt_midpoint = 1;
3426             }
3427           }
3428           if (lt_half_ulp || eq_half_ulp) {
3429             // res = +0.0 * 10^expmin
3430             res.w[1] = 0x0000000000000000ull;
3431             res.w[0] = 0x0000000000000000ull;
3432           } else { // if (gt_half_ulp)
3433             // res = +1 * 10^expmin
3434             res.w[1] = 0x0000000000000000ull;
3435             res.w[0] = 0x0000000000000001ull;
3436           }
3437           res.w[1] = p_sign | res.w[1];
3438           e4 = expmin;
3439         } else { // if (1 <= x0 <= ind - 1 <= 33)
3440           // round the ind-digit result to ind - x0 digits
3441
3442           if (ind <= 18) { // 2 <= ind <= 18
3443             round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3444                           &is_midpoint_lt_even, &is_midpoint_gt_even,
3445                           &is_inexact_lt_midpoint,
3446                           &is_inexact_gt_midpoint);
3447             res.w[1] = 0x0;
3448             res.w[0] = R64;
3449           } else if (ind <= 38) {
3450             P128.w[1] = res.w[1] & MASK_COEFF;
3451             P128.w[0] = res.w[0];
3452             round128_19_38 (ind, x0, P128, &res, &incr_exp,
3453                             &is_midpoint_lt_even, &is_midpoint_gt_even,
3454                             &is_inexact_lt_midpoint,
3455                             &is_inexact_gt_midpoint);
3456           }
3457           e4 = e4 + x0; // expmin
3458           // we want the exponent to be expmin, so if incr_exp = 1 then
3459           // multiply the rounded result by 10 - it will still fit in 113 bits
3460           if (incr_exp) {
3461             // 64 x 128 -> 128
3462             P128.w[1] = res.w[1] & MASK_COEFF;
3463             P128.w[0] = res.w[0];
3464             __mul_64x128_to_128 (res, ten2k64[1], P128);
3465           }
3466           res.w[1] =
3467             p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3468                                                      w[1] & MASK_COEFF);
3469           // avoid a double rounding error
3470           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3471                 is_midpoint_lt_even) { // double rounding error upward
3472             // res = res - 1
3473             res.w[0]--;
3474             if (res.w[0] == 0xffffffffffffffffull)
3475               res.w[1]--;
3476             // Note: a double rounding error upward is not possible; for this
3477             // the result after the first rounding would have to be 99...95
3478             // (35 digits in all), possibly followed by a number of zeros; this
3479             // not possible in this underflow case
3480             is_midpoint_lt_even = 0;
3481             is_inexact_lt_midpoint = 1;
3482           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3483                 is_midpoint_gt_even) { // double rounding error downward
3484             // res = res + 1
3485             res.w[0]++;
3486             if (res.w[0] == 0)
3487               res.w[1]++;
3488             is_midpoint_gt_even = 0;
3489             is_inexact_gt_midpoint = 1;
3490           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3491                      !is_inexact_lt_midpoint
3492                      && !is_inexact_gt_midpoint) {
3493             // if this second rounding was exact the result may still be
3494             // inexact because of the first rounding
3495             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3496               is_inexact_gt_midpoint = 1;
3497             }
3498             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3499               is_inexact_lt_midpoint = 1;
3500             }
3501           } else if (is_midpoint_gt_even &&
3502                      (is_inexact_gt_midpoint0
3503                       || is_midpoint_lt_even0)) {
3504             // pulled up to a midpoint
3505             is_inexact_lt_midpoint = 1;
3506             is_inexact_gt_midpoint = 0;
3507             is_midpoint_lt_even = 0;
3508             is_midpoint_gt_even = 0;
3509           } else if (is_midpoint_lt_even &&
3510                      (is_inexact_lt_midpoint0
3511                       || is_midpoint_gt_even0)) {
3512             // pulled down to a midpoint
3513             is_inexact_lt_midpoint = 0;
3514             is_inexact_gt_midpoint = 1;
3515             is_midpoint_lt_even = 0;
3516             is_midpoint_gt_even = 0;
3517           } else {
3518             ;
3519           }
3520         }
3521       }
3522       // res contains the correct result
3523       // apply correction if not rounding to nearest
3524       if (rnd_mode != ROUNDING_TO_NEAREST) {
3525         rounding_correction (rnd_mode,
3526                              is_inexact_lt_midpoint,
3527                              is_inexact_gt_midpoint,
3528                              is_midpoint_lt_even, is_midpoint_gt_even,
3529                              e4, &res, pfpsf);
3530       }
3531       if (is_midpoint_lt_even || is_midpoint_gt_even ||
3532           is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3533         // set the inexact flag
3534         *pfpsf |= INEXACT_EXCEPTION;
3535         if (is_tiny)
3536           *pfpsf |= UNDERFLOW_EXCEPTION;
3537       }
3538       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3539       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3540       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3541       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3542       BID_SWAP128 (res);
3543       BID_RETURN (res)
3544
3545     } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3546         (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3547         (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3548
3549       // calculate first the result rounded to the destination precision, with
3550       // unbounded exponent
3551
3552       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3553               rnd_mode, &is_midpoint_lt_even,
3554               &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3555               &is_inexact_gt_midpoint, pfpsf, &res);
3556       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3557       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3558       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3559       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3560       BID_SWAP128 (res);
3561       BID_RETURN (res)
3562
3563     } else {
3564       ;
3565     }
3566
3567   } // end if delta < 0
3568
3569   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3570   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3571   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3572   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3573   BID_SWAP128 (res);
3574   BID_RETURN (res)
3575
3576 }
3577
3578
3579 #if DECIMAL_CALL_BY_REFERENCE
3580 void
3581 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3582             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3583             _EXC_INFO_PARAM) {
3584   UINT128 x = *px, y = *py, z = *pz;
3585 #if !DECIMAL_GLOBAL_ROUNDING
3586   unsigned int rnd_mode = *prnd_mode;
3587 #endif
3588 #else
3589 UINT128
3590 bid128_fma (UINT128 x, UINT128 y, UINT128 z
3591             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3592             _EXC_INFO_PARAM) {
3593 #endif
3594   int is_midpoint_lt_even, is_midpoint_gt_even,
3595     is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3596   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3597
3598 #if DECIMAL_CALL_BY_REFERENCE
3599   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3600                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3601                   &res, &x, &y, &z
3602                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3603                   _EXC_INFO_ARG);
3604 #else
3605   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3606                         &is_inexact_lt_midpoint,
3607                         &is_inexact_gt_midpoint, x, y,
3608                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3609                         _EXC_INFO_ARG);
3610 #endif
3611   BID_RETURN (res);
3612 }
3613
3614
3615 #if DECIMAL_CALL_BY_REFERENCE
3616 void
3617 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3618                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3619                _EXC_INFO_PARAM) {
3620   UINT64 x = *px, y = *py, z = *pz;
3621 #if !DECIMAL_GLOBAL_ROUNDING
3622   unsigned int rnd_mode = *prnd_mode;
3623 #endif
3624 #else
3625 UINT128
3626 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3627                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3628                _EXC_INFO_PARAM) {
3629 #endif
3630   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3631     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3632   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3633   UINT128 x1, y1, z1;
3634
3635 #if DECIMAL_CALL_BY_REFERENCE
3636   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3637   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3638   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3639   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3640                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3641                   &res, &x1, &y1, &z1
3642                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3643                   _EXC_INFO_ARG);
3644 #else
3645   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3646   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3647   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3648   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3649                         &is_inexact_lt_midpoint,
3650                         &is_inexact_gt_midpoint, x1, y1,
3651                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3652                         _EXC_INFO_ARG);
3653 #endif
3654   BID_RETURN (res);
3655 }
3656
3657
3658 #if DECIMAL_CALL_BY_REFERENCE
3659 void
3660 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3661                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3662                _EXC_INFO_PARAM) {
3663   UINT64 x = *px, y = *py;
3664   UINT128 z = *pz;
3665 #if !DECIMAL_GLOBAL_ROUNDING
3666   unsigned int rnd_mode = *prnd_mode;
3667 #endif
3668 #else
3669 UINT128
3670 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3671                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3672                _EXC_INFO_PARAM) {
3673 #endif
3674   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3675     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3676   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3677   UINT128 x1, y1;
3678
3679 #if DECIMAL_CALL_BY_REFERENCE
3680   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3681   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3682   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3683                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3684                   &res, &x1, &y1, &z
3685                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3686                   _EXC_INFO_ARG);
3687 #else
3688   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3689   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3690   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3691                         &is_inexact_lt_midpoint,
3692                         &is_inexact_gt_midpoint, x1, y1,
3693                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3694                         _EXC_INFO_ARG);
3695 #endif
3696   BID_RETURN (res);
3697 }
3698
3699
3700 #if DECIMAL_CALL_BY_REFERENCE
3701 void
3702 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3703                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3704                _EXC_INFO_PARAM) {
3705   UINT64 x = *px, z = *pz;
3706 #if !DECIMAL_GLOBAL_ROUNDING
3707   unsigned int rnd_mode = *prnd_mode;
3708 #endif
3709 #else
3710 UINT128
3711 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3712                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3713                _EXC_INFO_PARAM) {
3714 #endif
3715   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3716     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3717   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3718   UINT128 x1, z1;
3719
3720 #if DECIMAL_CALL_BY_REFERENCE
3721   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3722   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3723   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3724                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3725                   &res, &x1, py, &z1
3726                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3727                   _EXC_INFO_ARG);
3728 #else
3729   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3730   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3731   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3732                         &is_inexact_lt_midpoint,
3733                         &is_inexact_gt_midpoint, x1, y,
3734                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3735                         _EXC_INFO_ARG);
3736 #endif
3737   BID_RETURN (res);
3738 }
3739
3740
3741 #if DECIMAL_CALL_BY_REFERENCE
3742 void
3743 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3744                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3745                _EXC_INFO_PARAM) {
3746   UINT64 x = *px;
3747 #if !DECIMAL_GLOBAL_ROUNDING
3748   unsigned int rnd_mode = *prnd_mode;
3749 #endif
3750 #else
3751 UINT128
3752 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3753                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3754                _EXC_INFO_PARAM) {
3755 #endif
3756   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3757     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3758   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3759   UINT128 x1;
3760
3761 #if DECIMAL_CALL_BY_REFERENCE
3762   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3763   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3764                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3765                   &res, &x1, py, pz
3766                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3767                   _EXC_INFO_ARG);
3768 #else
3769   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3770   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3771                         &is_inexact_lt_midpoint,
3772                         &is_inexact_gt_midpoint, x1, y,
3773                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3774                         _EXC_INFO_ARG);
3775 #endif
3776   BID_RETURN (res);
3777 }
3778
3779
3780 #if DECIMAL_CALL_BY_REFERENCE
3781 void
3782 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3783                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3784                _EXC_INFO_PARAM) {
3785   UINT64 y = *py, z = *pz;
3786 #if !DECIMAL_GLOBAL_ROUNDING
3787   unsigned int rnd_mode = *prnd_mode;
3788 #endif
3789 #else
3790 UINT128
3791 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3792                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3793                _EXC_INFO_PARAM) {
3794 #endif
3795   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3796     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3797   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3798   UINT128 y1, z1;
3799
3800 #if DECIMAL_CALL_BY_REFERENCE
3801   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3802   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3803   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3804                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3805                   &res, px, &y1, &z1
3806                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3807                   _EXC_INFO_ARG);
3808 #else
3809   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3810   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3811   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3812                         &is_inexact_lt_midpoint,
3813                         &is_inexact_gt_midpoint, x, y1,
3814                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3815                         _EXC_INFO_ARG);
3816 #endif
3817   BID_RETURN (res);
3818 }
3819
3820
3821 #if DECIMAL_CALL_BY_REFERENCE
3822 void
3823 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3824                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3825                _EXC_INFO_PARAM) {
3826   UINT64 y = *py;
3827 #if !DECIMAL_GLOBAL_ROUNDING
3828   unsigned int rnd_mode = *prnd_mode;
3829 #endif
3830 #else
3831 UINT128
3832 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3833                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3834                _EXC_INFO_PARAM) {
3835 #endif
3836   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3837     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3838   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3839   UINT128 y1;
3840
3841 #if DECIMAL_CALL_BY_REFERENCE
3842   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3843   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3844                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3845                   &res, px, &y1, pz
3846                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3847                   _EXC_INFO_ARG);
3848 #else
3849   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3850   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3851                         &is_inexact_lt_midpoint,
3852                         &is_inexact_gt_midpoint, x, y1,
3853                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3854                         _EXC_INFO_ARG);
3855 #endif
3856   BID_RETURN (res);
3857 }
3858
3859
3860 #if DECIMAL_CALL_BY_REFERENCE
3861 void
3862 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3863                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3864                _EXC_INFO_PARAM) {
3865   UINT64 z = *pz;
3866 #if !DECIMAL_GLOBAL_ROUNDING
3867   unsigned int rnd_mode = *prnd_mode;
3868 #endif
3869 #else
3870 UINT128
3871 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3872                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3873                _EXC_INFO_PARAM) {
3874 #endif
3875   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3876     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3877   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3878   UINT128 z1;
3879
3880 #if DECIMAL_CALL_BY_REFERENCE
3881   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3882   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3883                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3884                   &res, px, py, &z1
3885                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3886                   _EXC_INFO_ARG);
3887 #else
3888   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3889   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3890                         &is_inexact_lt_midpoint,
3891                         &is_inexact_gt_midpoint, x, y,
3892                         z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3893                         _EXC_INFO_ARG);
3894 #endif
3895   BID_RETURN (res);
3896 }
3897
3898 // Note: bid128qqq_fma is represented by bid128_fma
3899
3900 // Note: bid64ddd_fma is represented by bid64_fma
3901
3902 #if DECIMAL_CALL_BY_REFERENCE
3903 void
3904 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3905               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3906               _EXC_INFO_PARAM) {
3907   UINT64 x = *px, y = *py;
3908 #if !DECIMAL_GLOBAL_ROUNDING
3909   unsigned int rnd_mode = *prnd_mode;
3910 #endif
3911 #else
3912 UINT64
3913 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3914               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3915               _EXC_INFO_PARAM) {
3916 #endif
3917   UINT64 res1 = 0xbaddbaddbaddbaddull;
3918   UINT128 x1, y1;
3919
3920 #if DECIMAL_CALL_BY_REFERENCE
3921   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3922   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3923   bid64qqq_fma (&res1, &x1, &y1, pz
3924                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3925                 _EXC_INFO_ARG);
3926 #else
3927   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3928   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3929   res1 = bid64qqq_fma (x1, y1, z
3930                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3931                        _EXC_INFO_ARG);
3932 #endif
3933   BID_RETURN (res1);
3934 }
3935
3936
3937 #if DECIMAL_CALL_BY_REFERENCE
3938 void
3939 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3940               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3941               _EXC_INFO_PARAM) {
3942   UINT64 x = *px, z = *pz;
3943 #if !DECIMAL_GLOBAL_ROUNDING
3944   unsigned int rnd_mode = *prnd_mode;
3945 #endif
3946 #else
3947 UINT64
3948 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3949               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3950               _EXC_INFO_PARAM) {
3951 #endif
3952   UINT64 res1 = 0xbaddbaddbaddbaddull;
3953   UINT128 x1, z1;
3954
3955 #if DECIMAL_CALL_BY_REFERENCE
3956   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3957   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3958   bid64qqq_fma (&res1, &x1, py, &z1
3959                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3960                 _EXC_INFO_ARG);
3961 #else
3962   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3963   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3964   res1 = bid64qqq_fma (x1, y, z1
3965                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3966                        _EXC_INFO_ARG);
3967 #endif
3968   BID_RETURN (res1);
3969 }
3970
3971
3972 #if DECIMAL_CALL_BY_REFERENCE
3973 void
3974 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3975               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3976               _EXC_INFO_PARAM) {
3977   UINT64 x = *px;
3978 #if !DECIMAL_GLOBAL_ROUNDING
3979   unsigned int rnd_mode = *prnd_mode;
3980 #endif
3981 #else
3982 UINT64
3983 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3984               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3985               _EXC_INFO_PARAM) {
3986 #endif
3987   UINT64 res1 = 0xbaddbaddbaddbaddull;
3988   UINT128 x1;
3989
3990 #if DECIMAL_CALL_BY_REFERENCE
3991   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3992   bid64qqq_fma (&res1, &x1, py, pz
3993                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3994                 _EXC_INFO_ARG);
3995 #else
3996   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3997   res1 = bid64qqq_fma (x1, y, z
3998                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3999                        _EXC_INFO_ARG);
4000 #endif
4001   BID_RETURN (res1);
4002 }
4003
4004
4005 #if DECIMAL_CALL_BY_REFERENCE
4006 void
4007 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4008               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4009               _EXC_INFO_PARAM) {
4010   UINT64 y = *py, z = *pz;
4011 #if !DECIMAL_GLOBAL_ROUNDING
4012   unsigned int rnd_mode = *prnd_mode;
4013 #endif
4014 #else
4015 UINT64
4016 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4017               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4018               _EXC_INFO_PARAM) {
4019 #endif
4020   UINT64 res1 = 0xbaddbaddbaddbaddull;
4021   UINT128 y1, z1;
4022
4023 #if DECIMAL_CALL_BY_REFERENCE
4024   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4025   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4026   bid64qqq_fma (&res1, px, &y1, &z1
4027                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4028                 _EXC_INFO_ARG);
4029 #else
4030   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4031   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4032   res1 = bid64qqq_fma (x, y1, z1
4033                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4034                        _EXC_INFO_ARG);
4035 #endif
4036   BID_RETURN (res1);
4037 }
4038
4039
4040 #if DECIMAL_CALL_BY_REFERENCE
4041 void
4042 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4043               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4044               _EXC_INFO_PARAM) {
4045   UINT64 y = *py;
4046 #if !DECIMAL_GLOBAL_ROUNDING
4047   unsigned int rnd_mode = *prnd_mode;
4048 #endif
4049 #else
4050 UINT64
4051 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4052               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4053               _EXC_INFO_PARAM) {
4054 #endif
4055   UINT64 res1 = 0xbaddbaddbaddbaddull;
4056   UINT128 y1;
4057
4058 #if DECIMAL_CALL_BY_REFERENCE
4059   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4060   bid64qqq_fma (&res1, px, &y1, pz
4061                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4062                 _EXC_INFO_ARG);
4063 #else
4064   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4065   res1 = bid64qqq_fma (x, y1, z
4066                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4067                        _EXC_INFO_ARG);
4068 #endif
4069   BID_RETURN (res1);
4070 }
4071
4072
4073 #if DECIMAL_CALL_BY_REFERENCE
4074 void
4075 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4076               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4077               _EXC_INFO_PARAM) {
4078   UINT64 z = *pz;
4079 #if !DECIMAL_GLOBAL_ROUNDING
4080   unsigned int rnd_mode = *prnd_mode;
4081 #endif
4082 #else
4083 UINT64
4084 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4085               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4086               _EXC_INFO_PARAM) {
4087 #endif
4088   UINT64 res1 = 0xbaddbaddbaddbaddull;
4089   UINT128 z1;
4090
4091 #if DECIMAL_CALL_BY_REFERENCE
4092   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4093   bid64qqq_fma (&res1, px, py, &z1
4094                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4095                 _EXC_INFO_ARG);
4096 #else
4097   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4098   res1 = bid64qqq_fma (x, y, z1
4099                        _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4100                        _EXC_INFO_ARG);
4101 #endif
4102   BID_RETURN (res1);
4103 }
4104
4105
4106 #if DECIMAL_CALL_BY_REFERENCE
4107 void
4108 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4109               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4110               _EXC_INFO_PARAM) {
4111   UINT128 x = *px, y = *py, z = *pz;
4112 #if !DECIMAL_GLOBAL_ROUNDING
4113   unsigned int rnd_mode = *prnd_mode;
4114 #endif
4115 #else
4116 UINT64
4117 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4118               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4119               _EXC_INFO_PARAM) {
4120 #endif
4121   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4122     is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4123   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4124     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4125   int incr_exp;
4126   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4127   UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4128   UINT64 res1 = 0xbaddbaddbaddbaddull;
4129   unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4130   UINT64 sign;
4131   UINT64 exp;
4132   int unbexp;
4133   UINT128 C;
4134   BID_UI64DOUBLE tmp;
4135   int nr_bits;
4136   int q, x0;
4137   int scale;
4138   int lt_half_ulp = 0, eq_half_ulp = 0;
4139
4140   // Note: for rounding modes other than RN or RA, the result can be obtained
4141   // by rounding first to BID128 and then to BID64
4142
4143   save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4144   *pfpsf = 0;
4145
4146 #if DECIMAL_CALL_BY_REFERENCE
4147   bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4148                   &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4149                   &res, &x, &y, &z
4150                   _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4151                   _EXC_INFO_ARG);
4152 #else
4153   res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4154                         &is_inexact_lt_midpoint0,
4155                         &is_inexact_gt_midpoint0, x, y,
4156                         z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4157                         _EXC_INFO_ARG);
4158 #endif
4159
4160   if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4161       (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4162       ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4163       ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4164 #if DECIMAL_CALL_BY_REFERENCE
4165     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4166 #else
4167     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4168 #endif
4169     // determine the unbiased exponent of the result
4170     unbexp = ((res1 >> 53) & 0x3ff) - 398;
4171
4172     // if subnormal, res1  must have exp = -398
4173     // if tiny and inexact set underflow and inexact status flags
4174     if (!((res1 & MASK_NAN) == MASK_NAN) &&     // res1 not NaN
4175         (unbexp == -398)
4176         && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4177         && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4178             || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4179       // set the inexact flag and the underflow flag
4180       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4181     } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4182                is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4183       // set the inexact flag and the underflow flag
4184       *pfpsf |= INEXACT_EXCEPTION;
4185     }
4186
4187     *pfpsf |= save_fpsf;
4188     BID_RETURN (res1);
4189   } // else continue, and use rounding to nearest to round to 16 digits
4190
4191   // at this point the result is rounded to nearest (even or away) to 34 digits
4192   // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4193   sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4194   exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4195   unbexp = (exp >> 49) - 6176;
4196   C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4197   C.w[0] = res.w[LOW_128W];
4198
4199   if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||       // result is zero
4200       (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4201       // clear under/overflow
4202 #if DECIMAL_CALL_BY_REFERENCE
4203     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4204 #else
4205     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4206 #endif
4207     *pfpsf |= save_fpsf;
4208     BID_RETURN (res1);
4209   } // else continue
4210
4211   // -398 - 34 <= unbexp <= 369 + 15
4212   if (rnd_mode == ROUNDING_TIES_AWAY) {
4213     // apply correction, if needed, to make the result rounded to nearest-even
4214     if (is_midpoint_gt_even) {
4215       // res = res - 1
4216       res1--; // res1 is now even
4217     } // else the result is already correctly rounded to nearest-even
4218   }
4219   // at this point the result is finite, non-zero canonical normal or subnormal,
4220   // and in most cases overflow or underflow will not occur
4221
4222   // determine the number of digits q in the result
4223   // q = nr. of decimal digits in x
4224   // determine first the nr. of bits in x
4225   if (C.w[1] == 0) {
4226     if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4227       // split the 64-bit value in two 32-bit halves to avoid rounding errors
4228       if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4229         tmp.d = (double) (C.w[0] >> 32); // exact conversion
4230         nr_bits =
4231           33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4232       } else { // x < 2^32
4233         tmp.d = (double) (C.w[0]); // exact conversion
4234         nr_bits =
4235           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4236       }
4237     } else { // if x < 2^53
4238       tmp.d = (double) C.w[0]; // exact conversion
4239       nr_bits =
4240         1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4241     }
4242   } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4243     tmp.d = (double) C.w[1]; // exact conversion
4244     nr_bits =
4245       65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4246   }
4247   q = nr_digits[nr_bits - 1].digits;
4248   if (q == 0) {
4249     q = nr_digits[nr_bits - 1].digits1;
4250     if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4251         (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4252          C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4253       q++;
4254   }
4255   // if q > 16, round to nearest even to 16 digits (but for underflow it may
4256   // have to be truncated even more)
4257   if (q > 16) {
4258     x0 = q - 16;
4259     if (q <= 18) {
4260       round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4261                     &is_midpoint_lt_even, &is_midpoint_gt_even,
4262                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4263     } else { // 19 <= q <= 34
4264       round128_19_38 (q, x0, C, &res128, &incr_exp,
4265                       &is_midpoint_lt_even, &is_midpoint_gt_even,
4266                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4267       res1 = res128.w[0]; // the result fits in 64 bits
4268     }
4269     unbexp = unbexp + x0;
4270     if (incr_exp)
4271       unbexp++;
4272     q = 16; // need to set in case denormalization is necessary
4273   } else {
4274     // the result does not require a second rounding (and it must have
4275     // been exact in the first rounding, since q <= 16)
4276     res1 = C.w[0];
4277   }
4278
4279   // avoid a double rounding error
4280   if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4281       is_midpoint_lt_even) { // double rounding error upward
4282     // res = res - 1
4283     res1--; // res1 becomes odd
4284     is_midpoint_lt_even = 0;
4285     is_inexact_lt_midpoint = 1;
4286     if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4287       res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4288       unbexp--;
4289     }
4290   } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4291       is_midpoint_gt_even) { // double rounding error downward
4292     // res = res + 1
4293     res1++; // res1 becomes odd (so it cannot be 10^16)
4294     is_midpoint_gt_even = 0;
4295     is_inexact_gt_midpoint = 1;
4296   } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4297              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4298     // if this second rounding was exact the result may still be
4299     // inexact because of the first rounding
4300     if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4301       is_inexact_gt_midpoint = 1;
4302     }
4303     if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4304       is_inexact_lt_midpoint = 1;
4305     }
4306   } else if (is_midpoint_gt_even &&
4307              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4308     // pulled up to a midpoint
4309     is_inexact_lt_midpoint = 1;
4310     is_inexact_gt_midpoint = 0;
4311     is_midpoint_lt_even = 0;
4312     is_midpoint_gt_even = 0;
4313   } else if (is_midpoint_lt_even &&
4314              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4315     // pulled down to a midpoint
4316     is_inexact_lt_midpoint = 0;
4317     is_inexact_gt_midpoint = 1;
4318     is_midpoint_lt_even = 0;
4319     is_midpoint_gt_even = 0;
4320   } else {
4321     ;
4322   }
4323   // this is the result rounded correctly to nearest even, with unbounded exp.
4324
4325   // check for overflow
4326   if (q + unbexp > P16 + expmax16) {
4327     res1 = sign | 0x7800000000000000ull;
4328     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4329     *pfpsf |= save_fpsf;
4330     BID_RETURN (res1)
4331   } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4332     // not overflow; the result must be exact, and we can multiply res1 by
4333     // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4334     scale = unbexp - expmax16;
4335     res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4336     unbexp = expmax16; // unbexp - scale
4337   } else {
4338     ; // continue
4339   }
4340
4341   // check for underflow
4342   if (q + unbexp < P16 + expmin16) {
4343     if (unbexp < expmin16) {
4344       // we must truncate more of res
4345       x0 = expmin16 - unbexp; // x0 >= 1
4346       is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4347       is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4348       is_midpoint_lt_even0 = is_midpoint_lt_even;
4349       is_midpoint_gt_even0 = is_midpoint_gt_even;
4350       is_inexact_lt_midpoint = 0;
4351       is_inexact_gt_midpoint = 0;
4352       is_midpoint_lt_even = 0;
4353       is_midpoint_gt_even = 0;
4354       // the number of decimal digits in res1 is q
4355       if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4356         // 2 <= q <= 16, 1 <= x0 <= 15
4357         round64_2_18 (q, x0, res1, &res1, &incr_exp,
4358                       &is_midpoint_lt_even, &is_midpoint_gt_even,
4359                       &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4360         if (incr_exp) {
4361           // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4362           res1 = ten2k64[q - x0];
4363         }
4364         unbexp = unbexp + x0; // expmin16
4365       } else if (x0 == q) {
4366         // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4367         // determine relationship with 1/2 ulp
4368         // q <= 16
4369         if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4370           lt_half_ulp = 1;
4371           is_inexact_lt_midpoint = 1;
4372         } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4373           eq_half_ulp = 1;
4374           is_midpoint_gt_even = 1;
4375         } else { // > 1/2 ulp
4376           // gt_half_ulp = 1;
4377           is_inexact_gt_midpoint = 1;
4378         }
4379         if (lt_half_ulp || eq_half_ulp) {
4380           // res = +0.0 * 10^expmin16
4381           res1 = 0x0000000000000000ull;
4382         } else { // if (gt_half_ulp)
4383           // res = +1 * 10^expmin16
4384           res1 = 0x0000000000000001ull;
4385         }
4386         unbexp = expmin16;
4387       } else { // if (x0 > q)
4388         // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4389         res1 = 0x0000000000000000ull;
4390         unbexp = expmin16;
4391         is_inexact_lt_midpoint = 1;
4392       }
4393       // avoid a double rounding error
4394       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4395           is_midpoint_lt_even) { // double rounding error upward
4396         // res = res - 1
4397         res1--; // res1 becomes odd
4398         is_midpoint_lt_even = 0;
4399         is_inexact_lt_midpoint = 1;
4400       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4401           is_midpoint_gt_even) { // double rounding error downward
4402         // res = res + 1
4403         res1++; // res1 becomes odd
4404         is_midpoint_gt_even = 0;
4405         is_inexact_gt_midpoint = 1;
4406       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4407                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4408         // if this rounding was exact the result may still be
4409         // inexact because of the previous roundings
4410         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4411           is_inexact_gt_midpoint = 1;
4412         }
4413         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4414           is_inexact_lt_midpoint = 1;
4415         }
4416       } else if (is_midpoint_gt_even &&
4417                  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4418         // pulled up to a midpoint
4419         is_inexact_lt_midpoint = 1;
4420         is_inexact_gt_midpoint = 0;
4421         is_midpoint_lt_even = 0;
4422         is_midpoint_gt_even = 0;
4423       } else if (is_midpoint_lt_even &&
4424                  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4425         // pulled down to a midpoint
4426         is_inexact_lt_midpoint = 0;
4427         is_inexact_gt_midpoint = 1;
4428         is_midpoint_lt_even = 0;
4429         is_midpoint_gt_even = 0;
4430       } else {
4431         ;
4432       }
4433     }
4434     // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4435     // and the result is tiny and exact
4436
4437     // check for inexact result
4438     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4439         is_midpoint_lt_even || is_midpoint_gt_even ||
4440         is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4441         is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4442       // set the inexact flag and the underflow flag
4443       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4444     }
4445   } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4446              is_midpoint_lt_even || is_midpoint_gt_even) {
4447     *pfpsf |= INEXACT_EXCEPTION;
4448   }
4449   // this is the result rounded correctly to nearest, with bounded exponent
4450
4451   if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4452     // res = res + 1
4453     res1++; // res1 is now odd
4454   } // else the result is already correct
4455
4456   // assemble the result
4457   if (res1 < 0x0020000000000000ull) { // res < 2^53
4458     res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4459   } else { // res1 >= 2^53
4460     res1 = sign | MASK_STEERING_BITS |
4461       ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4462   }
4463   *pfpsf |= save_fpsf;
4464   BID_RETURN (res1);
4465 }