libgcc/

[pf3gnuchains/gcc-fork.git] / libgcc / config / libbid / bid128_fma.c
diff --git a/libgcc/config/libbid/bid128_fma.c b/libgcc/config/libbid/bid128_fma.c

index 22e08e2..1d0ffb1 100644 (file)
--- a/libgcc/config/libbid/bid128_fma.c
+++ b/libgcc/config/libbid/bid128_fma.c
@@ -32,24 +32,20 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   * 
   ****************************************************************************/
  
-#include "bid_conf.h"
-#include "bid_functions.h"
  #include "bid_internal.h"
  
  static void
-rounding_correction (
-    unsigned int rnd_mode, 
-    unsigned int is_inexact_lt_midpoint, 
-    unsigned int is_inexact_gt_midpoint, 
-    unsigned int is_midpoint_lt_even, 
-    unsigned int is_midpoint_gt_even, 
-    int unbexp, 
-    UINT128 * ptrres, 
-    _IDEC_flags * ptrfpsf) {
-    // unbiased true exponent unbexp may be larger than emax
+rounding_correction (unsigned int rnd_mode,
+                    unsigned int is_inexact_lt_midpoint,
+                    unsigned int is_inexact_gt_midpoint,
+                    unsigned int is_midpoint_lt_even,
+                    unsigned int is_midpoint_gt_even,
+                    int unbexp,
+                    UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
+  // unbiased true exponent unbexp may be larger than emax
  
    UINT128 res = *ptrres; // expected to have the correct sign and coefficient
-      // (the exponent field is ignored, as unbexp is used instead)
+  // (the exponent field is ignored, as unbexp is used instead)
    UINT64 sign, exp;
    UINT64 C_hi, C_lo;
  
@@ -68,10 +64,10 @@ rounding_correction (
    exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
    C_hi = res.w[1] & MASK_COEFF;
    C_lo = res.w[0];
-  if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || 
+  if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
        ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && 
-      is_midpoint_gt_even))) ||
-      (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || 
+      is_midpoint_gt_even))) || 
+      (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
        ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && 
        is_midpoint_gt_even)))) {
      // C = C + 1
@@ -87,20 +83,26 @@ rounding_correction (
        exp = (UINT64) (unbexp + 6176) << 49;
      }
    } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
-      ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || 
+      ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
        (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
      // C = C - 1
      C_lo = C_lo - 1;
      if (C_lo == 0xffffffffffffffffull)
        C_hi--;
      // check if we crossed into the lower decade
-    if (exp > 0 && C_hi == 0x0000314dc6448d93ull && 
-        C_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
-      C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
-      C_lo = 0x378d8e63ffffffffull;
-      // exp = exp - EXP_P1;
-      unbexp = unbexp - 1;
-      exp = (UINT64) (unbexp + 6176) << 49;
+    if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { 
+      // C = 10^33 - 1
+      if (exp > 0) {
+        C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
+        C_lo = 0x378d8e63ffffffffull;
+        // exp = exp - EXP_P1;
+        unbexp = unbexp - 1;
+        exp = (UINT64) (unbexp + 6176) << 49;
+      } else { // if exp = 0
+        if (is_midpoint_lt_even || is_midpoint_lt_even ||
+            is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
+          *ptrfpsf |= UNDERFLOW_EXCEPTION;
+      }
      }
    } else {
      ; // the result is already correct
@@ -192,56 +194,56 @@ sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
  
  
  static int
-__bid_nr_digits256 (UINT256 R256) {
+nr_digits256 (UINT256 R256) {
    int ind;
    // determine the number of decimal digits in R256
    if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
      // between 1 and 19 digits
      for (ind = 1; ind <= 19; ind++) {
-      if (R256.w[0] < __bid_ten2k64[ind]) {
+      if (R256.w[0] < ten2k64[ind]) {
          break;
        }
      }
      // ind digits
    } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
-             (R256.w[1] < __bid_ten2k128[0].w[1]
-              || (R256.w[1] == __bid_ten2k128[0].w[1]
-              && R256.w[0] < __bid_ten2k128[0].w[0]))) {
+             (R256.w[1] < ten2k128[0].w[1] ||
+              (R256.w[1] == ten2k128[0].w[1]
+               && R256.w[0] < ten2k128[0].w[0]))) {
      // 20 digits
      ind = 20;
    } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
      // between 21 and 38 digits
      for (ind = 1; ind <= 18; ind++) {
-      if (R256.w[1] < __bid_ten2k128[ind].w[1] ||
-          (R256.w[1] == __bid_ten2k128[ind].w[1] &&
-          R256.w[0] < __bid_ten2k128[ind].w[0])) {
+      if (R256.w[1] < ten2k128[ind].w[1] ||
+          (R256.w[1] == ten2k128[ind].w[1] &&
+           R256.w[0] < ten2k128[ind].w[0])) {
          break;
        }
      }
      // ind + 20 digits
      ind = ind + 20;
-  } else if (((R256.w[3] == 0x0 &&
-             (R256.w[2] < __bid_ten2k256[0].w[2])) ||
-              (R256.w[2] == __bid_ten2k256[0].w[2] &&
-              R256.w[1] < __bid_ten2k256[0].w[1]) ||
-              (R256.w[2] == __bid_ten2k256[0].w[2] &&
-              R256.w[1] == __bid_ten2k256[0].w[1] &&
-              R256.w[0] < __bid_ten2k256[0].w[0]))) {
+  } else if (R256.w[3] == 0x0 &&
+             (R256.w[2] < ten2k256[0].w[2] ||
+              (R256.w[2] == ten2k256[0].w[2] &&
+               R256.w[1] < ten2k256[0].w[1]) ||
+              (R256.w[2] == ten2k256[0].w[2] &&
+               R256.w[1] == ten2k256[0].w[1] &&
+               R256.w[0] < ten2k256[0].w[0]))) {
      // 39 digits
      ind = 39;
    } else {
      // between 40 and 68 digits
      for (ind = 1; ind <= 29; ind++) {
-      if (R256.w[3] < __bid_ten2k256[ind].w[3] ||
-          (R256.w[3] == __bid_ten2k256[ind].w[3] &&
-          R256.w[2] < __bid_ten2k256[ind].w[2]) ||
-          (R256.w[3] == __bid_ten2k256[ind].w[3] &&
-          R256.w[2] == __bid_ten2k256[ind].w[2] &&
-          R256.w[1] < __bid_ten2k256[ind].w[1]) ||
-          (R256.w[3] == __bid_ten2k256[ind].w[3] &&
-          R256.w[2] == __bid_ten2k256[ind].w[2] &&
-          R256.w[1] == __bid_ten2k256[ind].w[1] &&
-          R256.w[0] < __bid_ten2k256[ind].w[0])) {
+      if (R256.w[3] < ten2k256[ind].w[3] ||
+          (R256.w[3] == ten2k256[ind].w[3] &&
+           R256.w[2] < ten2k256[ind].w[2]) ||
+          (R256.w[3] == ten2k256[ind].w[3] &&
+           R256.w[2] == ten2k256[ind].w[2] &&
+           R256.w[1] < ten2k256[ind].w[1]) ||
+          (R256.w[3] == ten2k256[ind].w[3] &&
+           R256.w[2] == ten2k256[ind].w[2] &&
+           R256.w[1] == ten2k256[ind].w[1] &&
+           R256.w[0] < ten2k256[ind].w[0])) {
          break;
        }
      }
@@ -305,10 +307,10 @@ add_and_round (int q3,
      R256.w[0] = C3.w[0];
    } else if (scale <= 19) { // 10^scale fits in 64 bits
      P128.w[1] = 0;
-    P128.w[0] = __bid_ten2k64[scale];
+    P128.w[0] = ten2k64[scale];
      __mul_128x128_to_256 (R256, P128, C3);
    } else if (scale <= 38) { // 10^scale fits in 128 bits
-    __mul_128x128_to_256 (R256, __bid_ten2k128[scale - 20], C3);
+    __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
    } else if (scale <= 57) { // 39 <= scale <= 57 
      // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
      // (10^67 has 223 bits; 10^69 has 230 bits);  
@@ -316,9 +318,9 @@ add_and_round (int q3,
      // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
      // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
      // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
-    __mul_64x128_to_128 (R128, __bid_ten2k64[scale - 38], C3);
+    __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
      // now multiply R128 by 10^38
-    __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]);
+    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
    } else { // 58 <= scale <= 66
      // 10^scale takes between 193 and 220 bits,
      // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
@@ -328,9 +330,9 @@ add_and_round (int q3,
      // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
      // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
      // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
-    __mul_64x128_to_128 (R128, C3.w[0], __bid_ten2k128[scale - 58]);
+    __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
      // now calculate 10*38 * 10^(scale-38) * C3 
-    __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]);
+    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
    }
    // C3 * 10^scale is now in R256 
  
@@ -348,7 +350,7 @@ add_and_round (int q3,
      // but may require rounding
  
      // compare first R256 = C3 * 10^scale and C4 
-    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || 
+    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
          (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
          (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
          R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
@@ -382,7 +384,7 @@ add_and_round (int q3,
    }
  
    // determine the number of decimal digits in R256
-  ind = __bid_nr_digits256 (R256);
+  ind = nr_digits256 (R256);
  
    // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
    // round to the destination precision, with unbounded exponent
@@ -403,22 +405,22 @@ add_and_round (int q3,
      if (ind <= 38) {
        P128.w[1] = R256.w[1];
        P128.w[0] = R256.w[0];
-      __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp,
-                      &is_midpoint_lt_even, &is_midpoint_gt_even,
-                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+      round128_19_38 (ind, x0, P128, &R128, &incr_exp,
+                     &is_midpoint_lt_even, &is_midpoint_gt_even,
+                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      } else if (ind <= 57) {
        P192.w[2] = R256.w[2];
        P192.w[1] = R256.w[1];
        P192.w[0] = R256.w[0];
-      __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp,
-                      &is_midpoint_lt_even, &is_midpoint_gt_even,
-                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+      round192_39_57 (ind, x0, P192, &R192, &incr_exp,
+                     &is_midpoint_lt_even, &is_midpoint_gt_even,
+                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        R128.w[1] = R192.w[1];
        R128.w[0] = R192.w[0];
      } else { // if (ind <= 68)
-      __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp,
-                      &is_midpoint_lt_even, &is_midpoint_gt_even,
-                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+      round256_58_76 (ind, x0, R256, &R256, &incr_exp,
+                     &is_midpoint_lt_even, &is_midpoint_gt_even,
+                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        R128.w[1] = R256.w[1];
        R128.w[0] = R256.w[0];
      }
@@ -435,9 +437,9 @@ add_and_round (int q3,
        P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
        P128.w[0] = R128.w[0];
        rounding_correction (rnd_mode,
-                           is_inexact_lt_midpoint,
-                           is_inexact_gt_midpoint, is_midpoint_lt_even,
-                           is_midpoint_gt_even, 0, &P128, ptrfpsf);
+                          is_inexact_lt_midpoint,
+                          is_inexact_gt_midpoint, is_midpoint_lt_even,
+                          is_midpoint_gt_even, 0, &P128, ptrfpsf);
        scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
        // the number of digits in the significand is p34 = 34
        if (e4 + scale < expmin) {
@@ -495,10 +497,10 @@ add_and_round (int q3,
        R128.w[1] = res.w[1] & MASK_COEFF;
        R128.w[0] = res.w[0];
        if (ind <= 19) {
-        if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp
+        if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
            lt_half_ulp = 1;
            is_inexact_lt_midpoint = 1;
-        } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp
+        } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
            eq_half_ulp = 1;
            is_midpoint_gt_even = 1;
          } else { // > 1/2 ulp
@@ -506,13 +508,13 @@ add_and_round (int q3,
            is_inexact_gt_midpoint = 1;
          }
        } else { // if (ind <= 38) {
-        if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || 
-            (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
-            R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp
+        if (R128.w[1] < midpoint128[ind - 20].w[1] || 
+            (R128.w[1] == midpoint128[ind - 20].w[1] && 
+            R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
            lt_half_ulp = 1;
            is_inexact_lt_midpoint = 1;
-        } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
-            R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp
+        } else if (R128.w[1] == midpoint128[ind - 20].w[1] && 
+            R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
            eq_half_ulp = 1;
            is_midpoint_gt_even = 1;
          } else { // > 1/2 ulp
@@ -535,18 +537,18 @@ add_and_round (int q3,
        // round the ind-digit result to ind - x0 digits
  
        if (ind <= 18) { // 2 <= ind <= 18
-        __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
-                      &is_midpoint_lt_even, &is_midpoint_gt_even,
-                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+        round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
+                     &is_midpoint_lt_even, &is_midpoint_gt_even,
+                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
          res.w[1] = 0x0;
          res.w[0] = R64;
        } else if (ind <= 38) {
          P128.w[1] = res.w[1] & MASK_COEFF;
          P128.w[0] = res.w[0];
-        __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round128_19_38 (ind, x0, P128, &res, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
        }
        e4 = e4 + x0; // expmin
        // we want the exponent to be expmin, so if incr_exp = 1 then
@@ -555,7 +557,7 @@ add_and_round (int q3,
          // 64 x 128 -> 128
          P128.w[1] = res.w[1] & MASK_COEFF;
          P128.w[0] = res.w[0];
-        __mul_64x128_to_128 (res, __bid_ten2k64[1], P128);
+        __mul_64x128_to_128 (res, ten2k64[1], P128);
        }
        res.w[1] =
          p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
@@ -581,7 +583,7 @@ add_and_round (int q3,
          is_midpoint_gt_even = 0;
          is_inexact_gt_midpoint = 1;
        } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
-          !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+                !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
          // if this second rounding was exact the result may still be 
          // inexact because of the first rounding
          if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
@@ -591,14 +593,14 @@ add_and_round (int q3,
            is_inexact_lt_midpoint = 1;
          }
        } else if (is_midpoint_gt_even &&
-                 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+                (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
          // pulled up to a midpoint
          is_inexact_lt_midpoint = 1;
          is_inexact_gt_midpoint = 0;
          is_midpoint_lt_even = 0;
          is_midpoint_gt_even = 0;
        } else if (is_midpoint_lt_even &&
-                 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+                (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
          // pulled down to a midpoint
          is_inexact_lt_midpoint = 0;
          is_inexact_gt_midpoint = 1;
@@ -613,9 +615,9 @@ add_and_round (int q3,
    // apply correction if not rounding to nearest
    if (rnd_mode != ROUNDING_TO_NEAREST) {
      rounding_correction (rnd_mode,
-                         is_inexact_lt_midpoint, is_inexact_gt_midpoint,
-                         is_midpoint_lt_even, is_midpoint_gt_even,
-                         e4, &res, ptrfpsf);
+                        is_inexact_lt_midpoint, is_inexact_gt_midpoint,
+                        is_midpoint_lt_even, is_midpoint_gt_even,
+                        e4, &res, ptrfpsf);
    }
    if (is_midpoint_lt_even || is_midpoint_gt_even ||
        is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
@@ -635,23 +637,31 @@ add_and_round (int q3,
  
  
  #if DECIMAL_CALL_BY_REFERENCE
-void
-__bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
-            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
-            _EXC_INFO_PARAM) {
+static void
+bid128_ext_fma (int *ptr_is_midpoint_lt_even,
+               int *ptr_is_midpoint_gt_even,
+               int *ptr_is_inexact_lt_midpoint,
+               int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
+               UINT128 * px, UINT128 * py,
+               UINT128 *
+               pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
    UINT128 x = *px, y = *py, z = *pz;
  #if !DECIMAL_GLOBAL_ROUNDING
    unsigned int rnd_mode = *prnd_mode;
  #endif
  #else
-UINT128
-__bid128_fma (UINT128 x, UINT128 y, UINT128 z
-            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
-            _EXC_INFO_PARAM) {
+static UINT128
+bid128_ext_fma (int *ptr_is_midpoint_lt_even,
+               int *ptr_is_midpoint_gt_even,
+               int *ptr_is_inexact_lt_midpoint,
+               int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
+               UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
+               _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
  #endif
  
-  UINT128 res = {{ 0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull }};
-  UINT64 x_sign, y_sign, z_sign, p_sign;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
    UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
    int true_p_exp;
    UINT128 C1, C2, C3;
@@ -679,10 +689,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
    UINT256 R256;
  
    // the following are based on the table of special cases for fma; the NaN
-  // behavior is similar to that of the IPF fma 
+  // behavior is similar to that of the IA-64 Architecture fma 
  
    // identify cases where at least one operand is NaN
  
+  BID_SWAP128 (x);
+  BID_SWAP128 (y);
+  BID_SWAP128 (z);
    if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
      // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
      // check first for non-canonical NaN payload
@@ -709,6 +722,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          *pfpsf |= INVALID_EXCEPTION;
        }
      }
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
      // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
@@ -735,6 +753,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          *pfpsf |= INVALID_EXCEPTION;
        }
      }
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
      // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
@@ -756,6 +779,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
        res.w[0] = x.w[0];
      }
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    }
    // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
@@ -774,8 +802,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
      } else { // G0_G1 != 11
        x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
        if (C1.w[1] > 0x0001ed09bead87c0ull ||
-          (C1.w[1] == 0x0001ed09bead87c0ull
-          && C1.w[0] > 0x378d8e63ffffffffull)) {
+          (C1.w[1] == 0x0001ed09bead87c0ull &&
+           C1.w[0] > 0x378d8e63ffffffffull)) {
          // x is non-canonical if coefficient is larger than 10^34 -1
          C1.w[1] = 0;
          C1.w[0] = 0;
@@ -797,8 +825,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
      } else { // G0_G1 != 11
        y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
        if (C2.w[1] > 0x0001ed09bead87c0ull ||
-          (C2.w[1] == 0x0001ed09bead87c0ull
-          && C2.w[0] > 0x378d8e63ffffffffull)) {
+          (C2.w[1] == 0x0001ed09bead87c0ull &&
+           C2.w[0] > 0x378d8e63ffffffffull)) {
          // y is non-canonical if coefficient is larger than 10^34 -1
          C2.w[1] = 0;
          C2.w[0] = 0;
@@ -820,8 +848,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
      } else { // G0_G1 != 11
        z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
        if (C3.w[1] > 0x0001ed09bead87c0ull ||
-          (C3.w[1] == 0x0001ed09bead87c0ull
-          && C3.w[0] > 0x378d8e63ffffffffull)) {
+          (C3.w[1] == 0x0001ed09bead87c0ull &&
+           C3.w[0] > 0x378d8e63ffffffffull)) {
          // z is non-canonical if coefficient is larger than 10^34 -1
          C3.w[1] = 0;
          C3.w[0] = 0;
@@ -875,6 +903,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        // set invalid flag
        *pfpsf |= INVALID_EXCEPTION;
      }
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
      if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
@@ -887,8 +920,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          // set invalid flag
          *pfpsf |= INVALID_EXCEPTION;
        } else {
-        res.w[1] = z.w[1];
-        res.w[0] = z.w[0];
+        res.w[1] = z_sign | MASK_INF;
+        res.w[0] = 0x0;
        }
      } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
        // z = 0, f, inf
@@ -902,25 +935,31 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        res.w[1] = p_sign | MASK_INF;
        res.w[0] = 0x0;
      }
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
      // x = 0, f and y = 0, f, necessarily
      res.w[1] = z_sign | MASK_INF;
      res.w[0] = 0x0;
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    }
  
    true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
    if (true_p_exp < -6176)
      p_exp = 0; // cannot be less than EXP_MIN
-  // MACH DEBUG else if (true_p_exp > 6111)
-  //   p_exp = (UINT64) (6111 + 6176) << 49; // cannot be more than EXP_MAX
    else
      p_exp = (UINT64) (true_p_exp + 6176) << 49;
  
-  if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 
-      (C2.w[1] == 0x0 && C2.w[0] == 0x0)) &&
-      C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
+  if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
      // the result is 0
      if (p_exp < z_exp)
        res.w[1] = p_exp; // preferred exponent
@@ -940,6 +979,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          res.w[0] = 0x0;
        }
      }
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    }
    // from this point on, we may need to know the number of decimal digits
@@ -970,12 +1014,12 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        x_nr_bits =
          65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
-    q1 = __bid_nr_digits[x_nr_bits - 1].digits;
+    q1 = nr_digits[x_nr_bits - 1].digits;
      if (q1 == 0) {
-      q1 = __bid_nr_digits[x_nr_bits - 1].digits1;
-      if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi ||
-          (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi &&
-          C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo))
+      q1 = nr_digits[x_nr_bits - 1].digits1;
+      if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
+          (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
+           C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
          q1++;
      }
    }
@@ -1004,12 +1048,12 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
  
-    q2 = __bid_nr_digits[y_nr_bits].digits;
+    q2 = nr_digits[y_nr_bits].digits;
      if (q2 == 0) {
-      q2 = __bid_nr_digits[y_nr_bits].digits1;
-      if (C2.w[1] > __bid_nr_digits[y_nr_bits].threshold_hi ||
-          (C2.w[1] == __bid_nr_digits[y_nr_bits].threshold_hi &&
-          C2.w[0] >= __bid_nr_digits[y_nr_bits].threshold_lo))
+      q2 = nr_digits[y_nr_bits].digits1;
+      if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
+          (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
+           C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
          q2++;
      }
    }
@@ -1038,19 +1082,19 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
  
-    q3 = __bid_nr_digits[z_nr_bits].digits;
+    q3 = nr_digits[z_nr_bits].digits;
      if (q3 == 0) {
-      q3 = __bid_nr_digits[z_nr_bits].digits1;
-      if (C3.w[1] > __bid_nr_digits[z_nr_bits].threshold_hi ||
-          (C3.w[1] == __bid_nr_digits[z_nr_bits].threshold_hi &&
-          C3.w[0] >= __bid_nr_digits[z_nr_bits].threshold_lo))
+      q3 = nr_digits[z_nr_bits].digits1;
+      if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
+          (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
+           C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
          q3++;
      }
    }
  
-  if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 
+  if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
        (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
-      // x = 0 or y = 0
+    // x = 0 or y = 0
      // z = f, necessarily; for 0 + z return z, with the preferred exponent
      // the result is z, but need to get the preferred exponent
      if (z_exp <= p_exp) { // the preferred exponent is z_exp
@@ -1068,20 +1112,25 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          res.w[0] = z.w[0];
        } else if (q3 <= 19) { // z fits in 64 bits 
          if (scale <= 19) { // 10^scale fits in 64 bits
-          // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
-          __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
+          // 64 x 64 C3.w[0] * ten2k64[scale]
+          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
          } else { // 10^scale fits in 128 bits
-          // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
-          __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
+          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
+          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
          }
        } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 
-        // 64 x 128 __bid_ten2k64[scale] * C3
-        __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
+        // 64 x 128 ten2k64[scale] * C3
+        __mul_128x64_to_128 (res, ten2k64[scale], C3);
        }
        // subtract scale from the exponent
        z_exp = z_exp - ((UINT64) scale << 49);
        res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
      }
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    } else {
      ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
@@ -1101,7 +1150,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
    if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
      C4.w[0] = C1.w[0] * C2.w[0];
      // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
-    if (C4.w[0] < __bid_ten2k64[q1 + q2 - 1])
+    if (C4.w[0] < ten2k64[q1 + q2 - 1])
        q4 = q1 + q2 - 1; // q4 in [1, 18]
      else
        q4 = q1 + q2; // q4 in [2, 19]
@@ -1110,7 +1159,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
      // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
      __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
      // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
-    if (C4.w[1] == 0 && C4.w[0] < __bid_ten2k64[19]) { // 19 = q1+q2-1
+    if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
        // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
        q4 = 19; // 19 = q1 + q2 - 1
      } else {
@@ -1130,9 +1179,9 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        __mul_128x64_to_128 (C4, C2.w[0], C1);
      }
      // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
-    if (C4.w[1] < __bid_ten2k128[q1 + q2 - 21].w[1] ||
-        (C4.w[1] == __bid_ten2k128[q1 + q2 - 21].w[1] &&
-        C4.w[0] < __bid_ten2k128[q1 + q2 - 21].w[0])) {
+    if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
+        (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
+         C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
        // if (C4.w[1] == 0) // q4 = 20, necessarily
        //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
        // else
@@ -1147,8 +1196,9 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
      // may replace this by 128x128_to192
      __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
      // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
-    if (C4.w[2] == 0 && (C4.w[1] < __bid_ten2k128[18].w[1] || 
-    (C4.w[1] == __bid_ten2k128[18].w[1] && C4.w[0] < __bid_ten2k128[18].w[0]))) { 
+    if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
+                        (C4.w[1] == ten2k128[18].w[1]
+                         && C4.w[0] < ten2k128[18].w[0]))) {
        // 18 = 38 - 20 = q1+q2-1 - 20
        // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
        q4 = 38; // 38 = q1 + q2 - 1
@@ -1175,11 +1225,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
      }
      // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
-    if (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] ||
-        (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] &&
-        (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] ||
-         (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] &&
-         C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))) {
+    if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
+        (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
+         (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
+          (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
+           C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
        // if (C4.w[2] == 0) // q4 = 39, necessarily
        //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
        // else
@@ -1200,10 +1250,12 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        __mul_128x128_to_256 (C4, C1, C2);
      }
      // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
-    if (C4.w[3] == 0 && (C4.w[2] < __bid_ten2k256[18].w[2] || 
-        (C4.w[2] == __bid_ten2k256[18].w[2] && (C4.w[1] < __bid_ten2k256[18].w[1] || 
-        (C4.w[1] == __bid_ten2k256[18].w[1] && C4.w[0] < __bid_ten2k256[18].w[0]))))) { 
-        // 18 = 57 - 39 = q1+q2-1 - 39
+    if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
+                        (C4.w[2] == ten2k256[18].w[2]
+                         && (C4.w[1] < ten2k256[18].w[1]
+                             || (C4.w[1] == ten2k256[18].w[1]
+                                 && C4.w[0] < ten2k256[18].w[0]))))) {
+      // 18 = 57 - 39 = q1+q2-1 - 39
        // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
        q4 = 57; // 57 = q1 + q2 - 1
      } else {
@@ -1221,13 +1273,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
      // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
      __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
      // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
-    if (C4.w[3] < __bid_ten2k256[q1 + q2 - 40].w[3] ||
-        (C4.w[3] == __bid_ten2k256[q1 + q2 - 40].w[3] &&
-        (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] ||
-         (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] &&
-         (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] ||
-          (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] &&
-          C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))))) {
+    if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
+        (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
+         (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
+          (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
+           (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
+            (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
+             C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
        // if (C4.w[3] == 0) // q4 = 58, necessarily
        //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
        // else
@@ -1251,25 +1303,25 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        if (q4 <= 38) {
          P128.w[1] = C4.w[1];
          P128.w[0] = C4.w[0];
-        __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round128_19_38 (q4, x0, P128, &res, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
        } else if (q4 <= 57) { // 35 <= q4 <= 57
          P192.w[2] = C4.w[2];
          P192.w[1] = C4.w[1];
          P192.w[0] = C4.w[0];
-        __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
          res.w[0] = R192.w[0];
          res.w[1] = R192.w[1];
        } else { // if (q4 <= 68)
-        __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
          res.w[0] = R256.w[0];
          res.w[1] = R256.w[1];
        }
@@ -1290,15 +1342,15 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          // res = (C4 * 10^scale) * 10^expmax
          if (q4 <= 19) { // C4 fits in 64 bits
            if (scale <= 19) { // 10^scale fits in 64 bits
-            // 64 x 64 C4.w[0] * __bid_ten2k64[scale]
-            __mul_64x64_to_128MACH (res, C4.w[0], __bid_ten2k64[scale]);
+            // 64 x 64 C4.w[0] * ten2k64[scale]
+            __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
            } else { // 10^scale fits in 128 bits
-            // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20]
-            __mul_128x64_to_128 (res, C4.w[0], __bid_ten2k128[scale - 20]);
+            // 64 x 128 C4.w[0] * ten2k128[scale - 20]
+            __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
            }
          } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
-          // 64 x 128 __bid_ten2k64[scale] * CC43
-          __mul_128x64_to_128 (res, __bid_ten2k64[scale], C4);
+          // 64 x 128 ten2k64[scale] * CC43
+          __mul_128x64_to_128 (res, ten2k64[scale], C4);
          }
          e4 = e4 - scale; // expmax
          q4 = q4 + scale;
@@ -1320,12 +1372,17 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        } else {
          res.w[1] = p_sign | res.w[1];
          rounding_correction (rnd_mode,
-                             is_inexact_lt_midpoint,
-                             is_inexact_gt_midpoint,
-                             is_midpoint_lt_even, is_midpoint_gt_even,
-                             e4, &res, pfpsf);
+                            is_inexact_lt_midpoint,
+                            is_inexact_gt_midpoint,
+                            is_midpoint_lt_even, is_midpoint_gt_even,
+                            e4, &res, pfpsf);
        }
        *pfpsf |= save_fpsf;
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
      }
      // check for underflow
@@ -1345,13 +1402,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          // the number of decimal digits in res is q4
          if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
            if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
-            __bid_round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
-                          &is_midpoint_lt_even, &is_midpoint_gt_even,
-                          &is_inexact_lt_midpoint,
-                          &is_inexact_gt_midpoint);
+            round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
+                         &is_midpoint_lt_even, &is_midpoint_gt_even,
+                         &is_inexact_lt_midpoint,
+                         &is_inexact_gt_midpoint);
              if (incr_exp) {
                // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
-              R64 = __bid_ten2k64[q4 - x0];
+              R64 = ten2k64[q4 - x0];
              }
              // res.w[1] = 0; (from above)
              res.w[0] = R64;
@@ -1359,19 +1416,19 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
              // 19 <= q4 <= 38
              P128.w[1] = res.w[1];
              P128.w[0] = res.w[0];
-            __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp,
-                            &is_midpoint_lt_even, &is_midpoint_gt_even,
-                            &is_inexact_lt_midpoint,
-                            &is_inexact_gt_midpoint);
+            round128_19_38 (q4, x0, P128, &res, &incr_exp,
+                           &is_midpoint_lt_even, &is_midpoint_gt_even,
+                           &is_inexact_lt_midpoint,
+                           &is_inexact_gt_midpoint);
              if (incr_exp) {
                // increase coefficient by a factor of 10; this will be <= 10^33
                // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
                if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
-                // res.w[1] = 0;
-                res.w[0] = __bid_ten2k64[q4 - x0];
+               // res.w[1] = 0;
+               res.w[0] = ten2k64[q4 - x0];
                } else { // 20 <= q4 - x0 <= 37
-                res.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
-                res.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
+               res.w[0] = ten2k128[q4 - x0 - 20].w[0];
+               res.w[1] = ten2k128[q4 - x0 - 20].w[1];
                }
              }
            }
@@ -1380,10 +1437,10 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
            // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
            // determine relationship with 1/2 ulp
            if (q4 <= 19) {
-            if (res.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp
+            if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
                lt_half_ulp = 1;
                is_inexact_lt_midpoint = 1;
-            } else if (res.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp
+            } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
                eq_half_ulp = 1;
                is_midpoint_gt_even = 1;
              } else { // > 1/2 ulp
@@ -1391,13 +1448,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
                is_inexact_gt_midpoint = 1;
              }
            } else { // if (q4 <= 34)
-            if (res.w[1] < __bid_midpoint128[q4 - 20].w[1] ||
-                (res.w[1] == __bid_midpoint128[q4 - 20].w[1] &&
-                res.w[0] < __bid_midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
+            if (res.w[1] < midpoint128[q4 - 20].w[1] || 
+                (res.w[1] == midpoint128[q4 - 20].w[1] && 
+                res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
                lt_half_ulp = 1;
                is_inexact_lt_midpoint = 1;
-            } else if (res.w[1] == __bid_midpoint128[q4 - 20].w[1] &&
-                res.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
+            } else if (res.w[1] == midpoint128[q4 - 20].w[1] && 
+                res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
                eq_half_ulp = 1;
                is_midpoint_gt_even = 1;
              } else { // > 1/2 ulp
@@ -1424,7 +1481,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
          }
          // avoid a double rounding error
          if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
-                       is_midpoint_lt_even) { // double rounding error upward
+            is_midpoint_lt_even) { // double rounding error upward
            // res = res - 1
            res.w[0]--;
            if (res.w[0] == 0xffffffffffffffffull)
@@ -1444,7 +1501,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
            is_midpoint_gt_even = 0;
            is_inexact_gt_midpoint = 1;
          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
-                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
            // if this second rounding was exact the result may still be 
            // inexact because of the first rounding
            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
@@ -1454,14 +1511,14 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
              is_inexact_lt_midpoint = 1;
            }
          } else if (is_midpoint_gt_even &&
-                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+                  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
            // pulled up to a midpoint
            is_inexact_lt_midpoint = 1;
            is_inexact_gt_midpoint = 0;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else if (is_midpoint_lt_even &&
-                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+                  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
            // pulled down to a midpoint
            is_inexact_lt_midpoint = 0;
            is_inexact_gt_midpoint = 1;
@@ -1483,20 +1540,21 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
              ; // res and e4 are unchanged
            } else if (q4 <= 19) { // C4 fits in 64 bits
              if (scale <= 19) { // 10^scale fits in 64 bits
-              // 64 x 64 res.w[0] * __bid_ten2k64[scale]
-              __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[scale]);
+              // 64 x 64 res.w[0] * ten2k64[scale]
+              __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
              } else { // 10^scale fits in 128 bits
-              // 64 x 128 res.w[0] * __bid_ten2k128[scale - 20]
-              __mul_128x64_to_128 (res, res.w[0], __bid_ten2k128[scale - 20]);
+              // 64 x 128 res.w[0] * ten2k128[scale - 20]
+              __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
              }
            } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
-            // 64 x 128 __bid_ten2k64[scale] * C3
-            __mul_128x64_to_128 (res, __bid_ten2k64[scale], res);
+            // 64 x 128 ten2k64[scale] * C3
+            __mul_128x64_to_128 (res, ten2k64[scale], res);
            }
            // subtract scale from the exponent
            e4 = e4 - scale;
          }
        }
+
        // check for inexact result
        if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
            is_midpoint_lt_even || is_midpoint_gt_even) {
@@ -1507,29 +1565,44 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
        res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
        if (rnd_mode != ROUNDING_TO_NEAREST) {
          rounding_correction (rnd_mode,
-                             is_inexact_lt_midpoint,
-                             is_inexact_gt_midpoint,
-                             is_midpoint_lt_even, is_midpoint_gt_even,
-                             e4, &res, pfpsf);
+                            is_inexact_lt_midpoint,
+                            is_inexact_gt_midpoint,
+                            is_midpoint_lt_even, is_midpoint_gt_even,
+                            e4, &res, pfpsf);
        }
        *pfpsf |= save_fpsf;
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
      }
+    // no overflow, and no underflow for rounding to nearest
+    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
+
+    if (rnd_mode != ROUNDING_TO_NEAREST) {
+      rounding_correction (rnd_mode,
+                          is_inexact_lt_midpoint,
+                          is_inexact_gt_midpoint,
+                          is_midpoint_lt_even, is_midpoint_gt_even,
+                          e4, &res, pfpsf);
+      // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
+      if (e4 == expmin) {
+        if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
+            ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
+             res.w[0] < 0x38c15b0a00000000ull)) {
+          is_tiny = 1;
+        }
+      }
+    }
  
-    // no overflow and no underflow
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
          is_midpoint_lt_even || is_midpoint_gt_even) {
        // set the inexact flag
        *pfpsf |= INEXACT_EXCEPTION;
-    }
-    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
-
-    if (rnd_mode != ROUNDING_TO_NEAREST) {
-      rounding_correction (rnd_mode,
-                           is_inexact_lt_midpoint,
-                           is_inexact_gt_midpoint,
-                           is_midpoint_lt_even, is_midpoint_gt_even,
-                           e4, &res, pfpsf);
+      if (is_tiny)
+        *pfpsf |= UNDERFLOW_EXCEPTION;
      }
  
      if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
@@ -1554,21 +1627,26 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
            ; // leave res unchanged
          } else if (q4 <= 19) { // x * y fits in 64 bits
            if (scale <= 19) { // 10^scale fits in 64 bits
-            // 64 x 64 C3.w[0] * __bid_ten2k64[scale] 
-            __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
+            // 64 x 64 C3.w[0] * ten2k64[scale] 
+            __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
            } else { // 10^scale fits in 128 bits 
-            // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
-            __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
+            // 64 x 128 C3.w[0] * ten2k128[scale - 20]
+            __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
            }
            res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
          } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
-          // 64 x 128 __bid_ten2k64[scale] * C3 
-          __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
+          // 64 x 128 ten2k64[scale] * C3 
+          __mul_128x64_to_128 (res, ten2k64[scale], C3);
            res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
          }
        } // else leave the result as it is, because p_exp <= z_exp
      }
      *pfpsf |= save_fpsf;
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
      BID_RETURN (res)
    } // else we have f * f + f
  
@@ -1578,7 +1656,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z
  delta_ge_zero:
    if (delta >= 0) {
  
-    if (p34 <= delta - 1 || // Case (1')
+    if (p34 <= delta - 1 ||    // Case (1')
          (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
        // check for overflow, which can occur only in Case (1')
        if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
@@ -1602,27 +1680,32 @@ delta_ge_zero:
            } else {
              if (q3 <= 19) { // C3 fits in 64 bits
                if (scale <= 19) { // 10^scale fits in 64 bits
-                // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
-                __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
+               // 64 x 64 C3.w[0] * ten2k64[scale]
+               __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
                } else { // 10^scale fits in 128 bits
-                // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
-                __mul_128x64_to_128 (res, C3.w[0],
-                                     __bid_ten2k128[scale - 20]);
+               // 64 x 128 C3.w[0] * ten2k128[scale - 20]
+               __mul_128x64_to_128 (res, C3.w[0],
+                                    ten2k128[scale - 20]);
                }
              } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
-              // 64 x 128 __bid_ten2k64[scale] * C3
-              __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
+              // 64 x 128 ten2k64[scale] * C3
+              __mul_128x64_to_128 (res, ten2k64[scale], C3);
              }
              // the coefficient in res has q3 + scale = p34 digits
            }
            e3 = e3 - scale;
            res.w[1] = z_sign | res.w[1];
            rounding_correction (rnd_mode,
-                               is_inexact_lt_midpoint,
-                               is_inexact_gt_midpoint,
-                               is_midpoint_lt_even, is_midpoint_gt_even,
-                               e3, &res, pfpsf);
+                              is_inexact_lt_midpoint,
+                              is_inexact_gt_midpoint,
+                              is_midpoint_lt_even, is_midpoint_gt_even,
+                              e3, &res, pfpsf);
          }
+        *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+        *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+        *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+        *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+        BID_SWAP128 (res);
          BID_RETURN (res)
        }
        // res = z
@@ -1638,15 +1721,15 @@ delta_ge_zero:
            res.w[0] = C3.w[0];
          } else if (q3 <= 19) { // z fits in 64 bits
            if (scale <= 19) { // 10^scale fits in 64 bits
-            // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
-            __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
+            // 64 x 64 C3.w[0] * ten2k64[scale]
+            __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
            } else { // 10^scale fits in 128 bits
-            // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
-            __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
+            // 64 x 128 C3.w[0] * ten2k128[scale - 20]
+            __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
            }
          } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
-          // 64 x 128 __bid_ten2k64[scale] * C3
-          __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
+          // 64 x 128 ten2k64[scale] * C3
+          __mul_128x64_to_128 (res, ten2k64[scale], C3);
          }
          // the coefficient in res has q3 + scale digits
          // subtract scale from the exponent
@@ -1667,11 +1750,11 @@ delta_ge_zero:
        if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
          // there is a gap of exactly one digit between the scaled C3 and C4
          // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
-        if ((q3 <= 19 && C3.w[0] != __bid_ten2k64[q3 - 1]) || 
-            (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != __bid_ten2k64[19])) || 
-            (q3 >= 21 && (C3.w[1] != __bid_ten2k128[q3 - 21].w[1] || 
-            C3.w[0] != __bid_ten2k128[q3 - 21].w[0]))) { 
-            // C3 * 10^ scale != 10^(q3-1)
+        if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
+            (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
+            (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
+                         C3.w[0] != ten2k128[q3 - 21].w[0]))) {
+          // C3 * 10^ scale != 10^(q3-1)
            // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
            // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
            is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
@@ -1684,35 +1767,35 @@ delta_ge_zero:
              // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 
              // x = q4-1, 1 <= x <= 67 and check if this operation is exact
              if (q4 <= 18) { // 2 <= q4 <= 18
-              __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
-                            &is_midpoint_lt_even, &is_midpoint_gt_even,
-                            &is_inexact_lt_midpoint,
-                            &is_inexact_gt_midpoint);
+              round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
+                           &is_midpoint_lt_even, &is_midpoint_gt_even,
+                           &is_inexact_lt_midpoint,
+                           &is_inexact_gt_midpoint);
              } else if (q4 <= 38) {
                P128.w[1] = C4.w[1];
                P128.w[0] = C4.w[0];
-              __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
-                              &is_midpoint_lt_even,
-                              &is_midpoint_gt_even,
-                              &is_inexact_lt_midpoint,
-                              &is_inexact_gt_midpoint);
+              round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
+                             &is_midpoint_lt_even,
+                             &is_midpoint_gt_even,
+                             &is_inexact_lt_midpoint,
+                             &is_inexact_gt_midpoint);
                R64 = R128.w[0]; // one decimal digit
              } else if (q4 <= 57) {
                P192.w[2] = C4.w[2];
                P192.w[1] = C4.w[1];
                P192.w[0] = C4.w[0];
-              __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
-                              &is_midpoint_lt_even,
-                              &is_midpoint_gt_even,
-                              &is_inexact_lt_midpoint,
-                              &is_inexact_gt_midpoint);
+              round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
+                             &is_midpoint_lt_even,
+                             &is_midpoint_gt_even,
+                             &is_inexact_lt_midpoint,
+                             &is_inexact_gt_midpoint);
                R64 = R192.w[0]; // one decimal digit
              } else { // if (q4 <= 68)
-              __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
-                              &is_midpoint_lt_even,
-                              &is_midpoint_gt_even,
-                              &is_inexact_lt_midpoint,
-                              &is_inexact_gt_midpoint);
+              round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
+                             &is_midpoint_lt_even,
+                             &is_midpoint_gt_even,
+                             &is_inexact_lt_midpoint,
+                             &is_inexact_gt_midpoint);
                R64 = R256.w[0]; // one decimal digit
              }
              if (incr_exp) {
@@ -1725,7 +1808,7 @@ delta_ge_zero:
              is_midpoint_lt_even = 1;
              is_midpoint_gt_even = 0;
            } else if ((e3 == expmin) ||
-                     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
+                    R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
              // result does not change
              is_inexact_lt_midpoint = 0;
              is_inexact_gt_midpoint = 1;
@@ -1739,10 +1822,10 @@ delta_ge_zero:
              // result decremented is 10^(q3+scale) - 1
              if ((q3 + scale) <= 19) {
                res.w[1] = 0;
-              res.w[0] = __bid_ten2k64[q3 + scale];
+              res.w[0] = ten2k64[q3 + scale];
              } else { // if ((q3 + scale + 1) <= 35)
-              res.w[1] = __bid_ten2k128[q3 + scale - 20].w[1];
-              res.w[0] = __bid_ten2k128[q3 + scale - 20].w[0];
+              res.w[1] = ten2k128[q3 + scale - 20].w[1];
+              res.w[0] = ten2k128[q3 + scale - 20].w[0];
              }
              res.w[0] = res.w[0] - 1; // borrow never occurs
              z_exp = z_exp - EXP_P1;
@@ -1756,7 +1839,7 @@ delta_ge_zero:
                *pfpsf |= UNDERFLOW_EXCEPTION;
              }
            }
-        }        // end 10^(q3+scale-1)
+        } // end 10^(q3+scale-1)
          // set the inexact flag
          *pfpsf |= INEXACT_EXCEPTION;
        } else {
@@ -1799,22 +1882,26 @@ delta_ge_zero:
        //        endif
        //      endif
        //    endif 
-      if (((e3 == expmin) && ((q3 + scale) < p34)) || 
-          ((e3 == expmin) && ((q3 + scale) == p34) && 
-          ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull) && // 10^33_high
-          (res.w[0] == 0x38c15b0a00000000ull) && // 10^33_low
-          (z_sign != p_sign) &&
-          ((!z_sign && rnd_mode != ROUNDING_UP) || 
+      if ((e3 == expmin && (q3 + scale) < p34) || 
+          (e3 == expmin && (q3 + scale) == p34 && 
+          (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&  // 10^33_high
+          res.w[0] == 0x38c15b0a00000000ull && // 10^33_low
+          z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || 
            (z_sign && rnd_mode != ROUNDING_DOWN)))) {
          *pfpsf |= UNDERFLOW_EXCEPTION;
        }
        if (rnd_mode != ROUNDING_TO_NEAREST) {
          rounding_correction (rnd_mode,
-                             is_inexact_lt_midpoint,
-                             is_inexact_gt_midpoint,
-                             is_midpoint_lt_even, is_midpoint_gt_even,
-                             e3, &res, pfpsf);
+                            is_inexact_lt_midpoint,
+                            is_inexact_gt_midpoint,
+                            is_midpoint_lt_even, is_midpoint_gt_even,
+                            e3, &res, pfpsf);
        }
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
  
      } else if (p34 == delta) { // Case (1''B)
@@ -1829,15 +1916,15 @@ delta_ge_zero:
          res.w[0] = C3.w[0];
        } else if (q3 <= 19) { // z fits in 64 bits
          if (scale <= 19) { // 10^scale fits in 64 bits
-          // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
-          __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
+          // 64 x 64 C3.w[0] * ten2k64[scale]
+          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
          } else { // 10^scale fits in 128 bits
-          // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
-          __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
+          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
+          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
          }
        } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
-        // 64 x 128 __bid_ten2k64[scale] * C3
-        __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
+        // 64 x 128 ten2k64[scale] * C3
+        __mul_128x64_to_128 (res, ten2k64[scale], C3);
        }
        // subtract scale from the exponent
        z_exp = z_exp - ((UINT64) scale << 49);
@@ -1847,55 +1934,55 @@ delta_ge_zero:
        // determine whether x * y is less than, equal to, or greater than 
        // 1/2 ulp (z)
        if (q4 <= 19) {
-        if (C4.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp
+        if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
            lt_half_ulp = 1;
-        } else if (C4.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp
+        } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
            eq_half_ulp = 1;
          } else { // > 1/2 ulp
            gt_half_ulp = 1;
          }
        } else if (q4 <= 38) {
-        if (C4.w[2] == 0 && (C4.w[1] < __bid_midpoint128[q4 - 20].w[1] || 
-            (C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && 
-            C4.w[0] < __bid_midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
+        if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || 
+            (C4.w[1] == midpoint128[q4 - 20].w[1] && 
+            C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
            lt_half_ulp = 1;
-        } else if (C4.w[2] == 0 && C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && 
-            C4.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
+        } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && 
+            C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
            eq_half_ulp = 1;
          } else { // > 1/2 ulp
            gt_half_ulp = 1;
          }
        } else if (q4 <= 58) {
-        if (C4.w[3] == 0 && (C4.w[2] < __bid_midpoint192[q4 - 39].w[2] || 
-            (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && 
-            C4.w[1] < __bid_midpoint192[q4 - 39].w[1]) || 
-            (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && 
-            C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && 
-            C4.w[0] < __bid_midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
+        if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || 
+            (C4.w[2] == midpoint192[q4 - 39].w[2] && 
+            C4.w[1] < midpoint192[q4 - 39].w[1]) || 
+            (C4.w[2] == midpoint192[q4 - 39].w[2] && 
+            C4.w[1] == midpoint192[q4 - 39].w[1] && 
+            C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
            lt_half_ulp = 1;
-        } else if (C4.w[3] == 0 && C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && 
-            C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && 
-            C4.w[0] == __bid_midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
+        } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && 
+            C4.w[1] == midpoint192[q4 - 39].w[1] && 
+            C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
            eq_half_ulp = 1;
          } else { // > 1/2 ulp
            gt_half_ulp = 1;
          }
        } else {
-        if (C4.w[3] < __bid_midpoint256[q4 - 59].w[3] || 
-            (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
-            C4.w[2] < __bid_midpoint256[q4 - 59].w[2]) || 
-            (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
-            C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && 
-            C4.w[1] < __bid_midpoint256[q4 - 59].w[1]) || 
-            (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
-            C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && 
-            C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && 
-            C4.w[0] < __bid_midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
+        if (C4.w[3] < midpoint256[q4 - 59].w[3] || 
+            (C4.w[3] == midpoint256[q4 - 59].w[3] && 
+            C4.w[2] < midpoint256[q4 - 59].w[2]) || 
+            (C4.w[3] == midpoint256[q4 - 59].w[3] && 
+            C4.w[2] == midpoint256[q4 - 59].w[2] && 
+            C4.w[1] < midpoint256[q4 - 59].w[1]) || 
+            (C4.w[3] == midpoint256[q4 - 59].w[3] && 
+            C4.w[2] == midpoint256[q4 - 59].w[2] && 
+            C4.w[1] == midpoint256[q4 - 59].w[1] && 
+            C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
            lt_half_ulp = 1;
-        } else if (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && 
-            C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && 
-            C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && 
-            C4.w[0] == __bid_midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
+        } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && 
+            C4.w[2] == midpoint256[q4 - 59].w[2] && 
+            C4.w[1] == midpoint256[q4 - 59].w[1] && 
+            C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
            eq_half_ulp = 1;
          } else { // > 1/2 ulp
            gt_half_ulp = 1;
@@ -1942,14 +2029,20 @@ delta_ge_zero:
            res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
            res.w[0] = 0x0000000000000000ull;
            *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
+          *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+          *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+          *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+          *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+          BID_SWAP128 (res);
            BID_RETURN (res)
          }
          if (rnd_mode != ROUNDING_TO_NEAREST) {
            rounding_correction (rnd_mode,
-                               is_inexact_lt_midpoint,
-                               is_inexact_gt_midpoint,
-                               is_midpoint_lt_even, is_midpoint_gt_even,
-                               e3, &res, pfpsf);
+                              is_inexact_lt_midpoint,
+                              is_inexact_gt_midpoint,
+                              is_midpoint_lt_even, is_midpoint_gt_even,
+                              e3, &res, pfpsf);
+          z_exp = res.w[1] & MASK_EXP;
          }
        } else { // if (p_sign != z_sign)
          // consider two cases, because C3 * 10^scale = 10^33 is a special case
@@ -1985,23 +2078,29 @@ delta_ge_zero:
                *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
              } else {
                rounding_correction (rnd_mode,
-                                   is_inexact_lt_midpoint,
-                                   is_inexact_gt_midpoint,
-                                   is_midpoint_lt_even,
-                                   is_midpoint_gt_even, e3, &res,
-                                   pfpsf);
+                                  is_inexact_lt_midpoint,
+                                  is_inexact_gt_midpoint,
+                                  is_midpoint_lt_even,
+                                  is_midpoint_gt_even, e3, &res,
+                                  pfpsf);
              }
+            *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+            *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+            *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+            *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+            BID_SWAP128 (res);
              BID_RETURN (res)
            }
            // set the inexact flag
            *pfpsf |= INEXACT_EXCEPTION;
            if (rnd_mode != ROUNDING_TO_NEAREST) {
              rounding_correction (rnd_mode,
-                                 is_inexact_lt_midpoint,
-                                 is_inexact_gt_midpoint,
-                                 is_midpoint_lt_even,
-                                 is_midpoint_gt_even, e3, &res, pfpsf);
+                                is_inexact_lt_midpoint,
+                                is_inexact_gt_midpoint,
+                                is_midpoint_lt_even,
+                                is_midpoint_gt_even, e3, &res, pfpsf);
            }
+          z_exp = res.w[1] & MASK_EXP;
          } else { // if C3 * 10^scale = 10^33
            e3 = (z_exp >> 49) - 6176;
            if (e3 > expmin) {
@@ -2019,118 +2118,122 @@ delta_ge_zero:
                // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 
                // x = q4-1, 1 <= x <= 67 and check if this operation is exact
                if (q4 <= 18) { // 2 <= q4 <= 18
-                __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
-                              &is_midpoint_lt_even,
-                              &is_midpoint_gt_even,
-                              &is_inexact_lt_midpoint,
-                              &is_inexact_gt_midpoint);
+               round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
+                             &is_midpoint_lt_even,
+                             &is_midpoint_gt_even,
+                             &is_inexact_lt_midpoint,
+                             &is_inexact_gt_midpoint);
                } else if (q4 <= 38) {
-                P128.w[1] = C4.w[1];
-                P128.w[0] = C4.w[0];
-                __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
-                                &is_midpoint_lt_even,
-                                &is_midpoint_gt_even,
-                                &is_inexact_lt_midpoint,
-                                &is_inexact_gt_midpoint);
-                R64 = R128.w[0]; // one decimal digit
+               P128.w[1] = C4.w[1];
+               P128.w[0] = C4.w[0];
+               round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
+                               &is_midpoint_lt_even,
+                               &is_midpoint_gt_even,
+                               &is_inexact_lt_midpoint,
+                               &is_inexact_gt_midpoint);
+               R64 = R128.w[0]; // one decimal digit
                } else if (q4 <= 57) {
-                P192.w[2] = C4.w[2];
-                P192.w[1] = C4.w[1];
-                P192.w[0] = C4.w[0];
-                __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
-                                &is_midpoint_lt_even,
-                                &is_midpoint_gt_even,
-                                &is_inexact_lt_midpoint,
-                                &is_inexact_gt_midpoint);
-                R64 = R192.w[0]; // one decimal digit
+               P192.w[2] = C4.w[2];
+               P192.w[1] = C4.w[1];
+               P192.w[0] = C4.w[0];
+               round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
+                               &is_midpoint_lt_even,
+                               &is_midpoint_gt_even,
+                               &is_inexact_lt_midpoint,
+                               &is_inexact_gt_midpoint);
+               R64 = R192.w[0]; // one decimal digit
                } else { // if (q4 <= 68)
-                __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
-                                &is_midpoint_lt_even,
-                                &is_midpoint_gt_even,
-                                &is_inexact_lt_midpoint,
-                                &is_inexact_gt_midpoint);
-                R64 = R256.w[0]; // one decimal digit
+               round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
+                               &is_midpoint_lt_even,
+                               &is_midpoint_gt_even,
+                               &is_inexact_lt_midpoint,
+                               &is_inexact_gt_midpoint);
+               R64 = R256.w[0]; // one decimal digit
                }
                if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
-                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
-                // the result is exact: 10^34 - R64
-                // incr_exp = 0 with certainty
-                z_exp = z_exp - EXP_P1;
-                e3 = e3 - 1;
-                res.w[1] =
-                  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
-                res.w[0] = 0x378d8e6400000000ull - R64;
+                 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+               // the result is exact: 10^34 - R64
+               // incr_exp = 0 with certainty
+               z_exp = z_exp - EXP_P1;
+               e3 = e3 - 1;
+               res.w[1] =
+                 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
+               res.w[0] = 0x378d8e6400000000ull - R64;
                } else {
-                // We want R64 to be the top digit of C4, but we actually 
-                // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
-                // because the top digit is (C4 * 10^(-q4+1))RZ
-                // however, if incr_exp = 1 then R64 = 10 with certainty
-                if (incr_exp) {
-                  R64 = 10;
-                }
-                // the result is inexact as C4 has more than 1 significant digit
-                // and C3 * 10^scale = 10^33
-                // example of case that is treated here:
-                // 100...0 * 10^e3 - 0.41 * 10^e3 =
-                // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
-                // note that (e3 > expmin}
-                // in order to round, subtract R64 from 10^34 and then compare
-                // C4 - R64 * 10^(q4-1) with 1/2 ulp
-                // calculate 10^34 - R64
-                res.w[1] = 0x0001ed09bead87c0ull;
-                res.w[0] = 0x378d8e6400000000ull - R64;
-                z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
-                // calculate C4 - R64 * 10^(q4-1); this is a rare case and
-                // R64 is small, 1 <= R64 <= 9
-                e3 = e3 - 1;
-                if (is_inexact_lt_midpoint) {
-                  is_inexact_lt_midpoint = 0;
-                  is_inexact_gt_midpoint = 1;
-                } else if (is_inexact_gt_midpoint) {
-                  is_inexact_gt_midpoint = 0;
-                  is_inexact_lt_midpoint = 1;
-                } else if (is_midpoint_lt_even) {
-                  is_midpoint_lt_even = 0;
-                  is_midpoint_gt_even = 1;
-                } else if (is_midpoint_gt_even) {
-                  is_midpoint_gt_even = 0;
-                  is_midpoint_lt_even = 1;
-                } else {
-                  ;
-                }
-                // the result is always inexact, and never tiny
-                // check for overflow for RN
-                if (e3 > expmax) {
-                  if (rnd_mode == ROUNDING_TO_NEAREST) {
-                    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
-                    res.w[0] = 0x0000000000000000ull;
-                    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
-                  } else {
-                    rounding_correction (rnd_mode,
-                                         is_inexact_lt_midpoint,
-                                         is_inexact_gt_midpoint,
-                                         is_midpoint_lt_even,
-                                         is_midpoint_gt_even, e3, &res,
-                                         pfpsf);
-                  }
-                  BID_RETURN (res)
-                }
-                // set the inexact flag
-                *pfpsf |= INEXACT_EXCEPTION;
-                res.w[1] =
-                  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
-                if (rnd_mode != ROUNDING_TO_NEAREST) {
-                  rounding_correction (rnd_mode,
-                                       is_inexact_lt_midpoint,
-                                       is_inexact_gt_midpoint,
-                                       is_midpoint_lt_even,
-                                       is_midpoint_gt_even, e3, &res,
-                                       pfpsf);
-                }
-                z_exp = res.w[1] & MASK_EXP;
-                e3 = (z_exp >> 49) - 6176;
-              }        // end result is inexact
-            }        // end q4 > 1
+               // We want R64 to be the top digit of C4, but we actually 
+               // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
+               // because the top digit is (C4 * 10^(-q4+1))RZ
+               // however, if incr_exp = 1 then R64 = 10 with certainty
+               if (incr_exp) {
+                 R64 = 10;
+               }
+               // the result is inexact as C4 has more than 1 significant digit
+               // and C3 * 10^scale = 10^33
+               // example of case that is treated here:
+               // 100...0 * 10^e3 - 0.41 * 10^e3 =
+               // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
+               // note that (e3 > expmin}
+               // in order to round, subtract R64 from 10^34 and then compare
+               // C4 - R64 * 10^(q4-1) with 1/2 ulp
+               // calculate 10^34 - R64
+               res.w[1] = 0x0001ed09bead87c0ull;
+               res.w[0] = 0x378d8e6400000000ull - R64;
+               z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
+               // calculate C4 - R64 * 10^(q4-1); this is a rare case and
+               // R64 is small, 1 <= R64 <= 9
+               e3 = e3 - 1;
+               if (is_inexact_lt_midpoint) {
+                 is_inexact_lt_midpoint = 0;
+                 is_inexact_gt_midpoint = 1;
+               } else if (is_inexact_gt_midpoint) {
+                 is_inexact_gt_midpoint = 0;
+                 is_inexact_lt_midpoint = 1;
+               } else if (is_midpoint_lt_even) {
+                 is_midpoint_lt_even = 0;
+                 is_midpoint_gt_even = 1;
+               } else if (is_midpoint_gt_even) {
+                 is_midpoint_gt_even = 0;
+                 is_midpoint_lt_even = 1;
+               } else {
+                 ;
+               }
+               // the result is always inexact, and never tiny
+               // check for overflow for RN
+               if (e3 > expmax) {
+                 if (rnd_mode == ROUNDING_TO_NEAREST) {
+                   res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
+                   res.w[0] = 0x0000000000000000ull;
+                   *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
+                 } else {
+                   rounding_correction (rnd_mode,
+                                        is_inexact_lt_midpoint,
+                                        is_inexact_gt_midpoint,
+                                        is_midpoint_lt_even,
+                                        is_midpoint_gt_even, e3, &res,
+                                        pfpsf);
+                 }
+                 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+                 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+                 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+                 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+                 BID_SWAP128 (res);
+                 BID_RETURN (res)
+               }
+               // set the inexact flag
+               *pfpsf |= INEXACT_EXCEPTION;
+               res.w[1] =
+                 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
+               if (rnd_mode != ROUNDING_TO_NEAREST) {
+                 rounding_correction (rnd_mode,
+                                      is_inexact_lt_midpoint,
+                                      is_inexact_gt_midpoint,
+                                      is_midpoint_lt_even,
+                                      is_midpoint_gt_even, e3, &res,
+                                      pfpsf);
+               }
+               z_exp = res.w[1] & MASK_EXP;
+              } // end result is inexact
+            } // end q4 > 1
            } else { // if (e3 = emin)
              // if e3 = expmin the result is also tiny (the condition for
              // tininess is C4 > 050...0 [q4 digits] which is met because
@@ -2161,27 +2264,33 @@ delta_ge_zero:
  
              if (rnd_mode != ROUNDING_TO_NEAREST) {
                rounding_correction (rnd_mode,
-                                   is_inexact_lt_midpoint,
-                                   is_inexact_gt_midpoint,
-                                   is_midpoint_lt_even,
-                                   is_midpoint_gt_even, e3, &res,
-                                   pfpsf);
+                  is_inexact_lt_midpoint,
+                  is_inexact_gt_midpoint,
+                  is_midpoint_lt_even,
+                  is_midpoint_gt_even, e3, &res,
+                  pfpsf);
+              z_exp = res.w[1] & MASK_EXP;
              }
-          }        // end e3 = emin
+          } // end e3 = emin
            // set the inexact flag (if the result was not exact)
            if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
                is_midpoint_lt_even || is_midpoint_gt_even)
              *pfpsf |= INEXACT_EXCEPTION;
-        }        // end 10^33
-      }        // end if (p_sign != z_sign)
+        } // end 10^33
+      } // end if (p_sign != z_sign)
        res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
  
      } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
          (q3 <= delta && delta + q4 <= p34) || // Case (3)
          (delta < q3 && p34 < delta + q4) || // Case (4)
          (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
-        (delta + q4 < q3)) &&// Case (6)
+        (delta + q4 < q3)) && // Case (6)
          !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
  
        // the result has the sign of z
@@ -2200,15 +2309,15 @@ delta_ge_zero:
          scale = q3 - delta - q4; // 1 <= scale <= 33
          if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
            if (scale <= 19) { // 10^scale fits in 64 bits
-            // 64 x 64 C4.w[0] * __bid_ten2k64[scale]
-            __mul_64x64_to_128MACH (P128, C4.w[0], __bid_ten2k64[scale]);
+            // 64 x 64 C4.w[0] * ten2k64[scale]
+            __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
            } else { // 10^scale fits in 128 bits
-            // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20]
-            __mul_128x64_to_128 (P128, C4.w[0], __bid_ten2k128[scale - 20]);
+            // 64 x 128 C4.w[0] * ten2k128[scale - 20]
+            __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
            }
          } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
-          // 64 x 128 __bid_ten2k64[scale] * C4
-          __mul_128x64_to_128 (P128, __bid_ten2k64[scale], C4);
+          // 64 x 128 ten2k64[scale] * C4
+          __mul_128x64_to_128 (P128, ten2k64[scale], C4);
          }
          C4.w[0] = P128.w[0];
          C4.w[1] = P128.w[1];
@@ -2237,15 +2346,15 @@ delta_ge_zero:
          res.w[0] = C3.w[0];
        } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
          if (scale <= 19) { // 10^scale fits in 64 bits
-          // 64 x 64 C3.w[0] * __bid_ten2k64[scale]
-          __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]);
+          // 64 x 64 C3.w[0] * ten2k64[scale]
+          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
          } else { // 10^scale fits in 128 bits
-          // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20]
-          __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]);
+          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
+          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
          }
        } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
-        // 64 x 128 __bid_ten2k64[scale] * C3
-        __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3);
+        // 64 x 128 ten2k64[scale] * C3
+        __mul_128x64_to_128 (res, ten2k64[scale], C3);
        }
        // e3 is already calculated
        e3 = e3 - scale;
@@ -2260,17 +2369,17 @@ delta_ge_zero:
        // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
        // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
        if (x0 == 0) { // this could happen only if we return to case2_repeat, or
-        // for Case (6)
+        // for Case (3) or Case (6)
          R128.w[1] = C4.w[1];
          R128.w[0] = C4.w[0];
        } else if (q4 <= 18) {
          // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
-        __bid_round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
-                      &is_midpoint_lt_even, &is_midpoint_gt_even,
-                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+        round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
+            &is_midpoint_lt_even, &is_midpoint_gt_even,
+            &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
          if (incr_exp) {
            // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
-          R64 = __bid_ten2k64[q4 - x0];
+          R64 = ten2k64[q4 - x0];
          }
          R128.w[1] = 0;
          R128.w[0] = R64;
@@ -2278,18 +2387,18 @@ delta_ge_zero:
          // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
          P128.w[1] = C4.w[1];
          P128.w[0] = C4.w[0];
-        __bid_round128_19_38 (q4, x0, P128, &R128, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round128_19_38 (q4, x0, P128, &R128, &incr_exp,
+            &is_midpoint_lt_even, &is_midpoint_gt_even,
+            &is_inexact_lt_midpoint,
+            &is_inexact_gt_midpoint);
          if (incr_exp) {
            // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
            if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
-            R128.w[0] = __bid_ten2k64[q4 - x0];
+            R128.w[0] = ten2k64[q4 - x0];
              // R128.w[1] stays 0
            } else { // 20 <= q4 - x0 <= 37
-            R128.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
-            R128.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
+            R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
+            R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
            }
          }
        } else if (q4 <= 57) {
@@ -2297,20 +2406,20 @@ delta_ge_zero:
          P192.w[2] = C4.w[2];
          P192.w[1] = C4.w[1];
          P192.w[0] = C4.w[0];
-        __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
+            &is_midpoint_lt_even, &is_midpoint_gt_even,
+            &is_inexact_lt_midpoint,
+            &is_inexact_gt_midpoint);
          // R192.w[2] is always 0
          if (incr_exp) {
            // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
            if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
-            R192.w[0] = __bid_ten2k64[q4 - x0];
+            R192.w[0] = ten2k64[q4 - x0];
              // R192.w[1] stays 0
              // R192.w[2] stays 0
            } else { // 20 <= q4 - x0 <= 33
-            R192.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
-            R192.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
+            R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
+            R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
              // R192.w[2] stays 0
            }
          }
@@ -2318,21 +2427,21 @@ delta_ge_zero:
          R128.w[0] = R192.w[0];
        } else {
          // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
-        __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
+            &is_midpoint_lt_even, &is_midpoint_gt_even,
+            &is_inexact_lt_midpoint,
+            &is_inexact_gt_midpoint);
          // R256.w[3] and R256.w[2] are always 0
          if (incr_exp) {
            // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
            if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19  
-            R256.w[0] = __bid_ten2k64[q4 - x0];
+            R256.w[0] = ten2k64[q4 - x0];
              // R256.w[1] stays 0
              // R256.w[2] stays 0
              // R256.w[3] stays 0
            } else { // 20 <= q4 - x0 <= 33 
-            R256.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0];
-            R256.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1];
+            R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
+            R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
              // R256.w[2] stays 0
              // R256.w[3] stays 0
            }
@@ -2352,7 +2461,7 @@ delta_ge_zero:
          // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
          if (res.w[1] > 0x0001ed09bead87c0ull ||
              (res.w[1] == 0x0001ed09bead87c0ull &&
-            res.w[0] > 0x378d8e63ffffffffull)) {
+             res.w[0] > 0x378d8e63ffffffffull)) {
            // avoid double rounding error
            is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
            is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
@@ -2364,10 +2473,10 @@ delta_ge_zero:
            is_midpoint_gt_even = 0;
            P128.w[1] = res.w[1];
            P128.w[0] = res.w[0];
-          __bid_round128_19_38 (35, 1, P128, &res, &incr_exp,
-                          &is_midpoint_lt_even, &is_midpoint_gt_even,
-                          &is_inexact_lt_midpoint,
-                          &is_inexact_gt_midpoint);
+          round128_19_38 (35, 1, P128, &res, &incr_exp,
+              &is_midpoint_lt_even, &is_midpoint_gt_even,
+              &is_inexact_lt_midpoint,
+              &is_inexact_gt_midpoint);
            // incr_exp is 0 with certainty in this case
            // avoid a double rounding error
            if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
@@ -2391,8 +2500,8 @@ delta_ge_zero:
              is_midpoint_gt_even = 0;
              is_inexact_gt_midpoint = 1;
            } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
-                     !is_inexact_lt_midpoint
-                     && !is_inexact_gt_midpoint) {
+                    !is_inexact_lt_midpoint
+                    && !is_inexact_gt_midpoint) {
              // if this second rounding was exact the result may still be 
              // inexact because of the first rounding
              if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
@@ -2402,14 +2511,16 @@ delta_ge_zero:
                is_inexact_lt_midpoint = 1;
              }
            } else if (is_midpoint_gt_even &&
-              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+                    (is_inexact_gt_midpoint0
+                     || is_midpoint_lt_even0)) {
              // pulled up to a midpoint
              is_inexact_lt_midpoint = 1;
              is_inexact_gt_midpoint = 0;
              is_midpoint_lt_even = 0;
              is_midpoint_gt_even = 0;
            } else if (is_midpoint_lt_even &&
-              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+                    (is_inexact_lt_midpoint0
+                     || is_midpoint_gt_even0)) {
              // pulled down to a midpoint
              is_inexact_lt_midpoint = 0;
              is_inexact_gt_midpoint = 1;
@@ -2423,7 +2534,7 @@ delta_ge_zero:
            if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
                !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
              if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
-                is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
+               is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
                is_inexact_lt_midpoint = 1;
              }
            }
@@ -2438,14 +2549,14 @@ delta_ge_zero:
                is_midpoint_lt_even = 1;
                res.w[0]++;
                if (res.w[0] == 0x0)
-                res.w[1]++;
+               res.w[1]++;
                // check for rounding overflow
                if (res.w[1] == 0x0001ed09bead87c0ull &&
-                  res.w[0] == 0x378d8e6400000000ull) {
-                // res = 10^34 => rounding overflow
-                res.w[1] = 0x0000314dc6448d93ull;
-                res.w[0] = 0x38c15b0a00000000ull; // 10^33
-                e3++;
+                 res.w[0] == 0x378d8e6400000000ull) {
+               // res = 10^34 => rounding overflow
+               res.w[1] = 0x0000314dc6448d93ull;
+               res.w[0] = 0x38c15b0a00000000ull; // 10^33
+               e3++;
                }
              } else if (is_midpoint_lt_even) {
                // res = res - 1
@@ -2453,18 +2564,23 @@ delta_ge_zero:
                is_midpoint_gt_even = 1;
                res.w[0]--;
                if (res.w[0] == 0xffffffffffffffffull)
-                res.w[1]--;
+               res.w[1]--;
                // if the result is pure zero, the sign depends on the rounding 
                // mode (x*y and z had opposite signs)
                if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
-                if (rnd_mode != ROUNDING_DOWN)
-                  z_sign = 0x0000000000000000ull;
-                else
-                  z_sign = 0x8000000000000000ull;
-                // the exponent is max (e3, expmin)
-                res.w[1] = 0x0;
-                res.w[0] = 0x0;
-                BID_RETURN (res)
+               if (rnd_mode != ROUNDING_DOWN)
+                 z_sign = 0x0000000000000000ull;
+               else
+                 z_sign = 0x8000000000000000ull;
+               // the exponent is max (e3, expmin)
+               res.w[1] = 0x0;
+               res.w[0] = 0x0;
+               *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+               *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+               *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+               *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+               BID_SWAP128 (res);
+               BID_RETURN (res)
                }
              } else {
                ;
@@ -2482,11 +2598,13 @@ delta_ge_zero:
            res.w[1]--; // borrow
          // if res < 10^33 and exp > expmin need to decrease x0 and 
          // increase scale by 1
-        if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || 
-            (res.w[1] == 0x0000314dc6448d93ull && 
-            res.w[0] < 0x38c15b0a00000000ull)) || 
-            (is_inexact_lt_midpoint && res.w[1] == 0x0000314dc6448d93ull && 
-            res.w[0] == 0x38c15b0a00000000ull)) && x0 >= 1) {
+        if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
+                            (res.w[1] == 0x0000314dc6448d93ull &&
+                             res.w[0] < 0x38c15b0a00000000ull)) ||
+                           (is_inexact_lt_midpoint
+                            && res.w[1] == 0x0000314dc6448d93ull
+                            && res.w[0] == 0x38c15b0a00000000ull))
+            && x0 >= 1) {
            x0 = x0 - 1;
            // first restore e3, otherwise it will be too small
            e3 = e3 + scale;
@@ -2498,7 +2616,7 @@ delta_ge_zero:
            incr_exp = 0;
            goto case2_repeat;
          }
-        // else this is the result rounded with unbounded exponent
+        // else this is the result rounded with unbounded exponent;
          // because the result has opposite sign to that of C4 which was 
          // rounded, need to change the rounding indicators
          if (is_inexact_lt_midpoint) {
@@ -2525,7 +2643,7 @@ delta_ge_zero:
                res.w[1]++;
              // check for rounding overflow
              if (res.w[1] == 0x0001ed09bead87c0ull &&
-                res.w[0] == 0x378d8e6400000000ull) {
+               res.w[0] == 0x378d8e6400000000ull) {
                // res = 10^34 => rounding overflow
                res.w[1] = 0x0000314dc6448d93ull;
                res.w[0] = 0x38c15b0a00000000ull; // 10^33
@@ -2540,12 +2658,17 @@ delta_ge_zero:
              // mode (x*y and z had opposite signs)
              if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
                if (rnd_mode != ROUNDING_DOWN)
-                z_sign = 0x0000000000000000ull;
+               z_sign = 0x0000000000000000ull;
                else
-                z_sign = 0x8000000000000000ull;
+               z_sign = 0x8000000000000000ull;
                // the exponent is max (e3, expmin)
                res.w[1] = 0x0;
                res.w[0] = 0x0;
+              *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+              *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+              *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+              *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+              BID_SWAP128 (res);
                BID_RETURN (res)
              }
            } else {
@@ -2556,7 +2679,13 @@ delta_ge_zero:
          }
        }
        // check for underflow
-      if (e3 < expmin) {
+      if (e3 == expmin) { // and if significand < 10^33 => result is tiny
+        if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
+            ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
+             res.w[0] < 0x38c15b0a00000000ull)) {
+          is_tiny = 1;
+        }
+      } else if (e3 < expmin) {
          // the result is tiny, so we must truncate more of res
          is_tiny = 1;
          x0 = expmin - e3;
@@ -2572,21 +2701,21 @@ delta_ge_zero:
          if (res.w[1] == 0x0) {
            // between 1 and 19 digits
            for (ind = 1; ind <= 19; ind++) {
-            if (res.w[0] < __bid_ten2k64[ind]) {
+            if (res.w[0] < ten2k64[ind]) {
                break;
              }
            }
            // ind digits
-        } else if ((res.w[1] < __bid_ten2k128[0].w[1] ||
-                    (res.w[1] == __bid_ten2k128[0].w[1]
-                    && res.w[0] < __bid_ten2k128[0].w[0]))) {
+        } else if (res.w[1] < ten2k128[0].w[1] ||
+                  (res.w[1] == ten2k128[0].w[1]
+                   && res.w[0] < ten2k128[0].w[0])) {
            // 20 digits
            ind = 20;
          } else { // between 21 and 38 digits
            for (ind = 1; ind <= 18; ind++) {
-            if (res.w[1] < __bid_ten2k128[ind].w[1] ||
-                (res.w[1] == __bid_ten2k128[ind].w[1] &&
-                res.w[0] < __bid_ten2k128[ind].w[0])) {
+            if (res.w[1] < ten2k128[ind].w[1] ||
+               (res.w[1] == ten2k128[ind].w[1] &&
+                res.w[0] < ten2k128[ind].w[0])) {
                break;
              }
            }
@@ -2595,7 +2724,7 @@ delta_ge_zero:
          }
  
          // at this point ind >= x0; because delta >= 2 on this path, the case
-        // ind = x0 cab occur only in Case (2) or case (3), when C3 has one
+        // ind = x0 can occur only in Case (2) or case (3), when C3 has one
          // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), 
          // the signs of x * y and z are opposite, and through cancellation 
          // the most significant decimal digit in res has the weight
@@ -2611,13 +2740,13 @@ delta_ge_zero:
            is_inexact_gt_midpoint = 1;
          } else if (ind <= 18) { // check that 2 <= ind
            // 2 <= ind <= 18, 1 <= x0 <= 17
-          __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+          round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
            if (incr_exp) {
-            // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
-            R64 = __bid_ten2k64[q4 - x0];
+            // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
+            R64 = ten2k64[ind - x0];
            }
            res.w[1] = 0;
            res.w[0] = R64;
@@ -2625,18 +2754,18 @@ delta_ge_zero:
            // 19 <= ind <= 38
            P128.w[1] = res.w[1];
            P128.w[0] = res.w[0];
-          __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp,
-                          &is_midpoint_lt_even, &is_midpoint_gt_even,
-                          &is_inexact_lt_midpoint,
-                          &is_inexact_gt_midpoint);
+          round128_19_38 (ind, x0, P128, &res, &incr_exp,
+                         &is_midpoint_lt_even, &is_midpoint_gt_even,
+                         &is_inexact_lt_midpoint,
+                         &is_inexact_gt_midpoint);
            if (incr_exp) {
-            // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
+            // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
              if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
-              res.w[0] = __bid_ten2k64[ind - x0];
+              res.w[0] = ten2k64[ind - x0];
                // res.w[1] stays 0
              } else { // 20 <= ind - x0 <= 37
-              res.w[0] = __bid_ten2k128[ind - x0 - 20].w[0];
-              res.w[1] = __bid_ten2k128[ind - x0 - 20].w[1];
+              res.w[0] = ten2k128[ind - x0 - 20].w[0];
+              res.w[1] = ten2k128[ind - x0 - 20].w[1];
              }
            }
          }
@@ -2662,7 +2791,7 @@ delta_ge_zero:
            is_midpoint_gt_even = 0;
            is_inexact_gt_midpoint = 1;
          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
-                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
            // if this second rounding was exact the result may still be 
            // inexact because of the first rounding
            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
@@ -2672,14 +2801,14 @@ delta_ge_zero:
              is_inexact_lt_midpoint = 1;
            }
          } else if (is_midpoint_gt_even &&
-                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+                  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
            // pulled up to a midpoint
            is_inexact_lt_midpoint = 1;
            is_inexact_gt_midpoint = 0;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else if (is_midpoint_lt_even &&
-                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+                  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
            // pulled down to a midpoint
            is_inexact_lt_midpoint = 0;
            is_inexact_gt_midpoint = 1;
@@ -2697,6 +2826,8 @@ delta_ge_zero:
              is_inexact_lt_midpoint = 1;
            }
          }
+      } else {
+        ; // not underflow
        }
        // check for inexact result
        if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
@@ -2723,11 +2854,16 @@ delta_ge_zero:
        }
        if (rnd_mode != ROUNDING_TO_NEAREST) {
          rounding_correction (rnd_mode,
-                             is_inexact_lt_midpoint,
-                             is_inexact_gt_midpoint,
-                             is_midpoint_lt_even, is_midpoint_gt_even,
-                             e3, &res, pfpsf);
+                            is_inexact_lt_midpoint,
+                            is_inexact_gt_midpoint,
+                            is_midpoint_lt_even, is_midpoint_gt_even,
+                            e3, &res, pfpsf);
        }
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
  
      } else {
@@ -2758,9 +2894,9 @@ delta_ge_zero:
          ind = e3;
          e3 = e4;
          e4 = ind;
-        tmp64 = z_sign;
+        tmp_sign = z_sign;
          z_sign = p_sign;
-        p_sign = tmp64;
+        p_sign = tmp_sign;
        } else { // from Cases (2), (3), (4), (5)
          // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be 
          // scaled up by q4 + delta - q3; this is the same as in Cases (15), 
@@ -2768,9 +2904,14 @@ delta_ge_zero:
          delta = -delta;
        }
        add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
-                     rnd_mode, &is_midpoint_lt_even,
-                     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
-                     &is_inexact_gt_midpoint, pfpsf, &res);
+                    rnd_mode, &is_midpoint_lt_even,
+                    &is_midpoint_gt_even, &is_inexact_lt_midpoint,
+                    &is_inexact_gt_midpoint, pfpsf, &res);
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
  
      }
@@ -2787,25 +2928,25 @@ delta_ge_zero:
        if (q4 <= 38) {
          P128.w[1] = C4.w[1];
          P128.w[0] = C4.w[0];
-        __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round128_19_38 (q4, x0, P128, &res, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
        } else if (q4 <= 57) { // 35 <= q4 <= 57
          P192.w[2] = C4.w[2];
          P192.w[1] = C4.w[1];
          P192.w[0] = C4.w[0];
-        __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round192_39_57 (q4, x0, P192, &R192, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
          res.w[0] = R192.w[0];
          res.w[1] = R192.w[1];
        } else { // if (q4 <= 68)
-        __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round256_58_76 (q4, x0, C4, &R256, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
          res.w[0] = R256.w[0];
          res.w[1] = R256.w[1];
        }
@@ -2833,34 +2974,34 @@ delta_ge_zero:
                is_inexact_gt_midpoint = 1;
              } else { // if (delta == p34 + 1)
                if (q3 <= 19) {
-                if (C3.w[0] < __bid_midpoint64[q3 - 1]) { // C3 < 1/2 ulp
-                  // res = 10^33, unchanged
-                  is_inexact_gt_midpoint = 1;
-                } else if (C3.w[0] == __bid_midpoint64[q3 - 1]) { // C3 = 1/2 ulp
-                  // res = 10^33, unchanged
-                  is_midpoint_lt_even = 1;
-                } else { // if (C3.w[0] > __bid_midpoint64[q3-1]), C3 > 1/2 ulp
-                  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
-                  res.w[0] = 0x378d8e63ffffffffull;
-                  e4 = e4 - 1;
-                  is_inexact_lt_midpoint = 1;
-                }
+               if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
+                 // res = 10^33, unchanged
+                 is_inexact_gt_midpoint = 1;
+               } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
+                 // res = 10^33, unchanged
+                 is_midpoint_lt_even = 1;
+               } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
+                 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
+                 res.w[0] = 0x378d8e63ffffffffull;
+                 e4 = e4 - 1;
+                 is_inexact_lt_midpoint = 1;
+               }
                } else { // if (20 <= q3 <=34)
-                if (C3.w[1] < __bid_midpoint128[q3 - 20].w[1] || 
-                    (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && 
-                    C3.w[0] < __bid_midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
-                  // res = 10^33, unchanged
-                  is_inexact_gt_midpoint = 1;
-                } else if (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && 
-                    C3.w[0] == __bid_midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
-                  // res = 10^33, unchanged
-                  is_midpoint_lt_even = 1;
-                } else { // if (C3 > __bid_midpoint128[q3-20]), C3 > 1/2 ulp
-                  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
-                  res.w[0] = 0x378d8e63ffffffffull;
-                  e4 = e4 - 1;
-                  is_inexact_lt_midpoint = 1;
-                }
+               if (C3.w[1] < midpoint128[q3 - 20].w[1] || 
+                    (C3.w[1] == midpoint128[q3 - 20].w[1] && 
+                    C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
+                 // res = 10^33, unchanged
+                 is_inexact_gt_midpoint = 1;
+               } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && 
+                    C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
+                 // res = 10^33, unchanged
+                 is_midpoint_lt_even = 1;
+               } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
+                 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
+                 res.w[0] = 0x378d8e63ffffffffull;
+                 e4 = e4 - 1;
+                 is_inexact_lt_midpoint = 1;
+               }
                }
              }
            }
@@ -2911,16 +3052,21 @@ delta_ge_zero:
        }
        if (rnd_mode != ROUNDING_TO_NEAREST) {
          rounding_correction (rnd_mode,
-                             is_inexact_lt_midpoint,
-                             is_inexact_gt_midpoint,
-                             is_midpoint_lt_even, is_midpoint_gt_even,
-                             e4, &res, pfpsf);
+                            is_inexact_lt_midpoint,
+                            is_inexact_gt_midpoint,
+                            is_midpoint_lt_even, is_midpoint_gt_even,
+                            e4, &res, pfpsf);
        }
        if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
            is_midpoint_lt_even || is_midpoint_gt_even) {
          // set the inexact flag
          *pfpsf |= INEXACT_EXCEPTION;
        }
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
  
      } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
@@ -2952,12 +3098,12 @@ delta_ge_zero:
        ind = e3;
        e3 = e4;
        e4 = ind;
-      tmp64 = z_sign;
+      tmp_sign = z_sign;
        z_sign = p_sign;
-      p_sign = tmp64;
-      tmp64 = z_exp;
+      p_sign = tmp_sign;
+      tmp.ui64 = z_exp;
        z_exp = p_exp;
-      p_exp = tmp64;
+      p_exp = tmp.ui64;
        goto delta_ge_zero;
  
      } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
@@ -2967,16 +3113,16 @@ delta_ge_zero:
        // 1 <= x0 <= q3 - 1 <= p34 - 1 
        x0 = e4 - e3; // or x0 = delta + q3 - q4
        if (q3 <= 18) { // 2 <= q3 <= 18
-        __bid_round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
-                      &is_midpoint_lt_even, &is_midpoint_gt_even,
-                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+        round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
+                     &is_midpoint_lt_even, &is_midpoint_gt_even,
+                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
          // C3.w[1] = 0;
          C3.w[0] = R64;
        } else if (q3 <= 38) {
-        __bid_round128_19_38 (q3, x0, C3, &R128, &incr_exp,
-                        &is_midpoint_lt_even, &is_midpoint_gt_even,
-                        &is_inexact_lt_midpoint,
-                        &is_inexact_gt_midpoint);
+        round128_19_38 (q3, x0, C3, &R128, &incr_exp,
+                       &is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint);
          C3.w[1] = R128.w[1];
          C3.w[0] = R128.w[0];
        }
@@ -2987,7 +3133,7 @@ delta_ge_zero:
          // 64 x 128 -> 128
          P128.w[1] = C3.w[1];
          P128.w[0] = C3.w[0];
-        __mul_64x128_to_128 (C3, __bid_ten2k64[1], P128);
+        __mul_64x128_to_128 (C3, ten2k64[1], P128);
        }
        e3 = e3 + x0; // this is e4
        // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; 
@@ -3026,10 +3172,10 @@ delta_ge_zero:
              if (R256.w[0] == 0x0) {
                R256.w[1]++;
                if (R256.w[1] == 0x0) {
-                R256.w[2]++;
-                if (R256.w[2] == 0x0) {
-                  R256.w[3]++;
-                }
+               R256.w[2]++;
+               if (R256.w[2] == 0x0) {
+                 R256.w[3]++;
+               }
                }
              }
              // no check for rounding overflow - R256 was a difference
@@ -3039,10 +3185,10 @@ delta_ge_zero:
              if (R256.w[0] == 0xffffffffffffffffull) {
                R256.w[1]--;
                if (R256.w[1] == 0xffffffffffffffffull) {
-                R256.w[2]--;
-                if (R256.w[2] == 0xffffffffffffffffull) {
-                  R256.w[3]--;
-                }
+               R256.w[2]--;
+               if (R256.w[2] == 0xffffffffffffffffull) {
+                 R256.w[3]--;
+               }
                }
              }
            } else {
@@ -3053,7 +3199,7 @@ delta_ge_zero:
          }
        }
        // determine the number of decimal digits in R256
-      ind = __bid_nr_digits256 (R256); // ind >= p34
+      ind = nr_digits256 (R256); // ind >= p34
        // if R256 is sum, then ind > p34; if R256 is a difference, then 
        // ind >= p34; this means that we can calculate the result rounded to
        // the destination precision, with unbounded exponent, starting from R256
@@ -3086,25 +3232,25 @@ delta_ge_zero:
          if (ind <= 38) {
            P128.w[1] = R256.w[1];
            P128.w[0] = R256.w[0];
-          __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp,
-                          &is_midpoint_lt_even, &is_midpoint_gt_even,
-                          &is_inexact_lt_midpoint,
-                          &is_inexact_gt_midpoint);
+          round128_19_38 (ind, x0, P128, &R128, &incr_exp,
+                         &is_midpoint_lt_even, &is_midpoint_gt_even,
+                         &is_inexact_lt_midpoint,
+                         &is_inexact_gt_midpoint);
          } else if (ind <= 57) {
            P192.w[2] = R256.w[2];
            P192.w[1] = R256.w[1];
            P192.w[0] = R256.w[0];
-          __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp,
-                          &is_midpoint_lt_even, &is_midpoint_gt_even,
-                          &is_inexact_lt_midpoint,
-                          &is_inexact_gt_midpoint);
+          round192_39_57 (ind, x0, P192, &R192, &incr_exp,
+                         &is_midpoint_lt_even, &is_midpoint_gt_even,
+                         &is_inexact_lt_midpoint,
+                         &is_inexact_gt_midpoint);
            R128.w[1] = R192.w[1];
            R128.w[0] = R192.w[0];
          } else { // if (ind <= 68)
-          __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp,
-                          &is_midpoint_lt_even, &is_midpoint_gt_even,
-                          &is_inexact_lt_midpoint,
-                          &is_inexact_gt_midpoint);
+          round256_58_76 (ind, x0, R256, &R256, &incr_exp,
+                         &is_midpoint_lt_even, &is_midpoint_gt_even,
+                         &is_inexact_lt_midpoint,
+                         &is_inexact_gt_midpoint);
            R128.w[1] = R256.w[1];
            R128.w[0] = R256.w[0];
          }
@@ -3129,7 +3275,7 @@ delta_ge_zero:
            // not possible in Cases (2)-(6) or (15)-(17) which may get here
            // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
            if (res.w[1] == 0x0000314dc6448d93ull && 
-              res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
+            res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
              res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
              res.w[0] = 0x378d8e63ffffffffull;
              e4--;
@@ -3143,7 +3289,7 @@ delta_ge_zero:
            is_midpoint_gt_even = 0;
            is_inexact_gt_midpoint = 1;
          } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
-                   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+                  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
            // if this second rounding was exact the result may still be
            // inexact because of the first rounding
            if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
@@ -3153,14 +3299,14 @@ delta_ge_zero:
              is_inexact_lt_midpoint = 1;
            }
          } else if (is_midpoint_gt_even &&
-                   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+                  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
            // pulled up to a midpoint
            is_inexact_lt_midpoint = 1;
            is_inexact_gt_midpoint = 0;
            is_midpoint_lt_even = 0;
            is_midpoint_gt_even = 0;
          } else if (is_midpoint_lt_even &&
-                   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+                  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
            // pulled down to a midpoint
            is_inexact_lt_midpoint = 0;
            is_inexact_gt_midpoint = 1;
@@ -3183,10 +3329,10 @@ delta_ge_zero:
          P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
          P128.w[0] = res.w[0];
          rounding_correction (rnd_mode,
-                             is_inexact_lt_midpoint,
-                             is_inexact_gt_midpoint,
-                             is_midpoint_lt_even, is_midpoint_gt_even,
-                             0, &P128, pfpsf);
+                            is_inexact_lt_midpoint,
+                            is_inexact_gt_midpoint,
+                            is_midpoint_lt_even, is_midpoint_gt_even,
+                            0, &P128, pfpsf);
          scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
          // the number of digits in the significand is p34 = 34
          if (e4 + scale < expmin) {
@@ -3213,8 +3359,13 @@ delta_ge_zero:
          res.w[1] = p_sign | 0x7800000000000000ull;
          res.w[0] = 0x0000000000000000ull;
          *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
+        *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+        *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+        *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+        *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+        BID_SWAP128 (res);
          BID_RETURN (res)
-      }        // else not overflow or not RN, so continue
+      } // else not overflow or not RN, so continue
  
        // from this point on this is similar to the last part of the computation
        // for Cases (15), (16), (17)
@@ -3249,10 +3400,10 @@ delta_ge_zero:
            R128.w[1] = res.w[1] & MASK_COEFF;
            R128.w[0] = res.w[0];
            if (ind <= 19) {
-            if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp
+            if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
                lt_half_ulp = 1;
                is_inexact_lt_midpoint = 1;
-            } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp
+            } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
                eq_half_ulp = 1;
                is_midpoint_gt_even = 1;
              } else { // > 1/2 ulp
@@ -3260,13 +3411,13 @@ delta_ge_zero:
                is_inexact_gt_midpoint = 1;
              }
            } else { // if (ind <= 38)
-            if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || 
-                (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
-                R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp
+            if (R128.w[1] < midpoint128[ind - 20].w[1] || 
+                (R128.w[1] == midpoint128[ind - 20].w[1] && 
+                R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
                lt_half_ulp = 1;
                is_inexact_lt_midpoint = 1;
-            } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && 
-                R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp
+            } else if (R128.w[1] == midpoint128[ind - 20].w[1] && 
+                R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
                eq_half_ulp = 1;
                is_midpoint_gt_even = 1;
              } else { // > 1/2 ulp
@@ -3289,19 +3440,19 @@ delta_ge_zero:
            // round the ind-digit result to ind - x0 digits
  
            if (ind <= 18) { // 2 <= ind <= 18
-            __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
-                          &is_midpoint_lt_even, &is_midpoint_gt_even,
-                          &is_inexact_lt_midpoint,
-                          &is_inexact_gt_midpoint);
+            round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
+                         &is_midpoint_lt_even, &is_midpoint_gt_even,
+                         &is_inexact_lt_midpoint,
+                         &is_inexact_gt_midpoint);
              res.w[1] = 0x0;
              res.w[0] = R64;
            } else if (ind <= 38) {
              P128.w[1] = res.w[1] & MASK_COEFF;
              P128.w[0] = res.w[0];
-            __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp,
-                            &is_midpoint_lt_even, &is_midpoint_gt_even,
-                            &is_inexact_lt_midpoint,
-                            &is_inexact_gt_midpoint);
+            round128_19_38 (ind, x0, P128, &res, &incr_exp,
+                           &is_midpoint_lt_even, &is_midpoint_gt_even,
+                           &is_inexact_lt_midpoint,
+                           &is_inexact_gt_midpoint);
            }
            e4 = e4 + x0; // expmin
            // we want the exponent to be expmin, so if incr_exp = 1 then
@@ -3310,13 +3461,14 @@ delta_ge_zero:
              // 64 x 128 -> 128
              P128.w[1] = res.w[1] & MASK_COEFF;
              P128.w[0] = res.w[0];
-            __mul_64x128_to_128 (res, __bid_ten2k64[1], P128);
+            __mul_64x128_to_128 (res, ten2k64[1], P128);
            }
            res.w[1] =
-            p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
+            p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
+                                                    w[1] & MASK_COEFF);
            // avoid a double rounding error
            if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
-              is_midpoint_lt_even) { // double rounding error upward
+                is_midpoint_lt_even) { // double rounding error upward
              // res = res - 1
              res.w[0]--;
              if (res.w[0] == 0xffffffffffffffffull)
@@ -3328,7 +3480,7 @@ delta_ge_zero:
              is_midpoint_lt_even = 0;
              is_inexact_lt_midpoint = 1;
            } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
-              is_midpoint_gt_even) { // double rounding error downward
+                is_midpoint_gt_even) { // double rounding error downward
              // res = res + 1
              res.w[0]++;
              if (res.w[0] == 0)
@@ -3336,7 +3488,8 @@ delta_ge_zero:
              is_midpoint_gt_even = 0;
              is_inexact_gt_midpoint = 1;
            } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
-              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+                    !is_inexact_lt_midpoint
+                    && !is_inexact_gt_midpoint) {
              // if this second rounding was exact the result may still be 
              // inexact because of the first rounding
              if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
@@ -3346,14 +3499,16 @@ delta_ge_zero:
                is_inexact_lt_midpoint = 1;
              }
            } else if (is_midpoint_gt_even &&
-                     (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+                    (is_inexact_gt_midpoint0
+                     || is_midpoint_lt_even0)) {
              // pulled up to a midpoint
              is_inexact_lt_midpoint = 1;
              is_inexact_gt_midpoint = 0;
              is_midpoint_lt_even = 0;
              is_midpoint_gt_even = 0;
            } else if (is_midpoint_lt_even &&
-                     (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+                    (is_inexact_lt_midpoint0
+                     || is_midpoint_gt_even0)) {
              // pulled down to a midpoint
              is_inexact_lt_midpoint = 0;
              is_inexact_gt_midpoint = 1;
@@ -3368,10 +3523,10 @@ delta_ge_zero:
        // apply correction if not rounding to nearest
        if (rnd_mode != ROUNDING_TO_NEAREST) {
          rounding_correction (rnd_mode,
-                             is_inexact_lt_midpoint,
-                             is_inexact_gt_midpoint,
-                             is_midpoint_lt_even, is_midpoint_gt_even,
-                             e4, &res, pfpsf);
+                            is_inexact_lt_midpoint,
+                            is_inexact_gt_midpoint,
+                            is_midpoint_lt_even, is_midpoint_gt_even,
+                            e4, &res, pfpsf);
        }
        if (is_midpoint_lt_even || is_midpoint_gt_even ||
            is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
@@ -3380,7 +3535,11 @@ delta_ge_zero:
          if (is_tiny)
            *pfpsf |= UNDERFLOW_EXCEPTION;
        }
-
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
  
      } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
@@ -3391,18 +3550,916 @@ delta_ge_zero:
        // unbounded exponent
  
        add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
-                     rnd_mode, &is_midpoint_lt_even,
-                     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
-                     &is_inexact_gt_midpoint, pfpsf, &res);
-
+              rnd_mode, &is_midpoint_lt_even,
+              &is_midpoint_gt_even, &is_inexact_lt_midpoint,
+              &is_inexact_gt_midpoint, pfpsf, &res);
+      *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+      *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+      *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+      *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+      BID_SWAP128 (res);
        BID_RETURN (res)
  
      } else {
        ;
      }
  
-  }        // end if delta < 0
+  } // end if delta < 0
  
+  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+  BID_SWAP128 (res);
    BID_RETURN (res)
  
  }
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
+            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+            _EXC_INFO_PARAM) {
+  UINT128 x = *px, y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128_fma (UINT128 x, UINT128 y, UINT128 z
+            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+            _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even, is_midpoint_gt_even,
+    is_inexact_lt_midpoint, is_inexact_gt_midpoint;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, &x, &y, &z
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x, y,
+                       z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px, y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1, y1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, &x1, &y1, &z1
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x1, y1,
+                       z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px, y = *py;
+  UINT128 z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1, y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, &x1, &y1, &z
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x1, y1,
+                       z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, &x1, py, &z1
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x1, y,
+                       z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, &x1, py, pz
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x1, y,
+                       z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 y1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, px, &y1, &z1
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x, y1,
+                       z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 y = *py;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, px, &y1, pz
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x, y1,
+                       z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                 &res, px, py, &z1
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                       &is_inexact_lt_midpoint,
+                       &is_inexact_gt_midpoint, x, y,
+                       z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+// Note: bid128qqq_fma is represented by bid128_fma
+
+// Note: bid64ddd_fma is represented by bid64_fma
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 x = *px, y = *py;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 x1, y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, &x1, &y1, pz
+               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+               _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x1, y1, z
+                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                      _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 x = *px, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 x1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, &x1, py, &z1
+               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+               _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x1, y, z1
+                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                      _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 x = *px;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 x1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, &x1, py, pz
+               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+               _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x1, y, z
+                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                      _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 y1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, px, &y1, &z1
+               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+               _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x, y1, z1
+                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                      _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 y = *py;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, px, &y1, pz
+               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+               _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x, y1, z
+                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                      _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, px, py, &z1
+               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+               _EXC_INFO_ARG);
+#else
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x, y, z1
+                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                      _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT128 x = *px, y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
+    is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  int incr_exp;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
+  UINT64 sign;
+  UINT64 exp;
+  int unbexp;
+  UINT128 C;
+  BID_UI64DOUBLE tmp;
+  int nr_bits;
+  int q, x0;
+  int scale;
+  int lt_half_ulp = 0, eq_half_ulp = 0;
+
+  // Note: for rounding modes other than RN or RA, the result can be obtained
+  // by rounding first to BID128 and then to BID64
+
+  save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
+  *pfpsf = 0;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
+                 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
+                 &res, &x, &y, &z
+                 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                 _EXC_INFO_ARG);
+#else
+  res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
+                       &is_inexact_lt_midpoint0,
+                       &is_inexact_gt_midpoint0, x, y,
+                       z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+
+  if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) || 
+      (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
+      ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
+      ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity  
+#if DECIMAL_CALL_BY_REFERENCE
+    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#else
+    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#endif
+    // determine the unbiased exponent of the result
+    unbexp = ((res1 >> 53) & 0x3ff) - 398;
+
+    // if subnormal, res1  must have exp = -398
+    // if tiny and inexact set underflow and inexact status flags
+    if (!((res1 & MASK_NAN) == MASK_NAN) &&    // res1 not NaN
+        (unbexp == -398)
+        && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
+        && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
+            || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
+      // set the inexact flag and the underflow flag
+      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
+    } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
+               is_midpoint_lt_even0 || is_midpoint_gt_even0) {
+      // set the inexact flag and the underflow flag
+      *pfpsf |= INEXACT_EXCEPTION;
+    }
+
+    *pfpsf |= save_fpsf;
+    BID_RETURN (res1);
+  } // else continue, and use rounding to nearest to round to 16 digits
+
+  // at this point the result is rounded to nearest (even or away) to 34 digits
+  // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
+  sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
+  exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
+  unbexp = (exp >> 49) - 6176;
+  C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
+  C.w[0] = res.w[LOW_128W];
+
+  if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||      // result is zero
+      (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) { 
+      // clear under/overflow
+#if DECIMAL_CALL_BY_REFERENCE
+    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#else
+    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#endif
+    *pfpsf |= save_fpsf;
+    BID_RETURN (res1);
+  } // else continue
+
+  // -398 - 34 <= unbexp <= 369 + 15
+  if (rnd_mode == ROUNDING_TIES_AWAY) {
+    // apply correction, if needed, to make the result rounded to nearest-even
+    if (is_midpoint_gt_even) {
+      // res = res - 1
+      res1--; // res1 is now even
+    } // else the result is already correctly rounded to nearest-even
+  }
+  // at this point the result is finite, non-zero canonical normal or subnormal,
+  // and in most cases overflow or underflow will not occur
+
+  // determine the number of digits q in the result
+  // q = nr. of decimal digits in x
+  // determine first the nr. of bits in x
+  if (C.w[1] == 0) {
+    if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
+      // split the 64-bit value in two 32-bit halves to avoid rounding errors
+      if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
+        tmp.d = (double) (C.w[0] >> 32); // exact conversion
+        nr_bits =
+          33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+      } else { // x < 2^32
+        tmp.d = (double) (C.w[0]); // exact conversion
+        nr_bits =
+          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+      }
+    } else { // if x < 2^53
+      tmp.d = (double) C.w[0]; // exact conversion
+      nr_bits =
+        1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
+    tmp.d = (double) C.w[1]; // exact conversion
+    nr_bits =
+      65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+  q = nr_digits[nr_bits - 1].digits;
+  if (q == 0) {
+    q = nr_digits[nr_bits - 1].digits1;
+    if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
+        (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
+         C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
+      q++;
+  }
+  // if q > 16, round to nearest even to 16 digits (but for underflow it may 
+  // have to be truncated even more)
+  if (q > 16) {
+    x0 = q - 16;
+    if (q <= 18) {
+      round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
+                   &is_midpoint_lt_even, &is_midpoint_gt_even,
+                   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+    } else { // 19 <= q <= 34
+      round128_19_38 (q, x0, C, &res128, &incr_exp,
+                     &is_midpoint_lt_even, &is_midpoint_gt_even,
+                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+      res1 = res128.w[0]; // the result fits in 64 bits
+    }
+    unbexp = unbexp + x0;
+    if (incr_exp)
+      unbexp++;
+    q = 16; // need to set in case denormalization is necessary
+  } else {
+    // the result does not require a second rounding (and it must have 
+    // been exact in the first rounding, since q <= 16)
+    res1 = C.w[0];
+  }
+
+  // avoid a double rounding error
+  if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
+      is_midpoint_lt_even) { // double rounding error upward
+    // res = res - 1 
+    res1--; // res1 becomes odd 
+    is_midpoint_lt_even = 0;
+    is_inexact_lt_midpoint = 1;
+    if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
+      res1 = 0x002386f26fc0ffffull; // 10^16 - 1 
+      unbexp--;
+    }
+  } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
+      is_midpoint_gt_even) { // double rounding error downward
+    // res = res + 1
+    res1++; // res1 becomes odd (so it cannot be 10^16)
+    is_midpoint_gt_even = 0;
+    is_inexact_gt_midpoint = 1;
+  } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
+             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+    // if this second rounding was exact the result may still be 
+    // inexact because of the first rounding
+    if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
+      is_inexact_gt_midpoint = 1;
+    }
+    if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
+      is_inexact_lt_midpoint = 1;
+    }
+  } else if (is_midpoint_gt_even &&
+             (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+    // pulled up to a midpoint 
+    is_inexact_lt_midpoint = 1;
+    is_inexact_gt_midpoint = 0;
+    is_midpoint_lt_even = 0;
+    is_midpoint_gt_even = 0;
+  } else if (is_midpoint_lt_even &&
+             (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+    // pulled down to a midpoint 
+    is_inexact_lt_midpoint = 0;
+    is_inexact_gt_midpoint = 1;
+    is_midpoint_lt_even = 0;
+    is_midpoint_gt_even = 0;
+  } else {
+    ;
+  }
+  // this is the result rounded correctly to nearest even, with unbounded exp. 
+
+  // check for overflow
+  if (q + unbexp > P16 + expmax16) {
+    res1 = sign | 0x7800000000000000ull;
+    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
+    *pfpsf |= save_fpsf;
+    BID_RETURN (res1)
+  } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
+    // not overflow; the result must be exact, and we can multiply res1 by
+    // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
+    scale = unbexp - expmax16;
+    res1 = res1 * ten2k64[scale]; // res1 * 10^scale
+    unbexp = expmax16; // unbexp - scale 
+  } else {
+    ; // continue
+  }
+
+  // check for underflow
+  if (q + unbexp < P16 + expmin16) {
+    if (unbexp < expmin16) {
+      // we must truncate more of res
+      x0 = expmin16 - unbexp; // x0 >= 1
+      is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
+      is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
+      is_midpoint_lt_even0 = is_midpoint_lt_even;
+      is_midpoint_gt_even0 = is_midpoint_gt_even;
+      is_inexact_lt_midpoint = 0;
+      is_inexact_gt_midpoint = 0;
+      is_midpoint_lt_even = 0;
+      is_midpoint_gt_even = 0;
+      // the number of decimal digits in res1 is q
+      if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
+        // 2 <= q <= 16, 1 <= x0 <= 15
+        round64_2_18 (q, x0, res1, &res1, &incr_exp,
+                     &is_midpoint_lt_even, &is_midpoint_gt_even,
+                     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+        if (incr_exp) {
+          // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
+          res1 = ten2k64[q - x0];
+        }
+        unbexp = unbexp + x0; // expmin16
+      } else if (x0 == q) {
+        // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
+        // determine relationship with 1/2 ulp
+        // q <= 16
+        if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
+          lt_half_ulp = 1;
+          is_inexact_lt_midpoint = 1;
+        } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
+          eq_half_ulp = 1;
+          is_midpoint_gt_even = 1;
+        } else { // > 1/2 ulp
+          // gt_half_ulp = 1;
+          is_inexact_gt_midpoint = 1;
+        }
+        if (lt_half_ulp || eq_half_ulp) {
+          // res = +0.0 * 10^expmin16
+          res1 = 0x0000000000000000ull;
+        } else { // if (gt_half_ulp)
+          // res = +1 * 10^expmin16
+          res1 = 0x0000000000000001ull;
+        }
+        unbexp = expmin16;
+      } else { // if (x0 > q)
+        // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
+        res1 = 0x0000000000000000ull;
+        unbexp = expmin16;
+        is_inexact_lt_midpoint = 1;
+      }
+      // avoid a double rounding error
+      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 
+          is_midpoint_lt_even) { // double rounding error upward
+        // res = res - 1
+        res1--; // res1 becomes odd
+        is_midpoint_lt_even = 0;
+        is_inexact_lt_midpoint = 1;
+      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 
+          is_midpoint_gt_even) { // double rounding error downward
+        // res = res + 1
+        res1++; // res1 becomes odd
+        is_midpoint_gt_even = 0;
+        is_inexact_gt_midpoint = 1;
+      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
+                !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+        // if this rounding was exact the result may still be 
+        // inexact because of the previous roundings
+        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
+          is_inexact_gt_midpoint = 1;
+        }
+        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
+          is_inexact_lt_midpoint = 1;
+        }
+      } else if (is_midpoint_gt_even &&
+                (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+        // pulled up to a midpoint
+        is_inexact_lt_midpoint = 1;
+        is_inexact_gt_midpoint = 0;
+        is_midpoint_lt_even = 0;
+        is_midpoint_gt_even = 0;
+      } else if (is_midpoint_lt_even &&
+                (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+        // pulled down to a midpoint
+        is_inexact_lt_midpoint = 0;
+        is_inexact_gt_midpoint = 1;
+        is_midpoint_lt_even = 0;
+        is_midpoint_gt_even = 0;
+      } else {
+        ;
+      }
+    }
+    // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
+    // and the result is tiny and exact
+
+    // check for inexact result
+    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
+        is_midpoint_lt_even || is_midpoint_gt_even ||
+        is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
+        is_midpoint_lt_even0 || is_midpoint_gt_even0) {
+      // set the inexact flag and the underflow flag
+      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
+    }
+  } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
+             is_midpoint_lt_even || is_midpoint_gt_even) {
+    *pfpsf |= INEXACT_EXCEPTION;
+  }
+  // this is the result rounded correctly to nearest, with bounded exponent
+
+  if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
+    // res = res + 1
+    res1++; // res1 is now odd
+  } // else the result is already correct
+
+  // assemble the result
+  if (res1 < 0x0020000000000000ull) { // res < 2^53
+    res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
+  } else { // res1 >= 2^53
+    res1 = sign | MASK_STEERING_BITS |
+      ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
+  }
+  *pfpsf |= save_fpsf;
+  BID_RETURN (res1);
+}