/* Copyright (C) 2007  Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

#include "bid_conf.h"
#include "bid_functions.h"


#define __BID_INLINE__ static __inline

/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/
// logical right shift of a 128-bit value by k bits, 0 < k < 64
#define __shr_128(Q, A, k)                        \
{                                                 \
  (Q).w[0] = (A).w[0] >> (k);                     \
  (Q).w[0] |= (A).w[1] << (64 - (k));             \
  (Q).w[1] = (A).w[1] >> (k);                     \
}

// logical right shift of a 128-bit value by k bits, 0 < k < 128
#define __shr_128_long(Q, A, k)                   \
{                                                 \
  if ((k) < 64) {                                 \
    (Q).w[0] = (A).w[0] >> (k);                   \
    (Q).w[0] |= (A).w[1] << (64 - (k));           \
    (Q).w[1] = (A).w[1] >> (k);                   \
  } else {                                        \
    (Q).w[0] = (A).w[1] >> ((k) - 64);            \
    (Q).w[1] = 0;                                 \
  }                                               \
}

// logical left shift of a 128-bit value by k bits, 0 < k < 128
#define __shl_128_long(Q, A, k)                   \
{                                                 \
  if ((k) < 64) {                                 \
    (Q).w[1] = (A).w[1] << (k);                   \
    (Q).w[1] |= (A).w[0] >> (64 - (k));           \
    (Q).w[0] = (A).w[0] << (k);                   \
  } else {                                        \
    (Q).w[1] = (A).w[0] << ((k) - 64);            \
    (Q).w[0] = 0;                                 \
  }                                               \
}

#define __low_64(Q)  (Q).w[0]
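/* Illustrative sketch (not part of the original header): exercising the
   shift macros on a 128-bit value.  Assumes the UINT128 type from
   bid_functions.h, with w[0] the low and w[1] the high 64-bit word;
   the demo function name is hypothetical. */
#if 0
static void
__shift_macros_demo (void)
{
  UINT128 a, q;
  a.w[1] = 0x0000000000000001ull;   /* a = 2^64 + 6 */
  a.w[0] = 0x0000000000000006ull;
  __shr_128_long (q, a, 1);         /* q = 2^63 + 3 */
  /* q.w[1] == 0, q.w[0] == 0x8000000000000003 */
  __shl_128_long (q, a, 64);        /* q = a * 2^64 mod 2^128 */
  /* q.w[1] == 6, q.w[0] == 0 */
}
#endif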
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// greater than
//   returns 0 if A <= B, non-zero if A > B
#define __unsigned_compare_gt_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range: bump the binary-exponent estimate bin_expon
// by one (into bp) when P is one power of two larger than estimated
#define __tight_bin_range_128(bp, P, bin_expon)  \
{                                                \
UINT64 M;                                        \
  M = 1;                                         \
  (bp) = (bin_expon);                            \
  if ((bp) < 63) {                               \
    M <<= ((bp) + 1);                            \
    if ((P).w[0] >= M) (bp)++;                   \
  } else if ((bp) > 64) {                        \
    M <<= ((bp) + 1 - 64);                       \
    if (((P).w[1] > M) || ((P).w[1] == M && (P).w[0])) \
      (bp)++;                                    \
  } else if ((P).w[1]) (bp)++;                   \
}
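/* Illustrative sketch (not part of the original header): the compare
   macros treat their arguments as unsigned 128-bit integers, deciding
   on the high words first.  Demo function name is hypothetical. */
#if 0
static void
__compare_macros_demo (void)
{
  UINT128 a, b;
  int gt, ge, eq;
  a.w[1] = 1;  a.w[0] = 0;                       /* a = 2^64     */
  b.w[1] = 0;  b.w[0] = 0xffffffffffffffffull;   /* b = 2^64 - 1 */
  gt = __unsigned_compare_gt_128 (a, b);   /* non-zero: high words decide */
  ge = __unsigned_compare_ge_128 (b, a);   /* 0 */
  eq = __test_equal_128 (a, a);            /* non-zero */
}
#endif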
/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit
#define __add_128_64(R128, A128, B64)    \
{                                        \
UINT64 R64H;                             \
  R64H = (A128).w[1];                    \
  (R128).w[0] = (B64) + (A128).w[0];     \
  if ((R128).w[0] < (B64))               \
    R64H++;                              \
  (R128).w[1] = R64H;                    \
}
// subtract 64-bit value from 128-bit
#define __sub_128_64(R128, A128, B64)    \
{                                        \
UINT64 R64H;                             \
  R64H = (A128).w[1];                    \
  if ((A128).w[0] < (B64))               \
    R64H--;                              \
  (R128).w[1] = R64H;                    \
  (R128).w[0] = (A128).w[0] - (B64);     \
}
// add 128-bit value to 128-bit
// assume no carry-out
#define __add_128_128(R128, A128, B128)  \
{                                        \
UINT128 Q128;                            \
  Q128.w[1] = (A128).w[1] + (B128).w[1]; \
  Q128.w[0] = (B128).w[0] + (A128).w[0]; \
  if (Q128.w[0] < (B128).w[0])           \
    Q128.w[1]++;                         \
  (R128).w[1] = Q128.w[1];               \
  (R128).w[0] = Q128.w[0];               \
}
#define __sub_128_128(R128, A128, B128)  \
{                                        \
UINT128 Q128;                            \
  Q128.w[1] = (A128).w[1] - (B128).w[1]; \
  Q128.w[0] = (A128).w[0] - (B128).w[0]; \
  if ((A128).w[0] < (B128).w[0])         \
    Q128.w[1]--;                         \
  (R128).w[1] = Q128.w[1];               \
  (R128).w[0] = Q128.w[0];               \
}
// S = X + Y, CY = carry-out
#define __add_carry_out(S, CY, X, Y)    \
{                                       \
UINT64 X1 = X;                          \
  S = X + Y;                            \
  CY = (S < X1) ? 1 : 0;                \
}
// S = X + Y + CI, CY = carry-out
#define __add_carry_in_out(S, CY, X, Y, CI) \
{                                           \
UINT64 X1;                                  \
  X1 = X + CI;                              \
  S = X1 + Y;                               \
  CY = ((S < X1) || (X1 < CI)) ? 1 : 0;     \
}
// S = X - Y, CY = borrow-out
#define __sub_borrow_out(S, CY, X, Y)   \
{                                       \
UINT64 X1 = X;                          \
  S = X - Y;                            \
  CY = (S > X1) ? 1 : 0;                \
}
// S = X - Y - CI, CY = borrow-out
#define __sub_borrow_in_out(S, CY, X, Y, CI) \
{                                            \
UINT64 X1, X0 = X;                           \
  X1 = X - CI;                               \
  S = X1 - Y;                                \
  CY = ((S > X1) || (X1 > X0)) ? 1 : 0;      \
}
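/* Illustrative sketch (not part of the original header): a 128-bit add
   built from the carry chain, the same pattern the wider multiply macros
   below use.  Demo function name is hypothetical. */
#if 0
static void
__carry_chain_demo (void)
{
  UINT64 s0, s1, cy;
  /* (2^64 - 1) + 1 = 2^64: low word wraps to 0, carry propagates up */
  __add_carry_out (s0, cy, 0xffffffffffffffffull, 1ull);
  __add_carry_in_out (s1, cy, 0ull, 0ull, cy);
  /* s0 == 0, s1 == 1, cy == 0 */
}
#endif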
// increment C_128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
#define INCREMENT(C_128, exp)                                           \
{                                                                       \
  C_128.w[0]++;                                                         \
  if (C_128.w[0] == 0) C_128.w[1]++;                                    \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&                            \
      C_128.w[0] == 0x378d8e6400000000ull) {                            \
    exp++;                                                              \
    C_128.w[1] = 0x0000314dc6448d93ull;                                 \
    C_128.w[0] = 0x38c15b0a00000000ull;                                 \
  }                                                                     \
}
// decrement C_128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                                           \
{                                                                       \
  C_128.w[0]--;                                                         \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--;                \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&                            \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {                 \
    exp--;                                                              \
    C_128.w[1] = 0x0001ed09bead87c0ull;                                 \
    C_128.w[0] = 0x378d8e63ffffffffull;                                 \
  }                                                                     \
}
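/* Worked example (not part of the original header): the boundary constants
   above are 10^34 = 0x0001ed09bead87c0_378d8e6400000000 and
   10^33 = 0x0000314dc6448d93_38c15b0a00000000.  Incrementing a coefficient
   of 10^34 - 1 rolls over to 10^33 with the exponent bumped, so the value
   10^34 * 10^e == 10^33 * 10^(e+1) is preserved with 34 decimal digits. */
#if 0
static void
__increment_demo (void)
{
  UINT128 c;
  int exp = 0;
  c.w[1] = 0x0001ed09bead87c0ull;   /* c = 10^34 - 1 */
  c.w[0] = 0x378d8e63ffffffffull;
  INCREMENT (c, exp);
  /* now c == 10^33 and exp == 1 */
}
#endif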

/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
#define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
#define __imul_64x64_to_128(P, CX, CY)  \
{                                       \
UINT64 SX, SY;                          \
  __mul_64x64_to_128(P, CX, CY);        \
                                        \
  SX = ((SINT64)(CX)) >> 63;            \
  SY = ((SINT64)(CY)) >> 63;            \
  SX &= CY;  SY &= CX;                  \
                                        \
  (P).w[1] = (P).w[1] - SX - SY;        \
}
/***************************************
 *  Signed, Full 64x128-bit Multiply
 ***************************************/
#define __imul_64x128_full(Ph, Ql, A, B)          \
{                                                 \
UINT128 ALBL, ALBH, QM2, QM;                      \
                                                  \
  __imul_64x64_to_128(ALBH, (A), (B).w[1]);       \
  __imul_64x64_to_128(ALBL, (A), (B).w[0]);       \
                                                  \
  (Ql).w[0] = ALBL.w[0];                          \
  QM.w[0] = ALBL.w[1];                            \
  QM.w[1] = ((SINT64)ALBL.w[1]) >> 63;            \
  __add_128_128(QM2, ALBH, QM);                   \
  (Ql).w[1] = QM2.w[0];                           \
  Ph = QM2.w[1];                                  \
}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64-bit product
//
#define __mul_64x64_to_128(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH * CYL;                       \
  PH = CXH * CYH;                       \
  PL = CXL * CYL;                       \
  PM2 = CXL * CYH;                      \
  PH += (PM >> 32);                     \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32); \
                                        \
  (P).w[1] = PH + (PM >> 32);           \
  (P).w[0] = (PM << 32) + (UINT32)PL;   \
}
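/* Illustrative sketch (not part of the original header): the schoolbook
   32x32 partial-product scheme above computes the exact 128-bit product.
   For example, squaring 2^32 + 1 gives 2^64 + 2^33 + 1.  Demo function
   name is hypothetical. */
#if 0
static void
__mul_64x64_demo (void)
{
  UINT128 p;
  __mul_64x64_to_128 (p, 0x100000001ull, 0x100000001ull);
  /* p.w[1] == 1, p.w[0] == 0x0000000200000001 */
}
#endif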
// get full 64x64-bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_128_fast(P, CX, CY) \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH * CYL;                       \
  PL = CXL * CYL;                       \
  PH = CXH * CYH;                       \
  PM += CXL * CYH;                      \
  PM += (PL >> 32);                     \
                                        \
  (P).w[1] = PH + (PM >> 32);           \
  (P).w[0] = (PM << 32) + (UINT32)PL;   \
}
// used for CX < 2^60
#define __sqr64_fast(P, CX)             \
{                                       \
UINT64 CXH, CXL, PL, PH, PM;            \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
                                        \
  PM = CXH * CXL;                       \
  PL = CXL * CXL;                       \
  PH = CXH * CXH;                       \
  PM += PM;                             \
  PM += (PL >> 32);                     \
                                        \
  (P).w[1] = PH + (PM >> 32);           \
  (P).w[0] = (PM << 32) + (UINT32)PL;   \
}
// get high 64 bits of the 64x64-bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY) \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH * CYL;                       \
  PL = CXL * CYL;                       \
  PH = CXH * CYH;                       \
  PM += CXL * CYH;                      \
  PM += (PL >> 32);                     \
                                        \
  (P) = PH + (PM >> 32);                \
}
// get full 64x64-bit product
//
#define __mul_64x64_to_128_full(P, CX, CY) \
{                                       \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32;                     \
  CXL = (UINT32)(CX);                   \
  CYH = (CY) >> 32;                     \
  CYL = (UINT32)(CY);                   \
                                        \
  PM = CXH * CYL;                       \
  PH = CXH * CYH;                       \
  PL = CXL * CYL;                       \
  PM2 = CXL * CYH;                      \
  PH += (PM >> 32);                     \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32); \
                                        \
  (P).w[1] = PH + (PM >> 32);           \
  (P).w[0] = (PM << 32) + (UINT32)PL;   \
}
#define __mul_128x128_high(Q, A, B)               \
{                                                 \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
                                                  \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]);   \
                                                  \
  __add_128_128(QM, ALBH, AHBL);                  \
  __add_128_64(QM2, QM, ALBL.w[1]);               \
  __add_128_64((Q), AHBH, QM2.w[1]);              \
}
#define __mul_128x128_full(Qh, Ql, A, B)          \
{                                                 \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
                                                  \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]);   \
                                                  \
  __add_128_128(QM, ALBH, AHBL);                  \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, QM, ALBL.w[1]);               \
  __add_128_64((Qh), AHBH, QM2.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
#define __mul_128x128_low(Ql, A, B)               \
{                                                 \
UINT128 ALBL;                                     \
UINT64 QM64;                                      \
                                                  \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
  QM64 = (B).w[0] * (A).w[1] + (A).w[0] * (B).w[1]; \
                                                  \
  (Ql).w[0] = ALBL.w[0];                          \
  (Ql).w[1] = QM64 + ALBL.w[1];                   \
}
#define __mul_64x128_low(Ql, A, B)                \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
#define __mul_64x128_full(Ph, Ql, A, B)           \
{                                                 \
UINT128 ALBL, ALBH, QM2;                          \
                                                  \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                  \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
  Ph = QM2.w[1];                                  \
}
#define __mul_64x128_to_192(Q, A, B)              \
{                                                 \
UINT128 ALBL, ALBH, QM2;                          \
                                                  \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                  \
  (Q).w[0] = ALBL.w[0];                           \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Q).w[1] = QM2.w[0];                            \
  (Q).w[2] = QM2.w[1];                            \
}
#define __mul_64x128_to192(Q, A, B)               \
{                                                 \
UINT128 ALBL, ALBH, QM2;                          \
                                                  \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                  \
  (Q).w[0] = ALBL.w[0];                           \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Q).w[1] = QM2.w[0];                            \
  (Q).w[2] = QM2.w[1];                            \
}
#define __mul_128x128_to_256(P256, A, B)          \
{                                                 \
UINT128 Qll, Qlh;                                 \
UINT64 Phl, Phh, CY1, CY2;                        \
                                                  \
  __mul_64x128_full(Phl, Qll, A.w[0], B);         \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);         \
  (P256).w[0] = Qll.w[0];                         \
  __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]);    \
  __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
  (P256).w[3] = Phh + CY2;                        \
}
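/* Illustrative sketch (not part of the original header): the 256-bit
   product is assembled from two 64x128 partial products.  Multiplying
   2^127 by 2^127 yields 2^254, i.e. bit 62 of the top word.  Assumes
   the UINT256 type from bid_functions.h; demo name is hypothetical. */
#if 0
static void
__mul_128x128_demo (void)
{
  UINT128 a;
  UINT256 p;
  a.w[1] = 0x8000000000000000ull;   /* a = 2^127 */
  a.w[0] = 0;
  __mul_128x128_to_256 (p, a, a);
  /* p.w[3] == 0x4000000000000000, p.w[2] == p.w[1] == p.w[0] == 0 */
}
#endif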
//
// For better performance, this macro checks A.w[1] against 0
// (but not B.w[1]); use it when A.w[1] is often zero.
#define __mul_128x128_to_256_check_A(P256, A, B)  \
{                                                 \
UINT128 Qll, Qlh;                                 \
UINT64 Phl, Phh, CY1, CY2;                        \
                                                  \
  __mul_64x128_full(Phl, Qll, A.w[0], B);         \
  (P256).w[0] = Qll.w[0];                         \
  if (A.w[1]) {                                   \
    __mul_64x128_full(Phh, Qlh, A.w[1], B);       \
    __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]);    \
    __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
    (P256).w[3] = Phh + CY2;                      \
  } else {                                        \
    (P256).w[1] = Qll.w[1];                       \
    (P256).w[2] = Phl;                            \
    (P256).w[3] = 0;                              \
  }                                               \
}
#define __mul_64x192_to_256(lP, lA, lB)           \
{                                                 \
UINT128 lP0, lP1, lP2;                            \
UINT64 lC;                                        \
  __mul_64x64_to_128(lP0, lA, (lB).w[0]);         \
  __mul_64x64_to_128(lP1, lA, (lB).w[1]);         \
  __mul_64x64_to_128(lP2, lA, (lB).w[2]);         \
  (lP).w[0] = lP0.w[0];                           \
  __add_carry_out((lP).w[1], lC, lP1.w[0], lP0.w[1]);        \
  __add_carry_in_out((lP).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  (lP).w[3] = lP2.w[1] + lC;                      \
}
#define __mul_64x256_to_320(P, A, B)              \
{                                                 \
UINT128 lP0, lP1, lP2, lP3;                       \
UINT64 lC;                                        \
  __mul_64x64_to_128(lP0, A, (B).w[0]);           \
  __mul_64x64_to_128(lP1, A, (B).w[1]);           \
  __mul_64x64_to_128(lP2, A, (B).w[2]);           \
  __mul_64x64_to_128(lP3, A, (B).w[3]);           \
  (P).w[0] = lP0.w[0];                            \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]);         \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC);  \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC);  \
  (P).w[4] = lP3.w[1] + lC;                       \
}
#define __mul_192x192_to_384(P, A, B)             \
{                                                 \
UINT256 P0, P1, P2;                               \
UINT64 CY;                                        \
  __mul_64x192_to_256(P0, (A).w[0], B);           \
  __mul_64x192_to_256(P1, (A).w[1], B);           \
  __mul_64x192_to_256(P2, (A).w[2], B);           \
  (P).w[0] = P0.w[0];                             \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);           \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);    \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);    \
  (P).w[4] = P1.w[3] + CY;                        \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);          \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);   \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);   \
  (P).w[5] = P2.w[3] + CY;                        \
}
#define __mul_64x320_to_384(P, A, B)              \
{                                                 \
UINT128 lP0, lP1, lP2, lP3, lP4;                  \
UINT64 lC;                                        \
  __mul_64x64_to_128(lP0, A, (B).w[0]);           \
  __mul_64x64_to_128(lP1, A, (B).w[1]);           \
  __mul_64x64_to_128(lP2, A, (B).w[2]);           \
  __mul_64x64_to_128(lP3, A, (B).w[3]);           \
  __mul_64x64_to_128(lP4, A, (B).w[4]);           \
  (P).w[0] = lP0.w[0];                            \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]);         \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC);  \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC);  \
  __add_carry_in_out((P).w[4], lC, lP4.w[0], lP3.w[1], lC);  \
  (P).w[5] = lP4.w[1] + lC;                       \
}
// A*A
// Full 128x128-bit product
#define __sqr128_to_256(P256, A)                  \
{                                                 \
UINT128 Qll, Qlh, Qhh;                            \
UINT64 TMP_C1, TMP_C2;                            \
                                                  \
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);        \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);        \
  Qhh.w[1] += (Qlh.w[1] >> 63);                   \
  Qlh.w[1] = (Qlh.w[1] + Qlh.w[1]) | (Qlh.w[0] >> 63); \
  Qlh.w[0] += Qlh.w[0];                           \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]);        \
                                                  \
  __add_carry_out((P256).w[1], TMP_C1, Qlh.w[0], Qll.w[1]);  \
  (P256).w[0] = Qll.w[0];                         \
  __add_carry_in_out((P256).w[2], TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
  (P256).w[3] = Qhh.w[1] + TMP_C2;                \
}
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B) \
{                                                 \
UINT128 Qll, Qlh;                                 \
UINT64 Phl, Phh, C1, C2;                          \
                                                  \
  __mul_64x128_full(Phl, Qll, A.w[0], B);         \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);         \
  (PQl).w[0] = Qll.w[0];                          \
  __add_carry_out((PQl).w[1], C1, Qlh.w[0], Qll.w[1]);       \
  __add_carry_in_out((PQh).w[0], C2, Qlh.w[1], Phl, C1);     \
  (PQh).w[1] = Phh + C2;                          \
}
#define __mul_256x256_to_512(P, A, B)             \
{                                                 \
UINT512 P0, P1, P2, P3;                           \
UINT64 CY;                                        \
  __mul_64x256_to_320(P0, (A).w[0], B);           \
  __mul_64x256_to_320(P1, (A).w[1], B);           \
  __mul_64x256_to_320(P2, (A).w[2], B);           \
  __mul_64x256_to_320(P3, (A).w[3], B);           \
  (P).w[0] = P0.w[0];                             \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);           \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);    \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);    \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY);    \
  (P).w[5] = P1.w[4] + CY;                        \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);          \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);   \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);   \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY);   \
  (P).w[6] = P2.w[4] + CY;                        \
  __add_carry_out((P).w[3], CY, P3.w[0], (P).w[3]);          \
  __add_carry_in_out((P).w[4], CY, P3.w[1], (P).w[4], CY);   \
  __add_carry_in_out((P).w[5], CY, P3.w[2], (P).w[5], CY);   \
  __add_carry_in_out((P).w[6], CY, P3.w[3], (P).w[6], CY);   \
  (P).w[7] = P3.w[4] + CY;                        \
}
#define __mul_192x256_to_448(P, A, B)             \
{                                                 \
UINT512 P0, P1, P2;                               \
UINT64 CY;                                        \
  __mul_64x256_to_320(P0, (A).w[0], B);           \
  __mul_64x256_to_320(P1, (A).w[1], B);           \
  __mul_64x256_to_320(P2, (A).w[2], B);           \
  (P).w[0] = P0.w[0];                             \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]);           \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY);    \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY);    \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY);    \
  (P).w[5] = P1.w[4] + CY;                        \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]);          \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY);   \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY);   \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY);   \
  (P).w[6] = P2.w[4] + CY;                        \
}
#define __mul_320x320_to_640(P, A, B)             \
{                                                 \
UINT512 P0, P1, P2, P3;                           \
UINT64 CY;                                        \
  __mul_256x256_to_512((P), (A), B);              \
  __mul_64x256_to_320(P1, (A).w[4], B);           \
  __mul_64x256_to_320(P2, (B).w[4], A);           \
  __mul_64x64_to_128(P3, (A).w[4], (B).w[4]);     \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]);          \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY);   \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY);   \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY);   \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY);   \
  P3.w[1] += CY;                                  \
  __add_carry_out((P).w[4], CY, (P).w[4], P0.w[0]);          \
  __add_carry_in_out((P).w[5], CY, (P).w[5], P0.w[1], CY);   \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[2], CY);   \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[3], CY);   \
  __add_carry_in_out((P).w[8], CY, P3.w[0], P0.w[4], CY);    \
  (P).w[9] = P3.w[1] + CY;                        \
}
#define __mul_384x384_to_768(P, A, B)             \
{                                                 \
UINT512 P0, P1, P2, P3;                           \
UINT64 CY;                                        \
  __mul_320x320_to_640((P), (A), B);              \
  __mul_64x320_to_384(P1, (A).w[5], B);           \
  __mul_64x320_to_384(P2, (B).w[5], A);           \
  __mul_64x64_to_128(P3, (A).w[5], (B).w[5]);     \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]);          \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY);   \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY);   \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY);   \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY);   \
  __add_carry_in_out((P0).w[5], CY, P1.w[5], P2.w[5], CY);   \
  P3.w[1] += CY;                                  \
  __add_carry_out((P).w[5], CY, (P).w[5], P0.w[0]);          \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[1], CY);   \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[2], CY);   \
  __add_carry_in_out((P).w[8], CY, (P).w[8], P0.w[3], CY);   \
  __add_carry_in_out((P).w[9], CY, (P).w[9], P0.w[4], CY);   \
  __add_carry_in_out((P).w[10], CY, P3.w[0], P0.w[5], CY);   \
  (P).w[11] = P3.w[1] + CY;                       \
}
#define __mul_64x128_short(Ql, A, B)              \
{                                                 \
UINT64 ALBH_L;                                    \
                                                  \
  __mul_64x64_to_64(ALBH_L, (A), (B).w[1]);       \
  __mul_64x64_to_128((Ql), (A), (B).w[0]);        \
                                                  \
  (Ql).w[1] += ALBH_L;                            \
}
// D = 10 * _TMP, computed as 2*_TMP + 8*_TMP
#define __scale128_10(D, _TMP)                    \
{                                                 \
UINT128 _TMP2, _TMP8;                             \
  _TMP2.w[1] = (_TMP.w[1] << 1) | (_TMP.w[0] >> 63); \
  _TMP2.w[0] = _TMP.w[0] << 1;                    \
  _TMP8.w[1] = (_TMP.w[1] << 3) | (_TMP.w[0] >> 61); \
  _TMP8.w[0] = _TMP.w[0] << 3;                    \
  __add_128_128(D, _TMP2, _TMP8);                 \
}
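/* Illustrative sketch (not part of the original header): __scale128_10
   multiplies by 10 using shifts only, since 10x = 2x + 8x.  Demo
   function name is hypothetical. */
#if 0
static void
__scale128_10_demo (void)
{
  UINT128 d, t;
  t.w[1] = 0;
  t.w[0] = 123ull;
  __scale128_10 (d, t);
  /* d.w[1] == 0, d.w[0] == 1230 */
}
#endif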
// 64x64-bit product
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
{                                                 \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;     \
  CXH = (CX64) >> 32;                             \
  CXL = (UINT32)(CX64);                           \
  CYH = (CY64) >> 32;                             \
  CYL = (UINT32)(CY64);                           \
  PM = CXH * CYL;                                 \
  PH = CXH * CYH;                                 \
  PL = CXL * CYL;                                 \
  PM2 = CXL * CYH;                                \
  PH += (PM >> 32);                               \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32);   \
  (P128).w[1] = PH + (PM >> 32);                  \
  (P128).w[0] = (PM << 32) + (UINT32)PL;          \
}
// high 64 bits of the 64x64-bit product
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)   \
{                                                 \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2;     \
  CXH = (CX64) >> 32;                             \
  CXL = (UINT32)(CX64);                           \
  CYH = (CY64) >> 32;                             \
  CYL = (UINT32)(CY64);                           \
  PM = CXH * CYL;                                 \
  PH = CXH * CYH;                                 \
  PL = CXL * CYL;                                 \
  PM2 = CXL * CYH;                                \
  PH += (PM >> 32);                               \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32);   \
  P64 = PH + (PM >> 32);                          \
}
#define __mul_128x64_to_128(Q128, A64, B128)      \
{                                                 \
  UINT64 ALBH_L;                                  \
  ALBH_L = (A64) * (B128).w[1];                   \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L;                          \
}
// might simplify by calculating just QM2.w[0]
#define __mul_64x128_to_128(Ql, A, B)             \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128      34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32      7
////////////////////////////////////////
// Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS  0x3ff
#define UPPER_EXPON_LIMIT     51
// data needed by the BID pack/unpack macros
extern UINT64 __bid_round_const_table[][19];
extern UINT128 __bid_reciprocals10_128[];
extern int __bid_recip_scale[];
extern UINT128 __bid_power10_table_128[];
extern int __bid_estimate_decimal_digits[];
extern int __bid_estimate_bin_expon[];
extern UINT64 __bid_power10_index_binexp[];
extern int __bid_short_recip_scale[];
extern UINT64 __bid_reciprocals10_64[];
extern UINT128 __bid_power10_index_binexp_128[];
extern UINT128 __bid_round_const_table_128[][36];


//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
#define is_inexact(fpsc)  ((*(fpsc)) & INEXACT_EXCEPTION)

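/* Illustrative sketch (not part of the original header): the status word
   is a plain bit-set, and __set_status_flags ORs new exception bits into
   it.  Assumes INEXACT_EXCEPTION from bid_conf.h/bid_functions.h; demo
   function name is hypothetical. */
#if 0
static void
__status_flags_demo (void)
{
  unsigned fpsc = 0;
  __set_status_flags (&fpsc, INEXACT_EXCEPTION);
  /* is_inexact (&fpsc) is now non-zero */
}
#endif
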
__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
              UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pcoefficient_x = x;
      // check for non-canonical values
      if ((x & LARGE_COEFF_MASK64) >= 1000000000000000ull)
        *pcoefficient_x = x & (~LARGE_COEFF_MASK64);
      return 0; // NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return 1;
  }
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
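
/* Illustrative sketch (not part of the original header): decoding the
   BID64 value 0x31c0000000000001, which encodes the decimal number 1
   (coefficient 1, biased exponent 398 = DECIMAL_EXPONENT_BIAS).  Demo
   function name is hypothetical. */
#if 0
static void
__unpack_BID64_demo (void)
{
  UINT64 sign, coeff;
  int expon;
  unpack_BID64 (&sign, &expon, &coeff, 0x31c0000000000001ull);
  /* sign == 0, expon == 398, coeff == 1; the value is
     coeff * 10^(expon - DECIMAL_EXPONENT_BIAS) = 1 */
}
#endif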

//
//   BID64 pack macro (general form)
//
__BID_INLINE__ UINT64
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
           unsigned *fpsc) {
  UINT128 Stemp, Q_low;
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999999999999ull) {
    expon++;
    coeff = 1000000000000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x8000000000000001ull;
        if (rmode == ROUNDING_UP && !sgn)
          return 1ull;
#endif
#endif
        // result is 0
        return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      coeff += __bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, coeff,
                         __bid_reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = __bid_recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0) //ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & QH;

          if (!remainder_h
              && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      __bid_reciprocals10_128[extra_digits].w[0]))) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = QH << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      __bid_reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h
              && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      __bid_reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                           __bid_reciprocals10_128[extra_digits].w[0]);
          __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                              __bid_reciprocals10_128[extra_digits].w[1], CY);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID64;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID64;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = SMALLEST_BID64;
      }
      return r;
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}




//
//   No overflow/underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
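
/* Illustrative sketch (not part of the original header): packing the
   decimal number 1 as BID64.  With coefficient 1 and biased exponent
   DECIMAL_EXPONENT_BIAS = 398, the small (non-special) format applies:
   r = (398 << 53) | 1 = 0x31c0000000000001.  Demo name is hypothetical. */
#if 0
static void
__fast_get_BID64_demo (void)
{
  UINT64 one = fast_get_BID64 (0, DECIMAL_EXPONENT_BIAS, 1);
  /* one == 0x31c0000000000001 */
}
#endif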


//
//   no underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
                         unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
        expon--;
        coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
        // overflow
        r = sgn | INFINITY_MASK64;
        switch (rmode) {
        case ROUNDING_DOWN:
          if (!sgn)
            r = LARGEST_BID64;
          break;
        case ROUNDING_TO_ZERO:
          r = sgn | LARGEST_BID64;
          break;
        case ROUNDING_UP:
          // round up
          if (sgn)
            r = SMALLEST_BID64;
        }
        return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   No overflow/underflow checking
//   or checking for coefficients equal to 10^16 (after rounding)
//
__BID_INLINE__ UINT64
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}

//
//   No overflow/underflow checking or checking for coefficients above 2^53
//
__BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
  // no UF/OF
  UINT64 r;

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);
  return r;
}


//
// This pack macro is used when underflow is known to occur
//
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
              unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + __bid_round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
                     __bid_reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = __bid_recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0) //ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;
      remainder_h >>= amount2;
      remainder_h = remainder_h & QH;

      if (!remainder_h
          && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  __bid_reciprocals10_128[extra_digits].w[0]))) {
        C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
          && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  __bid_reciprocals10_128[extra_digits].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
          && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  __bid_reciprocals10_128[extra_digits].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                       __bid_reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                          __bid_reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
          (((UINT64) 1) << amount))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}



//
//   This pack macro does not check for coefficients above 2^53
//
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
                          int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x8000000000000001ull;
        if (rmode == ROUNDING_UP && !sgn)
          return 1ull;
#endif
#endif
        // result is 0
        return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + __bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
                         __bid_reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = __bid_recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0) //ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & QH;

          if (!remainder_h
              && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      __bid_reciprocals10_128[extra_digits].w[0]))) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = QH << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      __bid_reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h
              && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      __bid_reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                           __bid_reciprocals10_128[extra_digits].w[0]);
          __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                              __bid_reciprocals10_128[extra_digits].w[1], CY);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }

    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID64;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID64;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = SMALLEST_BID64;
      }
      return r;
    } else {
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
        r = expon;
        r <<= EXPONENT_SHIFT_LARGE64;
        r |= (sgn | SPECIAL_ENCODING_MASK64);
        // add coeff, without leading bits
        mask = (mask >> 2) - 1;
        coeff &= mask;
        r |= coeff;
        return r;
      }
    }
  }

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}
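/* Editorial note: a hedged worked example for the fast path above, assuming
   the decimal64 bias of 398 and EXPONENT_SHIFT_SMALL64 == 53: packing sign 0,
   biased exponent 398 (0x18e) and coefficient 1 gives
   (0x18eull << 53) | 1 == 0x31c0000000000001ull, the canonical BID64
   encoding of 1.0.  */
#if 0 /* illustration only -- not compiled */
static UINT64
example_pack_one64 (void) {
  unsigned flags = 0;
  return get_BID64_small_mantissa (0, 398, 1, ROUNDING_TO_NEAREST, &flags);
  /* == 0x31c0000000000001ull */
}
#endif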


/*****************************************************************************
*
*    BID128 pack/unpack macros
*
*****************************************************************************/

//
//   Macro for handling BID128 underflow
//         sticky bit given as additional argument
//
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
                   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
        || (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder
  if (R)
    CQ.w[0] |= 1;

  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = __bid_round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = __bid_reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = __bid_recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
          && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0]))) {
        CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
          && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
          && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
                       __bid_reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
                          __bid_reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
        Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
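/* Editorial note: the "CQ *= 10" step above uses 10*x == 2*x + 8*x on a
   two-word integer, built from shifts and the library's __add_128_128.
   A standalone sketch of the idiom (the helper name is illustrative):  */
#if 0 /* illustration only -- not compiled */
static UINT128
times10_128 (UINT128 x) {
  UINT128 x2, x8, r;
  x2.w[1] = (x.w[1] << 1) | (x.w[0] >> 63); // x2 = 2*x
  x2.w[0] = x.w[0] << 1;
  x8.w[1] = (x.w[1] << 3) | (x.w[0] >> 61); // x8 = 8*x
  x8.w[0] = x.w[0] << 3;
  __add_128_128 (r, x2, x8);                // r = 10*x
  return r;
}
#endif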


//
//   Macro for handling BID128 underflow
//
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
               unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
        || (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = __bid_round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = __bid_reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = __bid_recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
          && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0]))) {
        CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
          && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
          && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
                       __bid_reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
                          __bid_reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
        Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
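/* Editorial note: both underflow handlers above compute round (CQ / 10^ed2)
   as truncate ((CQ + round_const) / 10^ed2), where
   __bid_round_const_table_128[rmode][ed2] holds, for rounding to nearest,
   half of 10^ed2; the later remainder test then turns ties-away into
   ties-even.  A hedged scalar analogue of the add-half-then-truncate step
   for ed2 == 1:  */
#if 0 /* illustration only -- not compiled */
static UINT64
round10_ties_away (UINT64 x) {
  return (x + 5) / 10; // add half the divisor, then truncate
}
#endif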



//
//  BID128 unpack, input passed by value
//
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
               UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = __bid_power10_table_128[33];
    coeff.w[0] = x.w[0];
    coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = x.w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
    return 0; // NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = __bid_power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
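/* Editorial note: a hedged worked example for the decode above, assuming the
   decimal128 bias of 6176 (0x1820): the canonical encoding of 1.0 has
   x.w[1] == 0x1820ull << 49 == 0x3040000000000000ull and x.w[0] == 1, so
   unpack_BID128_value returns sign 0, biased exponent 6176 and
   coefficient 1.  */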


//
//  BID128 unpack, input passed by reference
//
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
               UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = __bid_power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
    return 0; // NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = __bid_power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}

//
//   Pack macro checks for overflow, but not underflow
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
                         UINT128 coeff, unsigned *prounding_mode,
                         unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = __bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
             && expon > DECIMAL_MAX_EXPON_128) {
        coeff.w[1] =
          (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
          (coeff.w[0] >> 63);
        tmp2 = coeff.w[0] << 3;
        coeff.w[0] = (coeff.w[0] << 1) + tmp2;
        if (coeff.w[0] < tmp2)
          coeff.w[1]++;

        expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
          || (sgn && *prounding_mode == ROUNDING_UP)
          || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
        pres->w[1] = sgn | INFINITY_MASK64;
        pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
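/* Editorial note: the renormalization loop above trades exponent for
   coefficient digits without changing the value: while the coefficient is
   still below 10^33 (at most 33 digits) and the exponent is above
   DECIMAL_MAX_EXPON_128, the coefficient is multiplied by 10 (again via
   8*x + 2*x on two words) and the exponent decremented.  Only when the
   coefficient cannot absorb the excess does the operand overflow to
   infinity or to LARGEST_BID128, depending on sign and rounding mode.  */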


//
//   No overflow/underflow checks
//   No checking for coefficient == 10^34 (rounding artifact)
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
                      UINT128 coeff) {
  UINT64 tmp;

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}

//
//   No overflow/underflow checks
//
__BID_INLINE__ UINT128 *
get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT64 tmp;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
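/* Editorial note: the coefficient test above catches a rounding artifact:
   a carry out of rounding can produce exactly 10^34
   (w[1] == 0x0001ed09bead87c0, w[0] == 0x378d8e6400000000), one digit too
   many for the 34-digit field, so it is rewritten as the equal value
   10^33 * 10^(expon+1); 10^33 is (0x0000314dc6448d93, 0x38c15b0a00000000).  */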

//
//   General BID128 pack macro
//
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
            unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
                            fpsc);

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = __bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
             && expon > DECIMAL_MAX_EXPON_128) {
        coeff.w[1] =
          (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
          (coeff.w[0] >> 63);
        tmp2 = coeff.w[0] << 3;
        coeff.w[0] = (coeff.w[0] << 1) + tmp2;
        if (coeff.w[0] < tmp2)
          coeff.w[1]++;

        expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
          || (sgn && *prounding_mode == ROUNDING_UP)
          || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
        pres->w[1] = sgn | INFINITY_MASK64;
        pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}


//
//  Macro used for conversions from string
//        (no additional arguments given for rounding mode, status flags)
//
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  unsigned rmode = 0, status;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      while (expon > DECIMAL_MAX_EXPON_128 &&
             (coeff.w[1] < __bid_power10_table_128[33].w[1] ||
              (coeff.w[1] == __bid_power10_table_128[33].w[1]
              && coeff.w[0] < __bid_power10_table_128[33].w[0]))) {
        D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
        D2.w[0] = coeff.w[0] << 1;
        D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
        D8.w[0] = coeff.w[0] << 3;

        __add_128_128 (coeff, D2, D8);
        expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn) {
          pres->w[1] = LARGEST_BID128_HIGH;
          pres->w[0] = LARGEST_BID128_LOW;
        }
        break;
      case ROUNDING_TO_ZERO:
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn) {
          pres->w[1] = sgn | LARGEST_BID128_HIGH;
          pres->w[0] = LARGEST_BID128_LOW;
        }
        break;
      }

      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}



/*****************************************************************************
*
*    BID32 pack/unpack macros
*
*****************************************************************************/


__BID_INLINE__ UINT32
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
              UINT32 * pcoefficient_x, UINT32 x) {
  UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings
    if ((x & INFINITY_MASK32) == INFINITY_MASK32)
      return 0; // NaN or Infinity

    // coefficient
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return 1;
  }
  // exponent
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
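/* Editorial note: a hedged worked example for the decode above, assuming the
   decimal32 bias of 101: the canonical encoding of 1.0 is
   (101 << 23) | 1 == 0x32800001.  */
#if 0 /* illustration only -- not compiled */
static UINT32
example_unpack_one32 (void) {
  UINT32 sign, coeff;
  int expon;
  unpack_BID32 (&sign, &expon, &coeff, 0x32800001);
  return coeff; /* 1; expon == 101, sign == 0 */
}
#endif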

//
//   General pack macro for BID32
//
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
           unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // a coefficient above 10^7 - 1 can only be the rounding artifact 10^7;
  // re-express it as 10^6 with the exponent incremented
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x80000001;
        if (rmode == ROUNDING_UP && !sgn)
          return 1;
#endif
#endif
        // result is 0
        return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // swap rounding directions for negative values, as in the BID64 paths
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      coeff += __bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, __bid_reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = __bid_short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0) //ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & Q.w[1];

          if (!remainder_h && (Q.w[0] < __bid_reciprocals10_64[extra_digits])) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = Q.w[1] << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q.w[0] < __bid_reciprocals10_64[extra_digits]))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h && (Q.w[0] < __bid_reciprocals10_64[extra_digits]))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp, carry, Q.w[0],
                           __bid_reciprocals10_64[extra_digits]);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID32;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID32;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
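/* Editorial note: a hedged round-trip sketch for the pack macro above,
   assuming the decimal32 bias of 101 and that ROUNDING_TO_NEAREST is 0,
   as elsewhere in this file:  */
#if 0 /* illustration only -- not compiled */
static UINT32
example_pack_one32 (void) {
  unsigned flags = 0;
  return get_BID32 (0, 101, 1, ROUNDING_TO_NEAREST, &flags);
  /* == 0x32800001, the canonical BID32 encoding of 1.0 */
}
#endif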



//
//   no overflow/underflow checks
//
__BID_INLINE__ UINT32
very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
  UINT32 r, mask;

  mask = 1 << 23;

  // check whether coefficient fits in 23 (10*2+3) bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}



/*************************************************************
 *
 *************************************************************/
typedef ALIGN (16) struct {
  UINT64 w[6];
} UINT384;

typedef ALIGN (16) struct {
  UINT64 w[8];
} UINT512;

// #define P                               34
#define MASK_STEERING_BITS              0x6000000000000000ull
#define MASK_BINARY_EXPONENT1           0x7fe0000000000000ull
#define MASK_BINARY_SIG1                0x001fffffffffffffull
#define MASK_BINARY_EXPONENT2           0x1ff8000000000000ull
    //used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2                0x0007ffffffffffffull
    //used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2                 0x0020000000000000ull
    //used to prefix 8+G4 to T (sec 3.3)
#define UPPER_EXPON_LIMIT               51
#define MASK_EXP                        0x7ffe000000000000ull
#define MASK_SPECIAL                    0x7800000000000000ull
#define MASK_NAN                        0x7c00000000000000ull
#define MASK_SNAN                       0x7e00000000000000ull
#define MASK_ANY_INF                    0x7c00000000000000ull
#define MASK_INF                        0x7800000000000000ull
#define MASK_SIGN                       0x8000000000000000ull
#define MASK_COEFF                      0x0001ffffffffffffull
#define BIN_EXP_BIAS                    (0x1820ull << 49)

#define EXP_MIN                         0x0000000000000000ull
  // EXP_MIN = (-6176 + 6176) << 49
#define EXP_MAX                         0x5ffe000000000000ull
  // EXP_MAX = (6111 + 6176) << 49
#define EXP_MAX_P1                      0x6000000000000000ull
  // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
#define EXP_P1                          0x0002000000000000ull
  // EXP_P1 = 1 << 49
#define expmin                            -6176
  // min unbiased exponent of decimal128
#define expmax                            6111
  // max unbiased exponent of decimal128
#define expmin16                          -398
  // min unbiased exponent of decimal64
#define expmax16                          369
  // max unbiased exponent of decimal64

#define SIGNMASK32 0x80000000
#define BID64_SIG_MAX 0x002386F26FC0ffffull
#define SIGNMASK64    0x8000000000000000ull

// typedef unsigned int FPSC; // floating-point status and control
        // bit31:
        // bit30:
        // bit29:
        // bit28:
        // bit27:
        // bit26:
        // bit25:
        // bit24:
        // bit23:
        // bit22:
        // bit21:
        // bit20:
        // bit19:
        // bit18:
        // bit17:
        // bit16:
        // bit15:
        // bit14: RC:2
        // bit13: RC:1
        // bit12: RC:0
        // bit11: PM
        // bit10: UM
        // bit9:  OM
        // bit8:  ZM
        // bit7:  DM
        // bit6:  IM
        // bit5:  PE
        // bit4:  UE
        // bit3:  OE
        // bit2:  ZE
        // bit1:  DE
        // bit0:  IE

#define ROUNDING_MODE_MASK      0x00007000

typedef struct _DEC_DIGITS {
  unsigned int digits;
  UINT64 threshold_hi;
  UINT64 threshold_lo;
  unsigned int digits1;
} DEC_DIGITS;

extern DEC_DIGITS __bid_nr_digits[];
extern UINT64 __bid_midpoint64[];
extern UINT128 __bid_midpoint128[];
extern UINT192 __bid_midpoint192[];
extern UINT256 __bid_midpoint256[];
extern UINT64 __bid_ten2k64[];
extern UINT128 __bid_ten2k128[];
extern UINT256 __bid_ten2k256[];
extern UINT128 __bid_ten2mk128[];
extern UINT64 __bid_ten2mk64[];
extern UINT128 __bid_ten2mk128trunc[];
extern int __bid_shiftright128[];
extern UINT64 __bid_maskhigh128[];
extern UINT64 __bid_maskhigh128M[];
extern UINT64 __bid_maskhigh192M[];
extern UINT64 __bid_maskhigh256M[];
extern UINT64 __bid_one_half128[];
extern UINT64 __bid_one_half128M[];
extern UINT64 __bid_one_half192M[];
extern UINT64 __bid_one_half256M[];
extern UINT128 __bid_ten2mk128M[];
extern UINT128 __bid_ten2mk128truncM[];
extern UINT192 __bid_ten2mk192truncM[];
extern UINT256 __bid_ten2mk256truncM[];
extern int __bid_shiftright128M[];
extern int __bid_shiftright192M[];
extern int __bid_shiftright256M[];
extern UINT192 __bid_ten2mk192M[];
extern UINT256 __bid_ten2mk256M[];
extern unsigned char __bid_char_table2[];
extern unsigned char __bid_char_table3[];

extern UINT64 __bid_ten2m3k64[];
extern unsigned int __bid_shift_ten2m3k64[];
extern UINT128 __bid_ten2m3k128[];
extern unsigned int __bid_shift_ten2m3k128[];



/***************************************************************************
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
 ***************************************************************************/

extern UINT64 __bid_Kx64[];
extern unsigned int __bid_Ex64m64[];
extern UINT64 __bid_half64[];
extern UINT64 __bid_mask64[];
extern UINT64 __bid_ten2mxtrunc64[];

extern UINT128 __bid_Kx128[];
extern unsigned int __bid_Ex128m128[];
extern UINT64 __bid_half128[];
extern UINT64 __bid_mask128[];
extern UINT128 __bid_ten2mxtrunc128[];

extern UINT192 __bid_Kx192[];
extern unsigned int __bid_Ex192m192[];
extern UINT64 __bid_half192[];
extern UINT64 __bid_mask192[];
extern UINT192 __bid_ten2mxtrunc192[];

extern UINT256 __bid_Kx256[];
extern unsigned int __bid_Ex256m256[];
extern UINT64 __bid_half256[];
extern UINT64 __bid_mask256[];
extern UINT256 __bid_ten2mxtrunc256[];

typedef union __bid64_128 {
  UINT64 b64;
  UINT128 b128;
} BID64_128;

BID64_128 bid_fma (unsigned int P0,
                   BID64_128 x1, unsigned int P1,
                   BID64_128 y1, unsigned int P2,
                   BID64_128 z1, unsigned int P3,
                   unsigned int rnd_mode, FPSC * fpsc);

#define         P16     16
#define         P34     34

union __int_double {
  UINT64 i;
  double d;
};
typedef union __int_double int_double;


union __int_float {
  UINT32 i;
  float d;
};
typedef union __int_float int_float;

#define SWAP(A,B,T) {\
        T = A; \
        A = B; \
        B = T; \
}

// this macro finds A such that coefficient_x lies in [2^A, 2^(A+1)),
// i.e. A is the index of its most significant bit
#define NUMBITS(A, coefficient_x, tempx){\
      tempx.d=(float)coefficient_x;\
      A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
}
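/* Editorial note: a hedged usage sketch for NUMBITS: 1000000 lies in
   [2^19, 2^20), so A becomes 19.  The float conversion is exact here
   because 1000000 needs only 20 significant bits.  */
#if 0 /* illustration only -- not compiled */
static int
example_numbits (void) {
  int_float tempx;
  int A;
  NUMBITS (A, 1000000, tempx);
  return A; /* 19 */
}
#endif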

enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};

typedef union { UINT32 ui32; float f; } BID_UI32FLOAT;
typedef union { UINT64 ui64; double d; } BID_UI64DOUBLE;
typedef union { UINT128 ui128; long double ld; } BID_UI128LONGDOUBLE;

#endif