OSDN Git Service

config/
[pf3gnuchains/gcc-fork.git] / libgcc / config / libbid / inline_bid_add.h
1 /* Copyright (C) 2007  Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
9
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file.  (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
18
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
22 for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING.  If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.  */
28
29 /*****************************************************************************
30  *
31  *    Helper add functions (for fma)
32  *
33  *    __BID_INLINE__ UINT64 get_add64(
34  *        UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 
35  *        UINT64 sign_y, int exponent_y, UINT64 coefficient_y, 
36  *                                       int rounding_mode)
37  *
38  *   __BID_INLINE__ UINT64 get_add128(
39  *                       UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 
40  *                       UINT64 sign_y, int final_exponent_y, UINT128 CY, 
41  *                       int extra_digits, int rounding_mode)
42  *
43  *****************************************************************************
44  *
45  *  Algorithm description:
46  *
47  *  get_add64:  same as BID64 add, but arguments are unpacked and there 
48  *                                 are no special case checks
49  *
50  *  get_add128: add 64-bit coefficient to 128-bit product (which contains 
51  *                                        16+extra_digits decimal digits), 
52  *                         return BID64 result
53  *              - the exponents are compared and the two coefficients are 
54  *                properly aligned for addition/subtraction
55  *              - multiple paths are needed
56  *              - final result exponent is calculated and the lower term is
57  *                      rounded first if necessary, to avoid manipulating 
58  *                      coefficients longer than 128 bits 
59  *
60  ****************************************************************************/
61
62 #ifndef _INLINE_BID_ADD_H_
63 #define _INLINE_BID_ADD_H_
64
65 #include "bid_internal.h"
66
67 #define MAX_FORMAT_DIGITS     16
68 #define DECIMAL_EXPONENT_BIAS 398
69 #define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
70 #define BINARY_EXPONENT_BIAS  0x3ff
71 #define UPPER_EXPON_LIMIT     51
72
73 ///////////////////////////////////////////////////////////////////////
74 //
75 // get_add64() is essentially the same as bid_add(), except that 
76 //             the arguments are unpacked
77 //
78 //////////////////////////////////////////////////////////////////////
79 __BID_INLINE__ UINT64
80 get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
81            UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
82            int rounding_mode, unsigned *fpsc) {
83   UINT128 CA, CT, CT_new;
84   UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
85     rem_a;
86   UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp,
87     C64_new;
88   int_double tempx;
89   int exponent_a, exponent_b, diff_dec_expon;
90   int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
91   unsigned rmode, status;
92
93   // sort arguments by exponent
94   if (exponent_x <= exponent_y) {
95     sign_a = sign_y;
96     exponent_a = exponent_y;
97     coefficient_a = coefficient_y;
98     sign_b = sign_x;
99     exponent_b = exponent_x;
100     coefficient_b = coefficient_x;
101   } else {
102     sign_a = sign_x;
103     exponent_a = exponent_x;
104     coefficient_a = coefficient_x;
105     sign_b = sign_y;
106     exponent_b = exponent_y;
107     coefficient_b = coefficient_y;
108   }
109
110   // exponent difference
111   diff_dec_expon = exponent_a - exponent_b;
112
113   /* get binary coefficients of x and y */
114
115   //--- get number of bits in the coefficients of x and y ---
116
117   tempx.d = (double) coefficient_a;
118   bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
119
120   if (!coefficient_a) {
121     return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode,
122                       fpsc);
123   }
124   if (diff_dec_expon > MAX_FORMAT_DIGITS) {
125     // normalize a to a 16-digit coefficient
126
127     scale_ca = __bid_estimate_decimal_digits[bin_expon_ca];
128     if (coefficient_a >= __bid_power10_table_128[scale_ca].w[0])
129       scale_ca++;
130
131     scale_k = 16 - scale_ca;
132
133     coefficient_a *= __bid_power10_table_128[scale_k].w[0];
134
135     diff_dec_expon -= scale_k;
136     exponent_a -= scale_k;
137
138     /* get binary coefficients of x and y */
139
140     //--- get number of bits in the coefficients of x and y ---
141     tempx.d = (double) coefficient_a;
142     bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
143
144     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
145 #ifdef SET_STATUS_FLAGS
146       if (coefficient_b) {
147         __set_status_flags (fpsc, INEXACT_EXCEPTION);
148       }
149 #endif
150
151 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
152 #ifndef IEEE_ROUND_NEAREST
153       if (((rounding_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST
154       {
155         switch (rounding_mode) {
156         case ROUNDING_DOWN:
157           if (sign_b) {
158             coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
159             if (coefficient_a < 1000000000000000ull) {
160               exponent_a--;
161               coefficient_a = 9999999999999999ull;
162             } else if (coefficient_a >= 10000000000000000ull) {
163               exponent_a++;
164               coefficient_a = 1000000000000000ull;
165             }
166           }
167           break;
168         case ROUNDING_UP:
169           if (!sign_b) {
170             coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
171             if (coefficient_a < 1000000000000000ull) {
172               exponent_a--;
173               coefficient_a = 9999999999999999ull;
174             } else if (coefficient_a >= 10000000000000000ull) {
175               exponent_a++;
176               coefficient_a = 1000000000000000ull;
177             }
178           }
179           break;
180         default:        // RZ
181           if (sign_a != sign_b) {
182             coefficient_a--;
183             if (coefficient_a < 1000000000000000ull) {
184               exponent_a--;
185               coefficient_a = 9999999999999999ull;
186             }
187           }
188           break;
189         }
190       } else
191 #endif
192 #endif
193         // check special case here
194         if ((coefficient_a == 1000000000000000ull)
195             && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
196             && (sign_a ^ sign_b) && (coefficient_b > 5000000000000000ull)) {
197         coefficient_a = 9999999999999999ull;
198         exponent_a--;
199       }
200
201       return get_BID64 (sign_a, exponent_a, coefficient_a,
202                         rounding_mode, fpsc);
203     }
204   }
205   // test whether coefficient_a*10^(exponent_a-exponent_b)  may exceed 2^62
206   if (bin_expon_ca + __bid_estimate_bin_expon[diff_dec_expon] < 60) {
207     // coefficient_a*10^(exponent_a-exponent_b)<2^63
208
209     // multiply by 10^(exponent_a-exponent_b)
210     coefficient_a *= __bid_power10_table_128[diff_dec_expon].w[0];
211
212     // sign mask
213     sign_b = ((SINT64) sign_b) >> 63;
214     // apply sign to coeff. of b
215     coefficient_b = (coefficient_b + sign_b) ^ sign_b;
216
217     // apply sign to coefficient a
218     sign_a = ((SINT64) sign_a) >> 63;
219     coefficient_a = (coefficient_a + sign_a) ^ sign_a;
220
221     coefficient_a += coefficient_b;
222     // get sign
223     sign_s = ((SINT64) coefficient_a) >> 63;
224     coefficient_a = (coefficient_a + sign_s) ^ sign_s;
225     sign_s &= 0x8000000000000000ull;
226
227     // coefficient_a < 10^16 ?
228     if (coefficient_a < __bid_power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
229 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
230 #ifndef IEEE_ROUND_NEAREST
231       if (rounding_mode == ROUNDING_DOWN && (!coefficient_a)
232           && sign_a != sign_b)
233         sign_s = 0x8000000000000000ull;
234 #endif
235 #endif
236       return get_BID64 (sign_s, exponent_b, coefficient_a,
237                         rounding_mode, fpsc);
238     }
239     // otherwise rounding is necessary
240
241     // already know coefficient_a<10^19
242     // coefficient_a < 10^17 ?
243     if (coefficient_a < __bid_power10_table_128[17].w[0])
244       extra_digits = 1;
245     else if (coefficient_a < __bid_power10_table_128[18].w[0])
246       extra_digits = 2;
247     else
248       extra_digits = 3;
249
250 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
251 #ifndef IEEE_ROUND_NEAREST
252     rmode = rounding_mode;
253     if (sign_s && (unsigned) (rmode - 1) < 2)
254       rmode = 3 - rmode;
255 #else
256     rmode = 0;
257 #endif
258 #else
259     rmode = 0;
260 #endif
261     coefficient_a += __bid_round_const_table[rmode][extra_digits];
262
263     // get P*(2^M[extra_digits])/10^extra_digits
264     __mul_64x64_to_128 (CT, coefficient_a,
265                         __bid_reciprocals10_64[extra_digits]);
266
267     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
268     amount = __bid_short_recip_scale[extra_digits];
269     C64 = CT.w[1] >> amount;
270
271   } else {
272     // coefficient_a*10^(exponent_a-exponent_b) is large
273     sign_s = sign_a;
274
275 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
276 #ifndef IEEE_ROUND_NEAREST
277     rmode = rounding_mode;
278     if (sign_s && (unsigned) (rmode - 1) < 2)
279       rmode = 3 - rmode;
280 #else
281     rmode = 0;
282 #endif
283 #else
284     rmode = 0;
285 #endif
286
287     // check whether we can take faster path
288     scale_ca = __bid_estimate_decimal_digits[bin_expon_ca];
289
290     sign_ab = sign_a ^ sign_b;
291     sign_ab = ((SINT64) sign_ab) >> 63;
292
293     // T1 = 10^(16-diff_dec_expon)
294     T1 = __bid_power10_table_128[16 - diff_dec_expon].w[0];
295
296     // get number of digits in coefficient_a
297     //P_ca = __bid_power10_table_128[scale_ca].w[0];
298     //P_ca_m1 = __bid_power10_table_128[scale_ca-1].w[0];
299     if (coefficient_a >= __bid_power10_table_128[scale_ca].w[0]) {
300       scale_ca++;
301       //P_ca_m1 = P_ca;
302       //P_ca = __bid_power10_table_128[scale_ca].w[0];
303     }
304
305     scale_k = 16 - scale_ca;
306
307     // apply sign
308     //Ts = (T1 + sign_ab) ^ sign_ab;
309
310     // test range of ca
311     //X = coefficient_a + Ts - P_ca_m1;
312
313     // addition
314     saved_ca = coefficient_a - T1;
315     coefficient_a =
316       (SINT64) saved_ca *(SINT64) __bid_power10_table_128[scale_k].w[0];
317     extra_digits = diff_dec_expon - scale_k;
318
319     // apply sign
320     saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
321     // add 10^16 and rounding constant
322     coefficient_b =
323       saved_cb + 10000000000000000ull +
324       __bid_round_const_table[rmode][extra_digits];
325
326     // get P*(2^M[extra_digits])/10^extra_digits
327     __mul_64x64_to_128 (CT, coefficient_b,
328                         __bid_reciprocals10_64[extra_digits]);
329
330     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
331     amount = __bid_short_recip_scale[extra_digits];
332     C0_64 = CT.w[1] >> amount;
333
334     // result coefficient 
335     C64 = C0_64 + coefficient_a;
336     // filter out difficult (corner) cases
337     // the following test is equivalent to 
338     // ( (initial_coefficient_a + Ts) < P_ca && 
339     //     (initial_coefficient_a + Ts) > P_ca_m1 ), 
340     // which ensures the number of digits in coefficient_a does not change 
341     // after adding (the appropriately scaled and rounded) coefficient_b
342     if ((UINT64) (C64 - 1000000000000000ull - 1) > 9000000000000000ull - 2) {
343       if (C64 >= 10000000000000000ull) {
344         // result has more than 16 digits
345         if (!scale_k) {
346           // must divide coeff_a by 10
347           saved_ca = saved_ca + T1;
348           __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull); 
349               //__bid_reciprocals10_64[1]);
350           coefficient_a = CA.w[1] >> 1;
351           rem_a =
352             saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
353           coefficient_a = coefficient_a - T1;
354
355           saved_cb +=
356             /*90000000000000000 */ +rem_a *
357             __bid_power10_table_128[diff_dec_expon].w[0];
358         } else
359           coefficient_a =
360             (SINT64) (saved_ca - T1 -
361                       (T1 << 3)) * (SINT64) __bid_power10_table_128[scale_k -
362                                                               1].w[0];
363
364         extra_digits++;
365         coefficient_b =
366           saved_cb + 100000000000000000ull +
367           __bid_round_const_table[rmode][extra_digits];
368
369         // get P*(2^M[extra_digits])/10^extra_digits
370         __mul_64x64_to_128 (CT, coefficient_b,
371                             __bid_reciprocals10_64[extra_digits]);
372
373         // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
374         amount = __bid_short_recip_scale[extra_digits];
375         C0_64 = CT.w[1] >> amount;
376
377         // result coefficient 
378         C64 = C0_64 + coefficient_a;
379       } else if (C64 <= 1000000000000000ull) {
380         // less than 16 digits in result
381         coefficient_a =
382           (SINT64) saved_ca *(SINT64) __bid_power10_table_128[scale_k +
383                                                         1].w[0];
384         //extra_digits --;
385         exponent_b--;
386         coefficient_b =
387           (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
388           __bid_round_const_table[rmode][extra_digits];
389
390         // get P*(2^M[extra_digits])/10^extra_digits
391         __mul_64x64_to_128 (CT_new, coefficient_b,
392                             __bid_reciprocals10_64[extra_digits]);
393
394         // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
395         amount = __bid_short_recip_scale[extra_digits];
396         C0_64 = CT_new.w[1] >> amount;
397
398         // result coefficient 
399         C64_new = C0_64 + coefficient_a;
400         if (C64_new < 10000000000000000ull) {
401           C64 = C64_new;
402 #ifdef SET_STATUS_FLAGS
403           CT = CT_new;
404 #endif
405         } else
406           exponent_b++;
407       }
408
409     }
410
411   }
412
413 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
414 #ifndef IEEE_ROUND_NEAREST
415   if (rmode == 0) //ROUNDING_TO_NEAREST
416 #endif
417     if (C64 & 1) {
418       // check whether fractional part of initial_P/10^extra_digits 
419       // is exactly .5
420       // this is the same as fractional part of 
421       //      (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
422
423       // get remainder
424       remainder_h = CT.w[1] << (64 - amount);
425
426       // test whether fractional part is 0
427       if (!remainder_h && (CT.w[0] < __bid_reciprocals10_64[extra_digits])) {
428         C64--;
429       }
430     }
431 #endif
432
433 #ifdef SET_STATUS_FLAGS
434   status = INEXACT_EXCEPTION;
435
436   // get remainder
437   remainder_h = CT.w[1] << (64 - amount);
438
439   switch (rmode) {
440   case ROUNDING_TO_NEAREST:
441   case ROUNDING_TIES_AWAY:
442     // test whether fractional part is 0
443     if ((remainder_h == 0x8000000000000000ull)
444         && (CT.w[0] < __bid_reciprocals10_64[extra_digits]))
445       status = EXACT_STATUS;
446     break;
447   case ROUNDING_DOWN:
448   case ROUNDING_TO_ZERO:
449     if (!remainder_h && (CT.w[0] < __bid_reciprocals10_64[extra_digits]))
450       status = EXACT_STATUS;
451     break;
452   default:
453     // round up
454     __add_carry_out (tmp, carry, CT.w[0],
455                      __bid_reciprocals10_64[extra_digits]);
456     if ((remainder_h >> (64 - amount)) + carry >=
457         (((UINT64) 1) << amount))
458       status = EXACT_STATUS;
459     break;
460   }
461   __set_status_flags (fpsc, status);
462
463 #endif
464
465   return get_BID64 (sign_s, exponent_b + extra_digits, C64,
466                     rounding_mode, fpsc);
467 }
468
469
470 ///////////////////////////////////////////////////////////////////
471 // round 128-bit coefficient and return result in BID64 format
472 // do not worry about midpoint cases
473 //////////////////////////////////////////////////////////////////
474 static UINT64
475 __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P,
476                              int extra_digits, int rounding_mode,
477                              unsigned *fpsc) {
478   UINT128 Q_high, Q_low, C128;
479   UINT64 C64;
480   int amount, rmode;
481
482   rmode = rounding_mode;
483 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
484 #ifndef IEEE_ROUND_NEAREST
485   if (sign && (unsigned) (rmode - 1) < 2)
486     rmode = 3 - rmode;
487 #endif
488 #endif
489   __add_128_64 (P, P, __bid_round_const_table[rmode][extra_digits]);
490
491   // get P*(2^M[extra_digits])/10^extra_digits
492   __mul_128x128_full (Q_high, Q_low, P,
493                       __bid_reciprocals10_128[extra_digits]);
494
495   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
496   amount = __bid_recip_scale[extra_digits];
497   __shr_128 (C128, Q_high, amount);
498
499   C64 = __low_64 (C128);
500
501 #ifdef SET_STATUS_FLAGS
502
503   __set_status_flags (fpsc, INEXACT_EXCEPTION);
504
505 #endif
506
507   return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
508 }
509
510 ///////////////////////////////////////////////////////////////////
511 // round 128-bit coefficient and return result in BID64 format
512 ///////////////////////////////////////////////////////////////////
513 static UINT64
514 __bid_full_round64 (UINT64 sign, int exponent, UINT128 P,
515                     int extra_digits, int rounding_mode,
516                     unsigned *fpsc) {
517   UINT128 Q_high, Q_low, C128, Stemp, PU;
518   UINT64 remainder_h, C64, carry, CY;
519   int amount, amount2, rmode, status = 0;
520
521   if (exponent < 0) {
522     if (exponent >= -16 && (extra_digits + exponent < 0)) {
523       extra_digits = -exponent;
524 #ifdef SET_STATUS_FLAGS
525       if (extra_digits > 0) {
526         rmode = rounding_mode;
527         if (sign && (unsigned) (rmode - 1) < 2)
528           rmode = 3 - rmode;
529         __add_128_128 (PU, P,
530                        __bid_round_const_table_128[rmode][extra_digits]);
531         if (__unsigned_compare_gt_128
532             (__bid_power10_table_128[extra_digits + 15], PU))
533           status = UNDERFLOW_EXCEPTION;
534       }
535 #endif
536     }
537   }
538
539   if (extra_digits > 0) {
540     exponent += extra_digits;
541     rmode = rounding_mode;
542     if (sign && (unsigned) (rmode - 1) < 2)
543       rmode = 3 - rmode;
544     __add_128_128 (P, P, __bid_round_const_table_128[rmode][extra_digits]);
545
546     // get P*(2^M[extra_digits])/10^extra_digits
547     __mul_128x128_full (Q_high, Q_low, P,
548                         __bid_reciprocals10_128[extra_digits]);
549
550     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
551     amount = __bid_recip_scale[extra_digits];
552     __shr_128_long (C128, Q_high, amount);
553
554     C64 = __low_64 (C128);
555
556 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
557 #ifndef IEEE_ROUND_NEAREST
558     if (rmode == 0) //ROUNDING_TO_NEAREST
559 #endif
560       if (C64 & 1) {
561         // check whether fractional part of initial_P/10^extra_digits 
562         // is exactly .5
563
564         // get remainder
565         amount2 = 64 - amount;
566         remainder_h = 0;
567         remainder_h--;
568         remainder_h >>= amount2;
569         remainder_h = remainder_h & Q_high.w[0];
570
571         if (!remainder_h
572             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
573                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
574                     && Q_low.w[0] <
575                     __bid_reciprocals10_128[extra_digits].w[0]))) {
576           C64--;
577         }
578       }
579 #endif
580
581 #ifdef SET_STATUS_FLAGS
582     status |= INEXACT_EXCEPTION;
583
584     // get remainder
585     remainder_h = Q_high.w[0] << (64 - amount);
586
587     switch (rmode) {
588     case ROUNDING_TO_NEAREST:
589     case ROUNDING_TIES_AWAY:
590       // test whether fractional part is 0
591       if (remainder_h == 0x8000000000000000ull
592           && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
593               || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
594                   && Q_low.w[0] <
595                   __bid_reciprocals10_128[extra_digits].w[0])))
596         status = EXACT_STATUS;
597       break;
598     case ROUNDING_DOWN:
599     case ROUNDING_TO_ZERO:
600       if (!remainder_h
601           && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
602               || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
603                   && Q_low.w[0] <
604                   __bid_reciprocals10_128[extra_digits].w[0])))
605         status = EXACT_STATUS;
606       break;
607     default:
608       // round up
609       __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
610                        __bid_reciprocals10_128[extra_digits].w[0]);
611       __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
612                           __bid_reciprocals10_128[extra_digits].w[1], CY);
613       if ((remainder_h >> (64 - amount)) + carry >=
614           (((UINT64) 1) << amount))
615         status = EXACT_STATUS;
616     }
617
618     __set_status_flags (fpsc, status);
619
620 #endif
621   } else {
622     C64 = P.w[0];
623     if (!C64) {
624       sign = 0;
625       if (rounding_mode == ROUNDING_DOWN)
626         sign = 0x8000000000000000ull;
627     }
628   }
629   return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
630 }
631
632 /////////////////////////////////////////////////////////////////////////////////
633 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
634 // the lowest 64 bits (remainder_P) are used for midpoint checking only
635 ////////////////////////////////////////////////////////////////////////////////
636 UINT64
637 __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P,
638                               int extra_digits, UINT64 remainder_P,
639                               int rounding_mode, unsigned *fpsc,
640                               unsigned uf_status) {
641   UINT128 Q_high, Q_low, C128, Stemp;
642   UINT64 remainder_h, C64, carry, CY;
643   int amount, amount2, rmode, status = uf_status;
644
645   rmode = rounding_mode;
646   if (sign && (unsigned) (rmode - 1) < 2)
647     rmode = 3 - rmode;
648   if (rmode == ROUNDING_UP && remainder_P) {
649     P.w[0]++;
650     if (!P.w[0])
651       P.w[1]++;
652   }
653
654   if (extra_digits) {
655     __add_128_64 (P, P, __bid_round_const_table[rmode][extra_digits]);
656
657     // get P*(2^M[extra_digits])/10^extra_digits
658     __mul_128x128_full (Q_high, Q_low, P,
659                         __bid_reciprocals10_128[extra_digits]);
660
661     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
662     amount = __bid_recip_scale[extra_digits];
663     __shr_128 (C128, Q_high, amount);
664
665     C64 = __low_64 (C128);
666
667 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
668 #ifndef IEEE_ROUND_NEAREST
669     if (rmode == 0) //ROUNDING_TO_NEAREST
670 #endif
671       if (!remainder_P && (C64 & 1)) {
672         // check whether fractional part of initial_P/10^extra_digits 
673         // is exactly .5
674
675         // get remainder
676         amount2 = 64 - amount;
677         remainder_h = 0;
678         remainder_h--;
679         remainder_h >>= amount2;
680         remainder_h = remainder_h & Q_high.w[0];
681
682         if (!remainder_h
683             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
684                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
685                     && Q_low.w[0] <
686                     __bid_reciprocals10_128[extra_digits].w[0]))) {
687           C64--;
688         }
689       }
690 #endif
691
692 #ifdef SET_STATUS_FLAGS
693     status |= INEXACT_EXCEPTION;
694
695     if (!remainder_P) {
696       // get remainder
697       remainder_h = Q_high.w[0] << (64 - amount);
698
699       switch (rmode) {
700       case ROUNDING_TO_NEAREST:
701       case ROUNDING_TIES_AWAY:
702         // test whether fractional part is 0
703         if (remainder_h == 0x8000000000000000ull
704             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
705                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
706                     && Q_low.w[0] <
707                     __bid_reciprocals10_128[extra_digits].w[0])))
708           status = EXACT_STATUS;
709         break;
710       case ROUNDING_DOWN:
711       case ROUNDING_TO_ZERO:
712         if (!remainder_h
713             && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
714                 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
715                     && Q_low.w[0] <
716                     __bid_reciprocals10_128[extra_digits].w[0])))
717           status = EXACT_STATUS;
718         break;
719       default:
720         // round up
721         __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
722                          __bid_reciprocals10_128[extra_digits].w[0]);
723         __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
724                             __bid_reciprocals10_128[extra_digits].w[1], CY);
725         if ((remainder_h >> (64 - amount)) + carry >=
726             (((UINT64) 1) << amount))
727           status = EXACT_STATUS;
728       }
729     }
730     __set_status_flags (fpsc, status);
731
732 #endif
733   } else {
734     C64 = P.w[0];
735 #ifdef SET_STATUS_FLAGS
736     if (remainder_P) {
737       __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION);
738     }
739 #endif
740   }
741
742   return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode,
743                     fpsc);
744 }
745
746
747 ///////////////////////////////////////////////////////////////////
748 // get P/10^extra_digits
749 // result fits in 64 bits
750 ///////////////////////////////////////////////////////////////////
751 __BID_INLINE__ UINT64
752 __truncate (UINT128 P, int extra_digits)
753 // extra_digits <= 16
754 {
755   UINT128 Q_high, Q_low, C128;
756   UINT64 C64;
757   int amount;
758
759   // get P*(2^M[extra_digits])/10^extra_digits
760   __mul_128x128_full (Q_high, Q_low, P,
761                       __bid_reciprocals10_128[extra_digits]);
762
763   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
764   amount = __bid_recip_scale[extra_digits];
765   __shr_128 (C128, Q_high, amount);
766
767   C64 = __low_64 (C128);
768
769   return C64;
770 }
771
772
773 ///////////////////////////////////////////////////////////////////
774 // return number of decimal digits in 128-bit value X
775 ///////////////////////////////////////////////////////////////////
776 __BID_INLINE__ int
777 __get_dec_digits64 (UINT128 X) {
778   int_double tempx;
779   int digits_x, bin_expon_cx;
780
781   if (!X.w[1]) {
782     //--- get number of bits in the coefficients of x and y ---
783     tempx.d = (double) X.w[0];
784     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
785     // get number of decimal digits in the coeff_x
786     digits_x = __bid_estimate_decimal_digits[bin_expon_cx];
787     if (X.w[0] >= __bid_power10_table_128[digits_x].w[0])
788       digits_x++;
789     return digits_x;
790   }
791   tempx.d = (double) X.w[1];
792   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
793   // get number of decimal digits in the coeff_x
794   digits_x = __bid_estimate_decimal_digits[bin_expon_cx + 64];
795   if (__unsigned_compare_ge_128 (X, __bid_power10_table_128[digits_x]))
796     digits_x++;
797
798   return digits_x;
799 }
800
801
802 ////////////////////////////////////////////////////////////////////////////////
803 //
804 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
805 //
806 ////////////////////////////////////////////////////////////////////////////////
807 __BID_INLINE__ UINT64
808 get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
809             UINT64 sign_y, int final_exponent_y, UINT128 CY,
810             int extra_digits, int rounding_mode, unsigned *fpsc) {
811   UINT128 CY_L, CX, FS, F, CT, ST, T2;
812   UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y;
813   SINT64 D = 0;
814   int_double tempx;
815   int diff_dec_expon, extra_digits2, exponent_y, status;
816   int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode;
817
818   // CY has more than 16 decimal digits
819
820   exponent_y = final_exponent_y - extra_digits;
821
822 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
823   rounding_mode = 0;
824 #endif
825 #ifdef IEEE_ROUND_NEAREST
826   rounding_mode = 0;
827 #endif
828
829   if (exponent_x > exponent_y) {
830     // normalize x
831     //--- get number of bits in the coefficients of x and y ---
832     tempx.d = (double) coefficient_x;
833     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
834     // get number of decimal digits in the coeff_x
835     digits_x = __bid_estimate_decimal_digits[bin_expon_cx];
836     if (coefficient_x >= __bid_power10_table_128[digits_x].w[0])
837       digits_x++;
838
839     extra_dx = 16 - digits_x;
840     coefficient_x *= __bid_power10_table_128[extra_dx].w[0];
841     if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) {
842       extra_dx++;
843       coefficient_x = 10000000000000000ull;
844     }
845     exponent_x -= extra_dx;
846
847     if (exponent_x > exponent_y) {
848
849       // exponent_x > exponent_y
850       diff_dec_expon = exponent_x - exponent_y;
851
852       if (exponent_x <= final_exponent_y + 1) {
853         __mul_64x64_to_128 (CX, coefficient_x,
854                             __bid_power10_table_128[diff_dec_expon].w[0]);
855
856         if (sign_x == sign_y) {
857           __add_128_128 (CT, CY, CX);
858           if ((exponent_x >
859                final_exponent_y) /*&& (final_exponent_y>0) */ )
860             extra_digits++;
861           if (__unsigned_compare_ge_128
862               (CT, __bid_power10_table_128[16 + extra_digits]))
863             extra_digits++;
864         } else {
865           __sub_128_128 (CT, CY, CX);
866           if (((SINT64) CT.w[1]) < 0) {
867             CT.w[0] = 0 - CT.w[0];
868             CT.w[1] = 0 - CT.w[1];
869             if (CT.w[0])
870               CT.w[1]--;
871             sign_y = sign_x;
872           } else if (!(CT.w[1] | CT.w[0])) {
873             sign_y =
874               (rounding_mode !=
875                ROUNDING_DOWN) ? 0 : 0x8000000000000000ull;
876           }
877           if ((exponent_x + 1 >=
878                final_exponent_y) /*&& (final_exponent_y>=0) */ ) {
879             extra_digits = __get_dec_digits64 (CT) - 16;
880             if (extra_digits <= 0) {
881               if (!CT.w[0] && rounding_mode == ROUNDING_DOWN)
882                 sign_y = 0x8000000000000000ull;
883               return get_BID64 (sign_y, exponent_y, CT.w[0],
884                                 rounding_mode, fpsc);
885             }
886           } else
887             if (__unsigned_compare_gt_128
888                 (__bid_power10_table_128[15 + extra_digits], CT))
889             extra_digits--;
890         }
891
892         return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits,
893                                    rounding_mode, fpsc);
894       }
895       // diff_dec2+extra_digits is the number of digits to eliminate from 
896       //                           argument CY
897       diff_dec2 = exponent_x - final_exponent_y;
898
899       if (diff_dec2 >= 17) {
900 #ifndef IEEE_ROUND_NEAREST
901 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
902         if ((rounding_mode) & 3) {
903           switch (rounding_mode) {
904           case ROUNDING_UP:
905             if (!sign_y) {
906               D = ((SINT64) (sign_x ^ sign_y)) >> 63;
907               D = D + D + 1;
908               coefficient_x += D;
909             }
910             break;
911           case ROUNDING_DOWN:
912             if (sign_y) {
913               D = ((SINT64) (sign_x ^ sign_y)) >> 63;
914               D = D + D + 1;
915               coefficient_x += D;
916             }
917             break;
918           case ROUNDING_TO_ZERO:
919             if (sign_y != sign_x) {
920               D = 0 - 1;
921               coefficient_x += D;
922             }
923             break;
924           }
925           if (coefficient_x < 1000000000000000ull) {
926             coefficient_x -= D;
927             coefficient_x =
928               D + (coefficient_x << 1) + (coefficient_x << 3);
929             exponent_x--;
930           }
931         }
932 #endif
933 #endif
934 #ifdef SET_STATUS_FLAGS
935         if (CY.w[1] | CY.w[0])
936           __set_status_flags (fpsc, INEXACT_EXCEPTION);
937 #endif
938         return get_BID64 (sign_x, exponent_x, coefficient_x,
939                           rounding_mode, fpsc);
940       }
941       // here exponent_x <= 16+final_exponent_y
942
943       // truncate CY to 16 dec. digits
944       CYh = __truncate (CY, extra_digits);
945
946       // get remainder
947       T = __bid_power10_table_128[extra_digits].w[0];
948       __mul_64x64_to_64 (CY0L, CYh, T);
949
950       remainder_y = CY.w[0] - CY0L;
951
952       // align coeff_x, CYh
953       __mul_64x64_to_128 (CX, coefficient_x,
954                           __bid_power10_table_128[diff_dec2].w[0]);
955
956       if (sign_x == sign_y) {
957         __add_128_64 (CT, CX, CYh);
958         if (__unsigned_compare_ge_128
959             (CT, __bid_power10_table_128[16 + diff_dec2]))
960           diff_dec2++;
961       } else {
962         if (remainder_y)
963           CYh++;
964         __sub_128_64 (CT, CX, CYh);
965         if (__unsigned_compare_gt_128
966             (__bid_power10_table_128[15 + diff_dec2], CT))
967           diff_dec2--;
968       }
969
970       return __bid_full_round64_remainder (sign_x, final_exponent_y, CT,
971                                            diff_dec2, remainder_y,
972                                            rounding_mode, fpsc, 0);
973     }
974   }
975   // Here (exponent_x <= exponent_y)
976   {
977     diff_dec_expon = exponent_y - exponent_x;
978
979     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
980       rmode = rounding_mode;
981
982       if ((sign_x ^ sign_y)) {
983         if (!CY.w[0])
984           CY.w[1]--;
985         CY.w[0]--;
986         if (__unsigned_compare_gt_128
987             (__bid_power10_table_128[15 + extra_digits], CY)) {
988           if (rmode & 3) {
989             extra_digits--;
990             final_exponent_y--;
991           } else {
992             CY.w[0] = 1000000000000000ull;
993             CY.w[1] = 0;
994             extra_digits = 0;
995           }
996         }
997       }
998       __scale128_10 (CY, CY);
999       extra_digits++;
1000       CY.w[0] |= 1;
1001
1002       return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY,
1003                                           extra_digits, rmode, fpsc);
1004     }
1005     // apply sign to coeff_x
1006     sign_x ^= sign_y;
1007     sign_x = ((SINT64) sign_x) >> 63;
1008     CX.w[0] = (coefficient_x + sign_x) ^ sign_x;
1009     CX.w[1] = sign_x;
1010
1011     // check whether CY (rounded to 16 digits) and CX have 
1012     //                     any digits in the same position
1013     diff_dec2 = final_exponent_y - exponent_x;
1014
1015     if (diff_dec2 <= 17) {
1016       // align CY to 10^ex
1017       S = __bid_power10_table_128[diff_dec_expon].w[0];
1018       __mul_64x128_short (CY_L, S, CY);
1019
1020       __add_128_128 (ST, CY_L, CX);
1021       extra_digits2 = __get_dec_digits64 (ST) - 16;
1022       return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2,
1023                                  rounding_mode, fpsc);
1024     }
1025     // truncate CY to 16 dec. digits
1026     CYh = __truncate (CY, extra_digits);
1027
1028     // get remainder
1029     T = __bid_power10_table_128[extra_digits].w[0];
1030     __mul_64x64_to_64 (CY0L, CYh, T);
1031
1032     coefficient_y = CY.w[0] - CY0L;
1033     // add rounding constant
1034     rmode = rounding_mode;
1035     if (sign_y && (unsigned) (rmode - 1) < 2)
1036       rmode = 3 - rmode;
1037 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1038 #ifndef IEEE_ROUND_NEAREST
1039     if (!(rmode & 3)) //ROUNDING_TO_NEAREST
1040 #endif
1041 #endif
1042     {
1043       coefficient_y += __bid_round_const_table[rmode][extra_digits];
1044     }
1045     // align coefficient_y,  coefficient_x
1046     S = __bid_power10_table_128[diff_dec_expon].w[0];
1047     __mul_64x64_to_128 (F, coefficient_y, S);
1048
1049     // fraction
1050     __add_128_128 (FS, F, CX);
1051
1052 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1053 #ifndef IEEE_ROUND_NEAREST
1054     if (rmode == 0) //ROUNDING_TO_NEAREST
1055 #endif
1056     {
1057       // rounding code, here RN_EVEN
1058       // 10^(extra_digits+diff_dec_expon)
1059       T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1060       if (__unsigned_compare_gt_128 (FS, T2)
1061           || ((CYh & 1) && __test_equal_128 (FS, T2))) {
1062         CYh++;
1063         __sub_128_128 (FS, FS, T2);
1064       }
1065     }
1066 #endif
1067 #ifndef IEEE_ROUND_NEAREST
1068 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1069     if (rmode == 4) //ROUNDING_TO_NEAREST
1070 #endif
1071     {
1072       // rounding code, here RN_AWAY
1073       // 10^(extra_digits+diff_dec_expon)
1074       T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1075       if (__unsigned_compare_ge_128 (FS, T2)) {
1076         CYh++;
1077         __sub_128_128 (FS, FS, T2);
1078       }
1079     }
1080 #endif
1081 #ifndef IEEE_ROUND_NEAREST
1082 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1083     switch (rmode) {
1084     case ROUNDING_DOWN:
1085     case ROUNDING_TO_ZERO:
1086       if ((SINT64) FS.w[1] < 0) {
1087         CYh--;
1088         if (CYh < 1000000000000000ull) {
1089           CYh = 9999999999999999ull;
1090           final_exponent_y--;
1091         }
1092       } else {
1093         T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1094         if (__unsigned_compare_ge_128 (FS, T2)) {
1095           CYh++;
1096           __sub_128_128 (FS, FS, T2);
1097         }
1098       }
1099       break;
1100     case ROUNDING_UP:
1101       if ((SINT64) FS.w[1] < 0)
1102         break;
1103       T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1104       if (__unsigned_compare_gt_128 (FS, T2)) {
1105         CYh += 2;
1106         __sub_128_128 (FS, FS, T2);
1107       } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) {
1108         CYh++;
1109         FS.w[1] = FS.w[0] = 0;
1110       } else if (FS.w[1] | FS.w[0])
1111         CYh++;
1112       break;
1113     }
1114 #endif
1115 #endif
1116
1117 #ifdef SET_STATUS_FLAGS
1118     status = INEXACT_EXCEPTION;
1119 #ifndef IEEE_ROUND_NEAREST
1120 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1121     if (!(rmode & 3))
1122 #endif
1123 #endif
1124     {
1125       // RN modes
1126       if ((FS.w[1] ==
1127            __bid_round_const_table_128[0][diff_dec_expon + extra_digits].w[1])
1128           && (FS.w[0] ==
1129               __bid_round_const_table_128[0][diff_dec_expon +
1130                                        extra_digits].w[0]))
1131         status = EXACT_STATUS;
1132     }
1133 #ifndef IEEE_ROUND_NEAREST
1134 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1135     else if (!FS.w[1] && !FS.w[0])
1136       status = EXACT_STATUS;
1137 #endif
1138 #endif
1139
1140     __set_status_flags (fpsc, status);
1141 #endif
1142
1143     return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode,
1144                       fpsc);
1145   }
1146
1147 }
1148
1149
1150 //////////////////////////////////////////////////////////////////////////
1151 //
1152 //    0*10^ey + cz*10^ez,   ey<ez  
1153 //
1154 //////////////////////////////////////////////////////////////////////////
1155
1156 __BID_INLINE__ UINT64
1157 add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z,
1158             UINT64 coefficient_z, unsigned *prounding_mode,
1159             unsigned *fpsc) {
1160   int_double tempx;
1161   int bin_expon, scale_k, scale_cz;
1162   int diff_expon;
1163
1164   diff_expon = exponent_z - exponent_y;
1165
1166   tempx.d = (double) coefficient_z;
1167   bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
1168   scale_cz = __bid_estimate_decimal_digits[bin_expon];
1169   if (coefficient_z >= __bid_power10_table_128[scale_cz].w[0])
1170     scale_cz++;
1171
1172   scale_k = 16 - scale_cz;
1173   if (diff_expon < scale_k)
1174     scale_k = diff_expon;
1175   coefficient_z *= __bid_power10_table_128[scale_k].w[0];
1176
1177   return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z,
1178                     *prounding_mode, fpsc);
1179 }
1180 #endif