1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 /*****************************************************************************
31 * Helper add functions (for fma)
33 * __BID_INLINE__ UINT64 get_add64(
34 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
35 * UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
38 * __BID_INLINE__ UINT64 get_add128(
39 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
40 * UINT64 sign_y, int final_exponent_y, UINT128 CY,
41 * int extra_digits, int rounding_mode)
43 *****************************************************************************
45 * Algorithm description:
47 * get_add64: same as BID64 add, but arguments are unpacked and there
48 * are no special case checks
50 * get_add128: add 64-bit coefficient to 128-bit product (which contains
51 * 16+extra_digits decimal digits),
53 * - the exponents are compared and the two coefficients are
54 * properly aligned for addition/subtraction
55 * - multiple paths are needed
56 * - final result exponent is calculated and the lower term is
57 * rounded first if necessary, to avoid manipulating
58 * coefficients longer than 128 bits
60 ****************************************************************************/
62 #ifndef _INLINE_BID_ADD_H_
63 #define _INLINE_BID_ADD_H_
65 #include "bid_internal.h"
67 #define MAX_FORMAT_DIGITS 16
68 #define DECIMAL_EXPONENT_BIAS 398
69 #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
70 #define BINARY_EXPONENT_BIAS 0x3ff
71 #define UPPER_EXPON_LIMIT 51
73 ///////////////////////////////////////////////////////////////////////
75 // get_add64() is essentially the same as bid_add(), except that
76 // the arguments are unpacked
78 //////////////////////////////////////////////////////////////////////
80 get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
81 UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
82 int rounding_mode, unsigned *fpsc) {
83 UINT128 CA, CT, CT_new;
84 UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
86 UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp,
89 int exponent_a, exponent_b, diff_dec_expon;
90 int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
91 unsigned rmode, status;
93 // sort arguments by exponent
94 if (exponent_x <= exponent_y) {
96 exponent_a = exponent_y;
97 coefficient_a = coefficient_y;
99 exponent_b = exponent_x;
100 coefficient_b = coefficient_x;
103 exponent_a = exponent_x;
104 coefficient_a = coefficient_x;
106 exponent_b = exponent_y;
107 coefficient_b = coefficient_y;
110 // exponent difference
111 diff_dec_expon = exponent_a - exponent_b;
113 /* get binary coefficients of x and y */
115 //--- get number of bits in the coefficients of x and y ---
117 tempx.d = (double) coefficient_a;
118 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
120 if (!coefficient_a) {
121 return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode,
124 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
125 // normalize a to a 16-digit coefficient
127 scale_ca = __bid_estimate_decimal_digits[bin_expon_ca];
128 if (coefficient_a >= __bid_power10_table_128[scale_ca].w[0])
131 scale_k = 16 - scale_ca;
133 coefficient_a *= __bid_power10_table_128[scale_k].w[0];
135 diff_dec_expon -= scale_k;
136 exponent_a -= scale_k;
138 /* get binary coefficients of x and y */
140 //--- get number of bits in the coefficients of x and y ---
141 tempx.d = (double) coefficient_a;
142 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
144 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
145 #ifdef SET_STATUS_FLAGS
147 __set_status_flags (fpsc, INEXACT_EXCEPTION);
151 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
152 #ifndef IEEE_ROUND_NEAREST
153 if (((rounding_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST
155 switch (rounding_mode) {
158 coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
159 if (coefficient_a < 1000000000000000ull) {
161 coefficient_a = 9999999999999999ull;
162 } else if (coefficient_a >= 10000000000000000ull) {
164 coefficient_a = 1000000000000000ull;
170 coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
171 if (coefficient_a < 1000000000000000ull) {
173 coefficient_a = 9999999999999999ull;
174 } else if (coefficient_a >= 10000000000000000ull) {
176 coefficient_a = 1000000000000000ull;
181 if (sign_a != sign_b) {
183 if (coefficient_a < 1000000000000000ull) {
185 coefficient_a = 9999999999999999ull;
193 // check special case here
194 if ((coefficient_a == 1000000000000000ull)
195 && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
196 && (sign_a ^ sign_b) && (coefficient_b > 5000000000000000ull)) {
197 coefficient_a = 9999999999999999ull;
201 return get_BID64 (sign_a, exponent_a, coefficient_a,
202 rounding_mode, fpsc);
205 // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62
206 if (bin_expon_ca + __bid_estimate_bin_expon[diff_dec_expon] < 60) {
207 // coefficient_a*10^(exponent_a-exponent_b)<2^63
209 // multiply by 10^(exponent_a-exponent_b)
210 coefficient_a *= __bid_power10_table_128[diff_dec_expon].w[0];
213 sign_b = ((SINT64) sign_b) >> 63;
214 // apply sign to coeff. of b
215 coefficient_b = (coefficient_b + sign_b) ^ sign_b;
217 // apply sign to coefficient a
218 sign_a = ((SINT64) sign_a) >> 63;
219 coefficient_a = (coefficient_a + sign_a) ^ sign_a;
221 coefficient_a += coefficient_b;
223 sign_s = ((SINT64) coefficient_a) >> 63;
224 coefficient_a = (coefficient_a + sign_s) ^ sign_s;
225 sign_s &= 0x8000000000000000ull;
227 // coefficient_a < 10^16 ?
228 if (coefficient_a < __bid_power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
229 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
230 #ifndef IEEE_ROUND_NEAREST
231 if (rounding_mode == ROUNDING_DOWN && (!coefficient_a)
233 sign_s = 0x8000000000000000ull;
236 return get_BID64 (sign_s, exponent_b, coefficient_a,
237 rounding_mode, fpsc);
239 // otherwise rounding is necessary
241 // already know coefficient_a<10^19
242 // coefficient_a < 10^17 ?
243 if (coefficient_a < __bid_power10_table_128[17].w[0])
245 else if (coefficient_a < __bid_power10_table_128[18].w[0])
250 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
251 #ifndef IEEE_ROUND_NEAREST
252 rmode = rounding_mode;
253 if (sign_s && (unsigned) (rmode - 1) < 2)
261 coefficient_a += __bid_round_const_table[rmode][extra_digits];
263 // get P*(2^M[extra_digits])/10^extra_digits
264 __mul_64x64_to_128 (CT, coefficient_a,
265 __bid_reciprocals10_64[extra_digits]);
267 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
268 amount = __bid_short_recip_scale[extra_digits];
269 C64 = CT.w[1] >> amount;
272 // coefficient_a*10^(exponent_a-exponent_b) is large
275 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
276 #ifndef IEEE_ROUND_NEAREST
277 rmode = rounding_mode;
278 if (sign_s && (unsigned) (rmode - 1) < 2)
287 // check whether we can take faster path
288 scale_ca = __bid_estimate_decimal_digits[bin_expon_ca];
290 sign_ab = sign_a ^ sign_b;
291 sign_ab = ((SINT64) sign_ab) >> 63;
293 // T1 = 10^(16-diff_dec_expon)
294 T1 = __bid_power10_table_128[16 - diff_dec_expon].w[0];
296 // get number of digits in coefficient_a
297 //P_ca = __bid_power10_table_128[scale_ca].w[0];
298 //P_ca_m1 = __bid_power10_table_128[scale_ca-1].w[0];
299 if (coefficient_a >= __bid_power10_table_128[scale_ca].w[0]) {
302 //P_ca = __bid_power10_table_128[scale_ca].w[0];
305 scale_k = 16 - scale_ca;
308 //Ts = (T1 + sign_ab) ^ sign_ab;
311 //X = coefficient_a + Ts - P_ca_m1;
314 saved_ca = coefficient_a - T1;
316 (SINT64) saved_ca *(SINT64) __bid_power10_table_128[scale_k].w[0];
317 extra_digits = diff_dec_expon - scale_k;
320 saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
321 // add 10^16 and rounding constant
323 saved_cb + 10000000000000000ull +
324 __bid_round_const_table[rmode][extra_digits];
326 // get P*(2^M[extra_digits])/10^extra_digits
327 __mul_64x64_to_128 (CT, coefficient_b,
328 __bid_reciprocals10_64[extra_digits]);
330 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
331 amount = __bid_short_recip_scale[extra_digits];
332 C0_64 = CT.w[1] >> amount;
334 // result coefficient
335 C64 = C0_64 + coefficient_a;
336 // filter out difficult (corner) cases
337 // the following test is equivalent to
338 // ( (initial_coefficient_a + Ts) < P_ca &&
339 // (initial_coefficient_a + Ts) > P_ca_m1 ),
340 // which ensures the number of digits in coefficient_a does not change
341 // after adding (the appropriately scaled and rounded) coefficient_b
342 if ((UINT64) (C64 - 1000000000000000ull - 1) > 9000000000000000ull - 2) {
343 if (C64 >= 10000000000000000ull) {
344 // result has more than 16 digits
346 // must divide coeff_a by 10
347 saved_ca = saved_ca + T1;
348 __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
349 //__bid_reciprocals10_64[1]);
350 coefficient_a = CA.w[1] >> 1;
352 saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
353 coefficient_a = coefficient_a - T1;
356 /*90000000000000000 */ +rem_a *
357 __bid_power10_table_128[diff_dec_expon].w[0];
360 (SINT64) (saved_ca - T1 -
361 (T1 << 3)) * (SINT64) __bid_power10_table_128[scale_k -
366 saved_cb + 100000000000000000ull +
367 __bid_round_const_table[rmode][extra_digits];
369 // get P*(2^M[extra_digits])/10^extra_digits
370 __mul_64x64_to_128 (CT, coefficient_b,
371 __bid_reciprocals10_64[extra_digits]);
373 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
374 amount = __bid_short_recip_scale[extra_digits];
375 C0_64 = CT.w[1] >> amount;
377 // result coefficient
378 C64 = C0_64 + coefficient_a;
379 } else if (C64 <= 1000000000000000ull) {
380 // less than 16 digits in result
382 (SINT64) saved_ca *(SINT64) __bid_power10_table_128[scale_k +
387 (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
388 __bid_round_const_table[rmode][extra_digits];
390 // get P*(2^M[extra_digits])/10^extra_digits
391 __mul_64x64_to_128 (CT_new, coefficient_b,
392 __bid_reciprocals10_64[extra_digits]);
394 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
395 amount = __bid_short_recip_scale[extra_digits];
396 C0_64 = CT_new.w[1] >> amount;
398 // result coefficient
399 C64_new = C0_64 + coefficient_a;
400 if (C64_new < 10000000000000000ull) {
402 #ifdef SET_STATUS_FLAGS
413 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
414 #ifndef IEEE_ROUND_NEAREST
415 if (rmode == 0) //ROUNDING_TO_NEAREST
418 // check whether fractional part of initial_P/10^extra_digits
420 // this is the same as fractional part of
421 // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
424 remainder_h = CT.w[1] << (64 - amount);
426 // test whether fractional part is 0
427 if (!remainder_h && (CT.w[0] < __bid_reciprocals10_64[extra_digits])) {
433 #ifdef SET_STATUS_FLAGS
434 status = INEXACT_EXCEPTION;
437 remainder_h = CT.w[1] << (64 - amount);
440 case ROUNDING_TO_NEAREST:
441 case ROUNDING_TIES_AWAY:
442 // test whether fractional part is 0
443 if ((remainder_h == 0x8000000000000000ull)
444 && (CT.w[0] < __bid_reciprocals10_64[extra_digits]))
445 status = EXACT_STATUS;
448 case ROUNDING_TO_ZERO:
449 if (!remainder_h && (CT.w[0] < __bid_reciprocals10_64[extra_digits]))
450 status = EXACT_STATUS;
454 __add_carry_out (tmp, carry, CT.w[0],
455 __bid_reciprocals10_64[extra_digits]);
456 if ((remainder_h >> (64 - amount)) + carry >=
457 (((UINT64) 1) << amount))
458 status = EXACT_STATUS;
461 __set_status_flags (fpsc, status);
465 return get_BID64 (sign_s, exponent_b + extra_digits, C64,
466 rounding_mode, fpsc);
470 ///////////////////////////////////////////////////////////////////
471 // round 128-bit coefficient and return result in BID64 format
472 // do not worry about midpoint cases
473 //////////////////////////////////////////////////////////////////
475 __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P,
476 int extra_digits, int rounding_mode,
478 UINT128 Q_high, Q_low, C128;
482 rmode = rounding_mode;
483 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
484 #ifndef IEEE_ROUND_NEAREST
485 if (sign && (unsigned) (rmode - 1) < 2)
489 __add_128_64 (P, P, __bid_round_const_table[rmode][extra_digits]);
491 // get P*(2^M[extra_digits])/10^extra_digits
492 __mul_128x128_full (Q_high, Q_low, P,
493 __bid_reciprocals10_128[extra_digits]);
495 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
496 amount = __bid_recip_scale[extra_digits];
497 __shr_128 (C128, Q_high, amount);
499 C64 = __low_64 (C128);
501 #ifdef SET_STATUS_FLAGS
503 __set_status_flags (fpsc, INEXACT_EXCEPTION);
507 return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
510 ///////////////////////////////////////////////////////////////////
511 // round 128-bit coefficient and return result in BID64 format
512 ///////////////////////////////////////////////////////////////////
514 __bid_full_round64 (UINT64 sign, int exponent, UINT128 P,
515 int extra_digits, int rounding_mode,
517 UINT128 Q_high, Q_low, C128, Stemp, PU;
518 UINT64 remainder_h, C64, carry, CY;
519 int amount, amount2, rmode, status = 0;
522 if (exponent >= -16 && (extra_digits + exponent < 0)) {
523 extra_digits = -exponent;
524 #ifdef SET_STATUS_FLAGS
525 if (extra_digits > 0) {
526 rmode = rounding_mode;
527 if (sign && (unsigned) (rmode - 1) < 2)
529 __add_128_128 (PU, P,
530 __bid_round_const_table_128[rmode][extra_digits]);
531 if (__unsigned_compare_gt_128
532 (__bid_power10_table_128[extra_digits + 15], PU))
533 status = UNDERFLOW_EXCEPTION;
539 if (extra_digits > 0) {
540 exponent += extra_digits;
541 rmode = rounding_mode;
542 if (sign && (unsigned) (rmode - 1) < 2)
544 __add_128_128 (P, P, __bid_round_const_table_128[rmode][extra_digits]);
546 // get P*(2^M[extra_digits])/10^extra_digits
547 __mul_128x128_full (Q_high, Q_low, P,
548 __bid_reciprocals10_128[extra_digits]);
550 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
551 amount = __bid_recip_scale[extra_digits];
552 __shr_128_long (C128, Q_high, amount);
554 C64 = __low_64 (C128);
556 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
557 #ifndef IEEE_ROUND_NEAREST
558 if (rmode == 0) //ROUNDING_TO_NEAREST
561 // check whether fractional part of initial_P/10^extra_digits
565 amount2 = 64 - amount;
568 remainder_h >>= amount2;
569 remainder_h = remainder_h & Q_high.w[0];
572 && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
573 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
575 __bid_reciprocals10_128[extra_digits].w[0]))) {
581 #ifdef SET_STATUS_FLAGS
582 status |= INEXACT_EXCEPTION;
585 remainder_h = Q_high.w[0] << (64 - amount);
588 case ROUNDING_TO_NEAREST:
589 case ROUNDING_TIES_AWAY:
590 // test whether fractional part is 0
591 if (remainder_h == 0x8000000000000000ull
592 && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
593 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
595 __bid_reciprocals10_128[extra_digits].w[0])))
596 status = EXACT_STATUS;
599 case ROUNDING_TO_ZERO:
601 && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
602 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
604 __bid_reciprocals10_128[extra_digits].w[0])))
605 status = EXACT_STATUS;
609 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
610 __bid_reciprocals10_128[extra_digits].w[0]);
611 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
612 __bid_reciprocals10_128[extra_digits].w[1], CY);
613 if ((remainder_h >> (64 - amount)) + carry >=
614 (((UINT64) 1) << amount))
615 status = EXACT_STATUS;
618 __set_status_flags (fpsc, status);
625 if (rounding_mode == ROUNDING_DOWN)
626 sign = 0x8000000000000000ull;
629 return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
632 /////////////////////////////////////////////////////////////////////////////////
633 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
634 // the lowest 64 bits (remainder_P) are used for midpoint checking only
635 ////////////////////////////////////////////////////////////////////////////////
637 __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P,
638 int extra_digits, UINT64 remainder_P,
639 int rounding_mode, unsigned *fpsc,
640 unsigned uf_status) {
641 UINT128 Q_high, Q_low, C128, Stemp;
642 UINT64 remainder_h, C64, carry, CY;
643 int amount, amount2, rmode, status = uf_status;
645 rmode = rounding_mode;
646 if (sign && (unsigned) (rmode - 1) < 2)
648 if (rmode == ROUNDING_UP && remainder_P) {
655 __add_128_64 (P, P, __bid_round_const_table[rmode][extra_digits]);
657 // get P*(2^M[extra_digits])/10^extra_digits
658 __mul_128x128_full (Q_high, Q_low, P,
659 __bid_reciprocals10_128[extra_digits]);
661 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
662 amount = __bid_recip_scale[extra_digits];
663 __shr_128 (C128, Q_high, amount);
665 C64 = __low_64 (C128);
667 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
668 #ifndef IEEE_ROUND_NEAREST
669 if (rmode == 0) //ROUNDING_TO_NEAREST
671 if (!remainder_P && (C64 & 1)) {
672 // check whether fractional part of initial_P/10^extra_digits
676 amount2 = 64 - amount;
679 remainder_h >>= amount2;
680 remainder_h = remainder_h & Q_high.w[0];
683 && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
684 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
686 __bid_reciprocals10_128[extra_digits].w[0]))) {
692 #ifdef SET_STATUS_FLAGS
693 status |= INEXACT_EXCEPTION;
697 remainder_h = Q_high.w[0] << (64 - amount);
700 case ROUNDING_TO_NEAREST:
701 case ROUNDING_TIES_AWAY:
702 // test whether fractional part is 0
703 if (remainder_h == 0x8000000000000000ull
704 && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
705 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
707 __bid_reciprocals10_128[extra_digits].w[0])))
708 status = EXACT_STATUS;
711 case ROUNDING_TO_ZERO:
713 && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
714 || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
716 __bid_reciprocals10_128[extra_digits].w[0])))
717 status = EXACT_STATUS;
721 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
722 __bid_reciprocals10_128[extra_digits].w[0]);
723 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
724 __bid_reciprocals10_128[extra_digits].w[1], CY);
725 if ((remainder_h >> (64 - amount)) + carry >=
726 (((UINT64) 1) << amount))
727 status = EXACT_STATUS;
730 __set_status_flags (fpsc, status);
735 #ifdef SET_STATUS_FLAGS
737 __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION);
742 return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode,
747 ///////////////////////////////////////////////////////////////////
748 // get P/10^extra_digits
749 // result fits in 64 bits
750 ///////////////////////////////////////////////////////////////////
751 __BID_INLINE__ UINT64
752 __truncate (UINT128 P, int extra_digits)
753 // extra_digits <= 16
755 UINT128 Q_high, Q_low, C128;
759 // get P*(2^M[extra_digits])/10^extra_digits
760 __mul_128x128_full (Q_high, Q_low, P,
761 __bid_reciprocals10_128[extra_digits]);
763 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
764 amount = __bid_recip_scale[extra_digits];
765 __shr_128 (C128, Q_high, amount);
767 C64 = __low_64 (C128);
773 ///////////////////////////////////////////////////////////////////
774 // return number of decimal digits in 128-bit value X
775 ///////////////////////////////////////////////////////////////////
777 __get_dec_digits64 (UINT128 X) {
779 int digits_x, bin_expon_cx;
782 //--- get number of bits in the coefficients of x and y ---
783 tempx.d = (double) X.w[0];
784 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
785 // get number of decimal digits in the coeff_x
786 digits_x = __bid_estimate_decimal_digits[bin_expon_cx];
787 if (X.w[0] >= __bid_power10_table_128[digits_x].w[0])
791 tempx.d = (double) X.w[1];
792 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
793 // get number of decimal digits in the coeff_x
794 digits_x = __bid_estimate_decimal_digits[bin_expon_cx + 64];
795 if (__unsigned_compare_ge_128 (X, __bid_power10_table_128[digits_x]))
802 ////////////////////////////////////////////////////////////////////////////////
804 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
806 ////////////////////////////////////////////////////////////////////////////////
807 __BID_INLINE__ UINT64
808 get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
809 UINT64 sign_y, int final_exponent_y, UINT128 CY,
810 int extra_digits, int rounding_mode, unsigned *fpsc) {
811 UINT128 CY_L, CX, FS, F, CT, ST, T2;
812 UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y;
815 int diff_dec_expon, extra_digits2, exponent_y, status;
816 int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode;
818 // CY has more than 16 decimal digits
820 exponent_y = final_exponent_y - extra_digits;
822 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
825 #ifdef IEEE_ROUND_NEAREST
829 if (exponent_x > exponent_y) {
831 //--- get number of bits in the coefficients of x and y ---
832 tempx.d = (double) coefficient_x;
833 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
834 // get number of decimal digits in the coeff_x
835 digits_x = __bid_estimate_decimal_digits[bin_expon_cx];
836 if (coefficient_x >= __bid_power10_table_128[digits_x].w[0])
839 extra_dx = 16 - digits_x;
840 coefficient_x *= __bid_power10_table_128[extra_dx].w[0];
841 if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) {
843 coefficient_x = 10000000000000000ull;
845 exponent_x -= extra_dx;
847 if (exponent_x > exponent_y) {
849 // exponent_x > exponent_y
850 diff_dec_expon = exponent_x - exponent_y;
852 if (exponent_x <= final_exponent_y + 1) {
853 __mul_64x64_to_128 (CX, coefficient_x,
854 __bid_power10_table_128[diff_dec_expon].w[0]);
856 if (sign_x == sign_y) {
857 __add_128_128 (CT, CY, CX);
859 final_exponent_y) /*&& (final_exponent_y>0) */ )
861 if (__unsigned_compare_ge_128
862 (CT, __bid_power10_table_128[16 + extra_digits]))
865 __sub_128_128 (CT, CY, CX);
866 if (((SINT64) CT.w[1]) < 0) {
867 CT.w[0] = 0 - CT.w[0];
868 CT.w[1] = 0 - CT.w[1];
872 } else if (!(CT.w[1] | CT.w[0])) {
875 ROUNDING_DOWN) ? 0 : 0x8000000000000000ull;
877 if ((exponent_x + 1 >=
878 final_exponent_y) /*&& (final_exponent_y>=0) */ ) {
879 extra_digits = __get_dec_digits64 (CT) - 16;
880 if (extra_digits <= 0) {
881 if (!CT.w[0] && rounding_mode == ROUNDING_DOWN)
882 sign_y = 0x8000000000000000ull;
883 return get_BID64 (sign_y, exponent_y, CT.w[0],
884 rounding_mode, fpsc);
887 if (__unsigned_compare_gt_128
888 (__bid_power10_table_128[15 + extra_digits], CT))
892 return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits,
893 rounding_mode, fpsc);
895 // diff_dec2+extra_digits is the number of digits to eliminate from
897 diff_dec2 = exponent_x - final_exponent_y;
899 if (diff_dec2 >= 17) {
900 #ifndef IEEE_ROUND_NEAREST
901 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
902 if ((rounding_mode) & 3) {
903 switch (rounding_mode) {
906 D = ((SINT64) (sign_x ^ sign_y)) >> 63;
913 D = ((SINT64) (sign_x ^ sign_y)) >> 63;
918 case ROUNDING_TO_ZERO:
919 if (sign_y != sign_x) {
925 if (coefficient_x < 1000000000000000ull) {
928 D + (coefficient_x << 1) + (coefficient_x << 3);
934 #ifdef SET_STATUS_FLAGS
935 if (CY.w[1] | CY.w[0])
936 __set_status_flags (fpsc, INEXACT_EXCEPTION);
938 return get_BID64 (sign_x, exponent_x, coefficient_x,
939 rounding_mode, fpsc);
941 // here exponent_x <= 16+final_exponent_y
943 // truncate CY to 16 dec. digits
944 CYh = __truncate (CY, extra_digits);
947 T = __bid_power10_table_128[extra_digits].w[0];
948 __mul_64x64_to_64 (CY0L, CYh, T);
950 remainder_y = CY.w[0] - CY0L;
952 // align coeff_x, CYh
953 __mul_64x64_to_128 (CX, coefficient_x,
954 __bid_power10_table_128[diff_dec2].w[0]);
956 if (sign_x == sign_y) {
957 __add_128_64 (CT, CX, CYh);
958 if (__unsigned_compare_ge_128
959 (CT, __bid_power10_table_128[16 + diff_dec2]))
964 __sub_128_64 (CT, CX, CYh);
965 if (__unsigned_compare_gt_128
966 (__bid_power10_table_128[15 + diff_dec2], CT))
970 return __bid_full_round64_remainder (sign_x, final_exponent_y, CT,
971 diff_dec2, remainder_y,
972 rounding_mode, fpsc, 0);
975 // Here (exponent_x <= exponent_y)
977 diff_dec_expon = exponent_y - exponent_x;
979 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
980 rmode = rounding_mode;
982 if ((sign_x ^ sign_y)) {
986 if (__unsigned_compare_gt_128
987 (__bid_power10_table_128[15 + extra_digits], CY)) {
992 CY.w[0] = 1000000000000000ull;
998 __scale128_10 (CY, CY);
1002 return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY,
1003 extra_digits, rmode, fpsc);
1005 // apply sign to coeff_x
1007 sign_x = ((SINT64) sign_x) >> 63;
1008 CX.w[0] = (coefficient_x + sign_x) ^ sign_x;
1011 // check whether CY (rounded to 16 digits) and CX have
1012 // any digits in the same position
1013 diff_dec2 = final_exponent_y - exponent_x;
1015 if (diff_dec2 <= 17) {
1016 // align CY to 10^ex
1017 S = __bid_power10_table_128[diff_dec_expon].w[0];
1018 __mul_64x128_short (CY_L, S, CY);
1020 __add_128_128 (ST, CY_L, CX);
1021 extra_digits2 = __get_dec_digits64 (ST) - 16;
1022 return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2,
1023 rounding_mode, fpsc);
1025 // truncate CY to 16 dec. digits
1026 CYh = __truncate (CY, extra_digits);
1029 T = __bid_power10_table_128[extra_digits].w[0];
1030 __mul_64x64_to_64 (CY0L, CYh, T);
1032 coefficient_y = CY.w[0] - CY0L;
1033 // add rounding constant
1034 rmode = rounding_mode;
1035 if (sign_y && (unsigned) (rmode - 1) < 2)
1037 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1038 #ifndef IEEE_ROUND_NEAREST
1039 if (!(rmode & 3)) //ROUNDING_TO_NEAREST
1043 coefficient_y += __bid_round_const_table[rmode][extra_digits];
1045 // align coefficient_y, coefficient_x
1046 S = __bid_power10_table_128[diff_dec_expon].w[0];
1047 __mul_64x64_to_128 (F, coefficient_y, S);
1050 __add_128_128 (FS, F, CX);
1052 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1053 #ifndef IEEE_ROUND_NEAREST
1054 if (rmode == 0) //ROUNDING_TO_NEAREST
1057 // rounding code, here RN_EVEN
1058 // 10^(extra_digits+diff_dec_expon)
1059 T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1060 if (__unsigned_compare_gt_128 (FS, T2)
1061 || ((CYh & 1) && __test_equal_128 (FS, T2))) {
1063 __sub_128_128 (FS, FS, T2);
1067 #ifndef IEEE_ROUND_NEAREST
1068 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1069 if (rmode == 4) //ROUNDING_TO_NEAREST
1072 // rounding code, here RN_AWAY
1073 // 10^(extra_digits+diff_dec_expon)
1074 T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1075 if (__unsigned_compare_ge_128 (FS, T2)) {
1077 __sub_128_128 (FS, FS, T2);
1081 #ifndef IEEE_ROUND_NEAREST
1082 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1085 case ROUNDING_TO_ZERO:
1086 if ((SINT64) FS.w[1] < 0) {
1088 if (CYh < 1000000000000000ull) {
1089 CYh = 9999999999999999ull;
1093 T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1094 if (__unsigned_compare_ge_128 (FS, T2)) {
1096 __sub_128_128 (FS, FS, T2);
1101 if ((SINT64) FS.w[1] < 0)
1103 T2 = __bid_power10_table_128[diff_dec_expon + extra_digits];
1104 if (__unsigned_compare_gt_128 (FS, T2)) {
1106 __sub_128_128 (FS, FS, T2);
1107 } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) {
1109 FS.w[1] = FS.w[0] = 0;
1110 } else if (FS.w[1] | FS.w[0])
1117 #ifdef SET_STATUS_FLAGS
1118 status = INEXACT_EXCEPTION;
1119 #ifndef IEEE_ROUND_NEAREST
1120 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1127 __bid_round_const_table_128[0][diff_dec_expon + extra_digits].w[1])
1129 __bid_round_const_table_128[0][diff_dec_expon +
1130 extra_digits].w[0]))
1131 status = EXACT_STATUS;
1133 #ifndef IEEE_ROUND_NEAREST
1134 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1135 else if (!FS.w[1] && !FS.w[0])
1136 status = EXACT_STATUS;
1140 __set_status_flags (fpsc, status);
1143 return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode,
1150 //////////////////////////////////////////////////////////////////////////
1152 // 0*10^ey + cz*10^ez, ey<ez
1154 //////////////////////////////////////////////////////////////////////////
1156 __BID_INLINE__ UINT64
1157 add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z,
1158 UINT64 coefficient_z, unsigned *prounding_mode,
1161 int bin_expon, scale_k, scale_cz;
1164 diff_expon = exponent_z - exponent_y;
1166 tempx.d = (double) coefficient_z;
1167 bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
1168 scale_cz = __bid_estimate_decimal_digits[bin_expon];
1169 if (coefficient_z >= __bid_power10_table_128[scale_cz].w[0])
1172 scale_k = 16 - scale_cz;
1173 if (diff_expon < scale_k)
1174 scale_k = diff_expon;
1175 coefficient_z *= __bid_power10_table_128[scale_k].w[0];
1177 return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z,
1178 *prounding_mode, fpsc);