/* Copyright (C) 2007  Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#ifndef _DIV_MACROS_H_
#define _DIV_MACROS_H_

#include "bid_internal.h"

//#define DOUBLE_EXTENDED_ON

#if DOUBLE_EXTENDED_ON
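/* Each divide routine below first forms a floating-point estimate of the
   quotient and then verifies and corrects it with exact integer
   arithmetic.  With DOUBLE_EXTENDED_ON set, the 64-bit significand of the
   80-bit long double allows a full 64-bit quotient word to be estimated in
   one step; the portable path further below builds the quotient from
   chunks small enough for a 53-bit double significand.

   Illustrative call (example values only):

     UINT128 q, r, x, y;
     x.w[1] = 1;  x.w[0] = 0;    // x = 2^64
     y.w[1] = 0;  y.w[0] = 3;    // y = 3
     __div_128_by_128 (&q, &r, x, y);
     // now q.w[0] == 0x5555555555555555ull and r.w[0] == 1
*/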
__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) {
  UINT128 CB, CB2, CB4, CB8, CQB, CA;
  int_double d64, dm64, ds;
  int_float t64;
  long double dx, dq;
  long double lq, lx, ly;
  UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh;
  // both CX and CY fit in 64 bits: use the hardware divide
  if (!CX.w[1] && !CY.w[1]) {
    pCQ->w[1] = 0;
    pCQ->w[0] = CX.w[0] / CY.w[0];
    pCR->w[1] = 0;
    pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0];
    return;
  }
  if (!CY.w[1]) {
    // This path works for CX<2^116 only

    // 2^64
    d64.i = 0x43f0000000000000;
    // 2^(-64)
    dm64.i = 0x3bf0000000000000;
    // 1.5*2^(-52)
    ds.i = 0x3cb8000000000000;
    dx = (long double) CX.w[1] * d64.d + (long double) CX.w[0];
    dq = dx / (long double) CY.w[0];
    // lower the estimate slightly so it does not exceed the true quotient
    dq -= dq * ds.d;
    Qh = (UINT64) (dq * dm64.d);
    Ql = (UINT64) (dq - ((double) Qh) * d64.d);

    Rh = CX.w[0] - Ql * CY.w[0];
    Ql2 = Rh / CY.w[0];
    pCR->w[0] = Rh - Ql2 * CY.w[0];
    __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2);
    pCQ->w[1] = Qh + carry;
    pCR->w[1] = 0;
    return;
  }
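  /* Here CY.w[1] != 0: estimate the 64-bit quotient word in long double,
     then correct it with exact 128-bit arithmetic below.  */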
  // 2^64
  t64.i = 0x5f800000;
  lx =
    (long double) CX.w[1] * (long double) t64.d + (long double) CX.w[0];
  ly =
    (long double) CY.w[1] * (long double) t64.d + (long double) CY.w[0];
  lq = lx / ly;
  pCQ->w[0] = (UINT64) lq;
  pCQ->w[1] = 0;

  // quotient estimate is 0
  if (!pCQ->w[0]) {
    /*if(__unsigned_compare_ge_128(CX,CY))
       {
       pCQ->w[0]=1;
       __sub_128_128((*pCR), CX, CY);
       }
       else */
    {
      pCR->w[1] = CX.w[1];
      pCR->w[0] = CX.w[0];
    }
    return;
  }
  if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) {
    // start 1 below the estimate; the checks below add back at most 2
    pCQ->w[0] = (UINT64) lq - 1;
    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);
    if (__unsigned_compare_ge_128 (CA, CY)) {
      __sub_128_128 (CA, CA, CY);
      pCQ->w[0]++;
      if (__unsigned_compare_ge_128 (CA, CY)) {
        __sub_128_128 (CA, CA, CY);
        pCQ->w[0]++;
      }
    }
    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  } else {
    pCQ->w[0] = (UINT64) lq - 6;

    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);
    CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61);
    CB8.w[0] = CY.w[0] << 3;
    CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62);
    CB4.w[0] = CY.w[0] << 2;
    CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63);
    CB2.w[0] = CY.w[0] << 1;
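    /* The estimate was lowered by 6 units, so the remainder CA may still
       hold several multiples of CY; the four conditional subtractions
       below (8*CY, 4*CY, 2*CY, CY) restore up to 15 of them, one binary
       digit at a time.  */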
    if (__unsigned_compare_ge_128 (CA, CB8)) {
      pCQ->w[0] += 8;
      __sub_128_128 (CA, CA, CB8);
    }
    if (__unsigned_compare_ge_128 (CA, CB4)) {
      pCQ->w[0] += 4;
      __sub_128_128 (CA, CA, CB4);
    }
    if (__unsigned_compare_ge_128 (CA, CB2)) {
      pCQ->w[0] += 2;
      __sub_128_128 (CA, CA, CB2);
    }
    if (__unsigned_compare_ge_128 (CA, CY)) {
      pCQ->w[0]++;
      __sub_128_128 (CA, CA, CY);
    }

    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  }
}
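
/* Divide the 256-bit value in *pCA4 by the 128-bit CY; the quotient is
   accumulated into *pCQ and the remainder is left in *pCA4.  */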
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CQ2Y;
  UINT128 CQ2, CQ3Y;
  UINT64 Q3, carry64;
  int_double d64;
  long double lx, ly, lq, l64, l128;
  // 2^64
  d64.i = 0x43f0000000000000ull;
  l64 = (long double) d64.d;
  // 2^128
  l128 = l64 * l64;

  lx =
    ((long double) (*pCA4).w[3] * l64 +
     (long double) (*pCA4).w[2]) * l128 +
    (long double) (*pCA4).w[1] * l64 + (long double) (*pCA4).w[0];
  ly = (long double) CY.w[1] * l128 + (long double) CY.w[0] * l64;
  lq = lx / ly;
  CQ2.w[1] = (UINT64) lq;
  lq = (lq - CQ2.w[1]) * l64;
  CQ2.w[0] = (UINT64) lq;
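  /* CQ2 now holds a two-word quotient estimate; check it against the
     exact product CQ2*CY and correct in whichever direction is needed.  */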
  __mul_128x128_to_256 (CQ2Y, CY, CQ2);
  // CQ2Y <= (*pCA4) ?
  if (CQ2Y.w[3] < (*pCA4).w[3]
      || (CQ2Y.w[3] == (*pCA4).w[3]
          && (CQ2Y.w[2] < (*pCA4).w[2]
              || (CQ2Y.w[2] == (*pCA4).w[2]
                  && (CQ2Y.w[1] < (*pCA4).w[1]
                      || (CQ2Y.w[1] == (*pCA4).w[1]
                          && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) {
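    /* Estimate did not overshoot: subtract CQ2*CY from the dividend and
       raise the quotient by however much still divides out.  */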
    // (*pCA4) - CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1],
                         carry64);
    (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64;

    // remainder scaled by 2^64 to match the scaling of ly
    lx = ((long double) (*pCA4).w[2] * l128 +
          ((long double) (*pCA4).w[1] * l64 +
           (long double) (*pCA4).w[0])) * l64;
    lq = lx / ly;
    Q3 = (UINT64) lq;

    if (Q3) {
      __mul_64x128_short (CQ3Y, Q3, CY);
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64;
    }
    // if the remainder is still >= CY, increment Q3 (at most twice)
    if ((*pCA4).w[1] > CY.w[1]
        || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      Q3++;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
      if ((*pCA4).w[1] > CY.w[1]
          || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
        Q3++;
        __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0],
                          CY.w[0]);
        (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
      }
    }
    // add Q3 to CQ2
    __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]);
    CQ2.w[1] += carry64;
  } else {
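    /* Estimate overshot: compute the deficit CQ2Y - *pCA4, derive the
       downward correction Q3, and restore a non-negative remainder.  */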
    // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1],
                         carry64);
    (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64;

    lx =
      ((long double) (*pCA4).w[2] * l128 +
       (long double) (*pCA4).w[1] * l64 +
       (long double) (*pCA4).w[0]) * l64;
    lq = lx / ly;
    Q3 = 1 + (UINT64) lq;

    __mul_64x128_short (CQ3Y, Q3, CY);
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]);
    (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64;
    // remainder >= CY: Q3 is 1 too large
    if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1]
        || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      Q3--;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
    } else if ((SINT64) (*pCA4).w[1] < 0) {
      // remainder went negative: Q3 is 1 too small
      Q3++;
      __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64;
    }
    // subtract Q3 from Q2
    __sub_borrow_out (CQ2.w[0], carry64, CQ2.w[0], Q3);
    CQ2.w[1] -= carry64;
  }
  // (*pCQ) + CQ2 + carry
  __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]);
  (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64;
}

#else

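/* Portable variants: only 64-bit doubles are assumed here, so each
   floating-point estimate yields at most ~53 quotient bits at a time and
   the quotient is assembled from shifted chunks.  */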
__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) {
  UINT128 CY36, CY51, CQ, A2, CX, CQT;
  UINT64 Q;
  int_double t64, d49, d60;
  double lx, ly, lq;
  // both operands fit in 64 bits
  if (!CX0.w[1] && !CY.w[1]) {
    pCQ->w[0] = CX0.w[0] / CY.w[0];
    pCQ->w[1] = 0;
    pCR->w[1] = pCR->w[0] = 0;
    pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0];
    return;
  }

  CX.w[1] = CX0.w[1];
  CX.w[0] = CX0.w[0];
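  /* Estimate CX/CY in double precision; a double carries only 53
     significand bits, so the quotient is developed in chunks (scaled by
     2^60, then 2^49, then a final word), each verified with exact
     integer arithmetic below.  */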
  // 2^64
  t64.i = 0x43f0000000000000ull;
  lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;
  // CY36 = CY.w[0] * 2^36
  CY36.w[1] = CY.w[0] >> (64 - 36);
  CY36.w[0] = CY.w[0] << 36;

  CQ.w[1] = CQ.w[0] = 0;
  // is the quotient at least 2^100 ?
  if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) {
    // stage 1: top quotient chunk, scaled by 2^60

    // 2^(-60)
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    // underestimate by 4 to keep the remainder non-negative
    Q = -4 + (UINT64) lq;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);

    // A2 <<= 60
    A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60));
    A2.w[0] = A2.w[0] << 60;

    __sub_128_128 (CX, CX, A2);
    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;

    CQ.w[1] = Q >> (64 - 60);
    CQ.w[0] = Q << 60;
  }
  // CY51 = CY << 51
  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;
  if (CY.w[1] < (UINT64) (1 << (64 - 51))
      && (__unsigned_compare_gt_128 (CX, CY51))) {
    // stage 2: at least 51 more quotient bits; chunk scaled by 2^49

    // 2^(-49)
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = -1 + (UINT64) lq;
    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    A2.w[1] += Q * CY.w[1];

    // A2 <<= 49
    A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    A2.w[0] = A2.w[0] << 49;

    __sub_128_128 (CX, CX, A2);

    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);
    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;
  }

  Q = (UINT64) lq;
  // final chunk and exact correction
  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CX, CX, A2);
  if ((SINT64) CX.w[1] < 0) {
    // remainder went negative: decrement Q and add CY back (at most twice)
    Q--;
    CX.w[0] += CY.w[0];
    if (CX.w[0] < CY.w[0])
      CX.w[1]++;
    CX.w[1] += CY.w[1];
    if ((SINT64) CX.w[1] < 0) {
      Q--;
      CX.w[0] += CY.w[0];
      if (CX.w[0] < CY.w[0])
        CX.w[1]++;
      CX.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CX, CY)) {
    // remainder still >= CY: one more unit of quotient
    Q++;
    __sub_128_128 (CX, CX, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCR->w[1] = CX.w[1];
  pCR->w[0] = CX.w[0];
}
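
/* Divide the 256-bit value in *pCA4 by CY, accumulating the quotient
   into *pCQ; only the low 128 bits of the remainder are written back to
   *pCA4.  */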
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CA4, CA2, CY51, CY36;
  UINT128 CQ, A2, A2h, CQT;
  UINT64 Q, carry64;
  int_double t64, d49, d60;
  double lx, ly, lq, d128, d192;
  // the quotient is assumed to be at most 113 bits,
  // as needed by BID128 divide routines

  CA4.w[3] = (*pCA4).w[3];
  CA4.w[2] = (*pCA4).w[2];
  CA4.w[1] = (*pCA4).w[1];
  CA4.w[0] = (*pCA4).w[0];
  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];
  // 2^64, 2^128, 2^192
  t64.i = 0x43f0000000000000ull;
  d128 = t64.d * t64.d;
  d192 = d128 * t64.d;
  lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 +
                                   ((double) CA4.w[1] * t64.d +
                                    (double) CA4.w[0]));
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;
  // CY36 = CY << 36
  CY36.w[2] = CY.w[1] >> (64 - 36);
  CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36));
  CY36.w[0] = CY.w[0] << 36;
  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];
  // is the quotient at least 2^100 ?
  if (CA4.w[3] > CY36.w[2]
      || (CA4.w[3] == CY36.w[2]
          && (CA4.w[2] > CY36.w[1]
              || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) {
    // stage 1: top quotient chunk, scaled by 2^60

    // 2^(-60)
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    Q = -4 + (UINT64) lq;
    // Q*CY
    __mul_64x128_to_192 (CA2, Q, CY);
    // CA2 <<= 60
    // CA2.w[3] = CA2.w[2] >> (64-60);
    CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60));
    CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60));
    CA2.w[0] = CA2.w[0] << 60;
    // CA4 -= CA2
    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
                         carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;
    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;

    CQT.w[1] = Q >> (64 - 60);
    CQT.w[0] = Q << 60;
    __add_128_128 (CQ, CQ, CQT);
  }
  // CY51 = CY << 51
  CY51.w[2] = CY.w[1] >> (64 - 51);
  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;
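  /* If the remainder still exceeds CY * 2^51, another large quotient
     chunk (scaled by 2^49) must be extracted before the final word.  */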
  if (CA4.w[2] > CY51.w[2] ||
      ((CA4.w[2] == CY51.w[2])
       && (__unsigned_compare_gt_128 (CA4, CY51)))) {
    // stage 2: quotient chunk scaled by 2^49

    // 2^(-49)
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = -1 + (UINT64) lq;
    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    __mul_64x64_to_128 (A2h, Q, CY.w[1]);
    A2.w[1] += A2h.w[0];
    if (A2.w[1] < A2h.w[0])
      A2h.w[1]++;

    // A2 <<= 49
    CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49));
    CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    CA2.w[0] = A2.w[0] << 49;
    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
                         carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;
    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;
  }

  Q = (UINT64) lq;
  // final quotient word and exact correction
  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CA4, CA4, A2);
  if ((SINT64) CA4.w[1] < 0) {
    // remainder went negative: decrement Q and add CY back (at most twice)
    Q--;
    CA4.w[0] += CY.w[0];
    if (CA4.w[0] < CY.w[0])
      CA4.w[1]++;
    CA4.w[1] += CY.w[1];
    if ((SINT64) CA4.w[1] < 0) {
      Q--;
      CA4.w[0] += CY.w[0];
      if (CA4.w[0] < CY.w[0])
        CA4.w[1]++;
      CA4.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CA4, CY)) {
    // remainder still >= CY: one more unit of quotient
    Q++;
    __sub_128_128 (CA4, CA4, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCA4->w[1] = CA4.w[1];
  pCA4->w[0] = CA4.w[0];
}

#endif
#endif