1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "sqrt_macros.h"
32 BID128_FUNCTION_ARG1(__bid128_sqrt, x)
34 UINT256 M256, C256, C4, C8;
35 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
39 int exponent_x = 0, bin_expon_cx;
40 int digits, scale, exponent_q;
42 // unpack arguments, check for NaN or Infinity
43 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
47 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
48 #ifdef SET_STATUS_FLAGS
49 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
50 __set_status_flags (pfpsf, INVALID_EXCEPTION);
52 res.w[1] &= QUIET_MASK64;
56 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
59 res.w[1] = 0x7c00000000000000ull;
60 #ifdef SET_STATUS_FLAGS
61 __set_status_flags (pfpsf, INVALID_EXCEPTION);
70 ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) <<
75 res.w[1] = 0x7c00000000000000ull;
77 #ifdef SET_STATUS_FLAGS
78 __set_status_flags (pfpsf, INVALID_EXCEPTION);
86 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
87 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
88 digits = __bid_estimate_decimal_digits[bin_expon_cx];
92 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
93 A10.w[0] = CX.w[0] << 3;
94 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
95 CX2.w[0] = CX.w[0] << 1;
96 __add_128_128 (A10, A10, CX2);
99 CS.w[0] = short_sqrt128 (A10);
101 // check for exact result
102 if (CS.w[0] * CS.w[0] == A10.w[0]) {
103 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
104 if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])
106 get_BID128_very_fast (&res, 0,
108 DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
112 // get number of digits in CX
113 D = CX.w[1] - __bid_power10_index_binexp_128[bin_expon_cx].w[1];
115 || (!D && CX.w[0] >= __bid_power10_index_binexp_128[bin_expon_cx].w[0]))
118 // if exponent is odd, scale coefficient by 10
120 exponent_q = exponent_x - scale;
121 scale += (exponent_q & 1); // exp. bias is even
124 T128 = __bid_power10_table_128[scale - 37];
125 __mul_128x128_low (CX1, CX, T128);
127 TP128 = __bid_power10_table_128[37];
128 __mul_128x128_to_256 (C256, CX1, TP128);
130 T128 = __bid_power10_table_128[scale];
131 __mul_128x128_to_256 (C256, CX, T128);
136 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
137 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
138 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
139 C4.w[0] = C256.w[0] << 2;
141 long_sqrt128 (&CS, C256);
143 #ifndef IEEE_ROUND_NEAREST
144 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
145 if (!((rnd_mode) & 3)) {
148 // compare to midpoints
149 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
150 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
152 //__mul_128x128_to_256(M256, CSM, CSM);
153 __sqr128_to_256 (M256, CSM);
155 if (C4.w[3] > M256.w[3]
156 || (C4.w[3] == M256.w[3]
157 && (C4.w[2] > M256.w[2]
158 || (C4.w[2] == M256.w[2]
159 && (C4.w[1] > M256.w[1]
160 || (C4.w[1] == M256.w[1]
161 && C4.w[0] > M256.w[0])))))) {
167 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
168 C8.w[0] = CS.w[0] << 3;
170 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
171 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
172 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
173 M256.w[3] = M256.w[3] - Carry;
175 // if CSM' > C256, round up
176 if (M256.w[3] > C4.w[3]
177 || (M256.w[3] == C4.w[3]
178 && (M256.w[2] > C4.w[2]
179 || (M256.w[2] == C4.w[2]
180 && (M256.w[1] > C4.w[1]
181 || (M256.w[1] == C4.w[1]
182 && M256.w[0] > C4.w[0])))))) {
189 #ifndef IEEE_ROUND_NEAREST
190 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
192 __sqr128_to_256 (M256, CS);
193 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
194 C8.w[0] = CS.w[0] << 1;
195 if (M256.w[3] > C256.w[3]
196 || (M256.w[3] == C256.w[3]
197 && (M256.w[2] > C256.w[2]
198 || (M256.w[2] == C256.w[2]
199 && (M256.w[1] > C256.w[1]
200 || (M256.w[1] == C256.w[1]
201 && M256.w[0] > C256.w[0])))))) {
202 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
203 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
204 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
205 M256.w[3] = M256.w[3] - Carry;
220 if (M256.w[3] > C256.w[3]
221 || (M256.w[3] == C256.w[3]
222 && (M256.w[2] > C256.w[2]
223 || (M256.w[2] == C256.w[2]
224 && (M256.w[1] > C256.w[1]
225 || (M256.w[1] == C256.w[1]
226 && M256.w[0] > C256.w[0])))))) {
235 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
236 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
237 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
238 M256.w[3] = M256.w[3] + Carry;
248 if (M256.w[3] < C256.w[3]
249 || (M256.w[3] == C256.w[3]
250 && (M256.w[2] < C256.w[2]
251 || (M256.w[2] == C256.w[2]
252 && (M256.w[1] < C256.w[1]
253 || (M256.w[1] == C256.w[1]
254 && M256.w[0] <= C256.w[0])))))) {
262 if ((rnd_mode) == ROUNDING_UP) {
272 #ifdef SET_STATUS_FLAGS
273 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
275 get_BID128_fast (&res, 0,
276 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);