OSDN Git Service

Fix bug added to sparc 64-bit sub_ddmmss() implementation.
[pf3gnuchains/gcc-fork.git] / libgcc / longlong.h
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4    Free Software Foundation, Inc.
5
6    This file is part of the GNU C Library.
7
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
12
13    In addition to the permissions in the GNU Lesser General Public
14    License, the Free Software Foundation gives you unlimited
15    permission to link the compiled version of this file into
16    combinations with other programs, and to distribute those
17    combinations without any restriction coming from the use of this
18    file.  (The Lesser General Public License restrictions do apply in
19    other respects; for example, they cover modification of the file,
20    and distribution when not linked into a combine executable.)
21
22    The GNU C Library is distributed in the hope that it will be useful,
23    but WITHOUT ANY WARRANTY; without even the implied warranty of
24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25    Lesser General Public License for more details.
26
27    You should have received a copy of the GNU Lesser General Public
28    License along with the GNU C Library; if not, see
29    <http://www.gnu.org/licenses/>.  */
30
31 /* You have to define the following before including this file:
32
33    UWtype -- An unsigned type, default type for operations (typically a "word")
34    UHWtype -- An unsigned type, at least half the size of UWtype.
35    UDWtype -- An unsigned type, at least twice as large as UWtype.
36    W_TYPE_SIZE -- size in bits of UWtype
37
38    UQItype -- Unsigned 8 bit type.
39    SItype, USItype -- Signed and unsigned 32 bit types.
40    DItype, UDItype -- Signed and unsigned 64 bit types.
41
42    On a 32 bit machine UWtype should typically be USItype;
43    on a 64 bit machine, UWtype should typically be UDItype.  */
44
/* Helpers for working with half-words of a UWtype.  They mention
   W_TYPE_SIZE and UWtype only in their replacement text, which is
   expanded at the point of use, so it does not matter that the default
   configuration is supplied a few lines further down.  */
/* A quarter of the word size, in bits.  */
45 #define __BITS4 (W_TYPE_SIZE / 4)
/* 2^(W_TYPE_SIZE/2) as a UWtype, i.e. the half-word base.  */
46 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
/* Low and high half of T after conversion to UWtype.  */
47 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
48 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
49
/* Default configuration when the includer did not define W_TYPE_SIZE:
   32-bit words built on USItype, with UDItype as the double word.  */
50 #ifndef W_TYPE_SIZE
51 #define W_TYPE_SIZE     32
52 #define UWtype          USItype
53 #define UHWtype         USItype
54 #define UDWtype         UDItype
55 #endif
56
57 /* Used in glibc only.  */
/* Default attribute_hidden to nothing so that the declaration below is
   still valid when the includer does not provide the macro.  */
58 #ifndef attribute_hidden
59 #define attribute_hidden
60 #endif
61
/* 256-entry byte-indexed lookup table, defined elsewhere.  In the part
   of this file visible here it is used by the non-CIX alpha
   count_leading_zeros.  */
62 extern const UQItype __clz_tab[256] attribute_hidden;
63
64 /* Define auxiliary asm macros.
65
66    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
67    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
68    word product in HIGH_PROD and LOW_PROD.
69
70    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
71    UDWtype product.  This is just a variant of umul_ppmm.
72
73    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
74    denominator) divides a UDWtype, composed by the UWtype integers
75    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
76    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
77    than DENOMINATOR for correct operation.  If, in addition, the most
78    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
79    UDIV_NEEDS_NORMALIZATION is defined to 1.
80
81    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
82    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
83    is rounded towards 0.
84
85    5) count_leading_zeros(count, x) counts the number of zero-bits from the
86    msb to the first nonzero bit in the UWtype X.  This is the number of
87    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
88    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
89
90    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
91    from the least significant end.
92
93    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
94    high_addend_2, low_addend_2) adds two two-word UWtype integers,
95    composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and
96    LOW_ADDEND_2 respectively.  The result is placed in HIGH_SUM and
97    LOW_SUM.  Overflow (i.e. carry out) is not stored anywhere, and is lost.
98
99    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
100    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
101    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
102    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
103    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
104    and is lost.
105
106    If any of these macros are left undefined for a particular CPU,
107    C macros are used.  */
108
109 /* The CPUs come in alphabetical order below.
110
111    Please add support for more CPUs here, or improve the current support
112    for the CPUs below!
113    (E.g. WE32100, IBM360.)  */
114
115 #if defined (__GNUC__) && !defined (NO_ASM)
116
117 /* We sometimes need to clobber "cc" with gcc2, but that would not be
118    understood by gcc1.  Use cpp to avoid major code duplication.  */
/* __CLOBBER_CC opens a clobber list (it carries the leading ':');
   __AND_CLOBBER_CC appends to a clobber list that already exists (it
   carries the leading ',').  Both expand to nothing when __GNUC__ < 2.  */
119 #if __GNUC__ < 2
120 #define __CLOBBER_CC
121 #define __AND_CLOBBER_CC
122 #else /* __GNUC__ >= 2 */
123 #define __CLOBBER_CC : "cc"
124 #define __AND_CLOBBER_CC , "cc"
125 #endif /* __GNUC__ < 2 */
126
/* Alpha with 64-bit words.  */
127 #if defined (__alpha) && W_TYPE_SIZE == 64
/* umul_ppmm: the high word comes from __builtin_alpha_umulh and the low
   word from the ordinary (truncating) UDItype product.  The operands are
   copied into locals first, so each argument is evaluated once.  */
128 #define umul_ppmm(ph, pl, m0, m1) \
129   do {                                                                  \
130     UDItype __m0 = (m0), __m1 = (m1);                                   \
131     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
132     (pl) = __m0 * __m1;                                                 \
133   } while (0)
134 #define UMUL_TIME 46
/* udiv_qrnnd: delegates to the out-of-line __udiv_qrnnd, which returns
   the quotient and stores the remainder through its first argument.
   Only provided when LONGLONG_STANDALONE is not defined.  */
135 #ifndef LONGLONG_STANDALONE
136 #define udiv_qrnnd(q, r, n1, n0, d) \
137   do { UDItype __r;                                                     \
138     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
139     (r) = __r;                                                          \
140   } while (0)
141 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
142 #define UDIV_TIME 220
143 #endif /* LONGLONG_STANDALONE */
/* When __alpha_cix__ is defined, use the compiler builtins directly.  */
144 #ifdef __alpha_cix__
145 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
146 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
147 #define COUNT_LEADING_ZEROS_0 64
148 #else
/* Fallback count_leading_zeros: cmpbge against zero gives a per-byte
   mask; __clz_tab on the inverted mask yields a byte index __a; extbl
   extracts that byte and __clz_tab resolves the bit position inside it.
   NOTE(review): presumably cmpbge (0, x) sets one mask bit per zero byte
   of x -- confirm against the Alpha builtin documentation.  */
149 #define count_leading_zeros(COUNT,X) \
150   do {                                                                  \
151     UDItype __xr = (X), __t, __a;                                       \
152     __t = __builtin_alpha_cmpbge (0, __xr);                             \
153     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
154     __t = __builtin_alpha_extbl (__xr, __a);                            \
155     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
156   } while (0)
/* Fallback count_trailing_zeros: a two-stage search.  First the lowest
   set bit of the inverted cmpbge mask is isolated and its index computed
   with the 0xCC/0xF0/0xAA tests (worth 2, 4 and 1), giving a byte index.
   That byte is extracted, its lowest set bit isolated with __t &= -__t,
   and the same three tests give the bit index within the byte.  The
   result is byte index * 8 + bit index.  */
157 #define count_trailing_zeros(COUNT,X) \
158   do {                                                                  \
159     UDItype __xr = (X), __t, __a;                                       \
160     __t = __builtin_alpha_cmpbge (0, __xr);                             \
161     __t = ~__t & -~__t;                                                 \
162     __a = ((__t & 0xCC) != 0) * 2;                                      \
163     __a += ((__t & 0xF0) != 0) * 4;                                     \
164     __a += ((__t & 0xAA) != 0);                                         \
165     __t = __builtin_alpha_extbl (__xr, __a);                            \
166     __a <<= 3;                                                          \
167     __t &= -__t;                                                        \
168     __a += ((__t & 0xCC) != 0) * 2;                                     \
169     __a += ((__t & 0xF0) != 0) * 4;                                     \
170     __a += ((__t & 0xAA) != 0);                                         \
171     (COUNT) = __a;                                                      \
172   } while (0)
173 #endif /* __alpha_cix__ */
174 #endif /* __alpha */
175
/* ARC with 32-bit words.  */
176 #if defined (__arc__) && W_TYPE_SIZE == 32
/* add_ssaaaa: the low words (%4, %5) are added first into %1, then adc
   adds the high words (%2, %3) into %0.  sl is early-clobber ("=&r")
   because it is written before the high-word inputs are read.
   NOTE(review): presumably the .f suffix makes add update the carry
   consumed by adc -- confirm against the ARC ISA.  Unlike the ARM
   variant in this file, no "cc" clobber is declared here.  */
177 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
178   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
179            : "=r" ((USItype) (sh)),                                     \
180              "=&r" ((USItype) (sl))                                     \
181            : "%r" ((USItype) (ah)),                                     \
182              "rIJ" ((USItype) (bh)),                                    \
183              "%r" ((USItype) (al)),                                     \
184              "rIJ" ((USItype) (bl)))
/* sub_ddmmss: same operand layout as add_ssaaaa, using sub.f/sbc.  The
   minuend operands carry no '%' modifier, unlike the addends above.  */
185 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
186   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
187            : "=r" ((USItype) (sh)),                                     \
188              "=&r" ((USItype) (sl))                                     \
189            : "r" ((USItype) (ah)),                                      \
190              "rIJ" ((USItype) (bh)),                                    \
191              "r" ((USItype) (al)),                                      \
192              "rIJ" ((USItype) (bl)))
/* umul_ppmm: computes the double-word product with the __umulsidi3
   function declared below and splits it through a DWunion (a type that
   is declared outside this file).
   NOTE(review): w1 and w0 are used unparenthesized as assignment
   targets, so callers should pass plain lvalues.  */
193 /* Call libgcc routine.  */
194 #define umul_ppmm(w1, w0, u, v) \
195 do {                                                                    \
196   DWunion __w;                                                          \
197   __w.ll = __umulsidi3 (u, v);                                          \
198   w1 = __w.s.high;                                                      \
199   w0 = __w.s.low;                                                       \
200 } while (0)
/* The self-referential define makes __umulsidi3 visible to preprocessor
   tests while leaving calls untouched; presumably later code in this
   header checks defined (__umulsidi3) -- not visible here.  */
201 #define __umulsidi3 __umulsidi3
202 UDItype __umulsidi3 (USItype, USItype);
203 #endif
204
/* ARM with 32-bit words.  The guard accepts Thumb-2 and non-Thumb
   builds and excludes builds where only __thumb__ is defined.  */
205 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
206  && W_TYPE_SIZE == 32
/* add_ssaaaa: adds on the low words (%4, %5) followed by adc on the high
   words (%2, %3).  sl is early-clobber because it is written before the
   high-word inputs are read; "cc" is clobbered via __CLOBBER_CC.  */
207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
209            : "=r" ((USItype) (sh)),                                     \
210              "=&r" ((USItype) (sl))                                     \
211            : "%r" ((USItype) (ah)),                                     \
212              "rI" ((USItype) (bh)),                                     \
213              "%r" ((USItype) (al)),                                     \
214              "rI" ((USItype) (bl)) __CLOBBER_CC)
/* sub_ddmmss: subs on the low words followed by sbc on the high words,
   with the same operand layout as add_ssaaaa.  */
215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
217            : "=r" ((USItype) (sh)),                                     \
218              "=&r" ((USItype) (sl))                                     \
219            : "r" ((USItype) (ah)),                                      \
220              "rI" ((USItype) (bh)),                                     \
221              "r" ((USItype) (al)),                                      \
222              "rI" ((USItype) (bl)) __CLOBBER_CC)
/* umul_ppmm for ARM architectures 2, 2a and 3: built from 16-bit
   halves.  Operands: %0 = xh, %1 = xl, %2..%4 = scratch, %5 = a, %6 = b.
   Each input is split into a high half (mov ..., lsr #16) and a low half
   (bic), four mul instructions form the partial products, the two cross
   products are summed (addcs adds 65536 to the high word when that sum
   carries), and the result is folded into xl/xh by the final adds/adc.  */
223 # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
224      || defined(__ARM_ARCH_3__)
225 #  define umul_ppmm(xh, xl, a, b)                                       \
226   do {                                                                  \
227     register USItype __t0, __t1, __t2;                                  \
228     __asm__ ("%@ Inlined umul_ppmm\n"                                   \
229            "    mov     %2, %5, lsr #16\n"                              \
230            "    mov     %0, %6, lsr #16\n"                              \
231            "    bic     %3, %5, %2, lsl #16\n"                          \
232            "    bic     %4, %6, %0, lsl #16\n"                          \
233            "    mul     %1, %3, %4\n"                                   \
234            "    mul     %4, %2, %4\n"                                   \
235            "    mul     %3, %0, %3\n"                                   \
236            "    mul     %0, %2, %0\n"                                   \
237            "    adds    %3, %4, %3\n"                                   \
238            "    addcs   %0, %0, #65536\n"                               \
239            "    adds    %1, %1, %3, lsl #16\n"                          \
240            "    adc     %0, %0, %3, lsr #16"                            \
241            : "=&r" ((USItype) (xh)),                                    \
242              "=r" ((USItype) (xl)),                                     \
243              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
244            : "r" ((USItype) (a)),                                       \
245              "r" ((USItype) (b)) __CLOBBER_CC );                        \
246   } while (0)
247 #  define UMUL_TIME 20
248 # else
/* umul_ppmm for all other ARM architectures: a plain C widening
   multiply (both operands narrowed to USItype, product formed in
   UDItype), split into its low and high 32 bits.  */
249 #  define umul_ppmm(xh, xl, a, b)                                       \
250   do {                                                                  \
251     /* Generate umull, under compiler control.  */                      \
252     register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);       \
253     (xl) = (USItype)__t0;                                               \
254     (xh) = (USItype)(__t0 >> 32);                                       \
255   } while (0)
256 #  define UMUL_TIME 3
257 # endif
258 # define UDIV_TIME 100
259 #endif /* __arm__ */
260
/* Bit-counting macros for every __arm__ build.  Unlike the arithmetic
   macros above, this guard checks neither W_TYPE_SIZE nor the Thumb
   mode.
   NOTE(review): COUNT_LEADING_ZEROS_0 is declared as 32, while GCC
   documents __builtin_clz (0) as undefined -- presumably this relies on
   the behaviour of the ARM clz instruction; confirm.  */
261 #if defined(__arm__)
262 /* Let gcc decide how best to implement count_leading_zeros.  */
263 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
264 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
265 #define COUNT_LEADING_ZEROS_0 32
266 #endif
267
/* AVR: only the bit-counting macros are provided.  The builtin is
   chosen by W_TYPE_SIZE: the int variant for 16, the long variant for
   32 and the long long variant for 64 (presumably matching the widths
   of those types on AVR -- confirm).  In each case
   COUNT_LEADING_ZEROS_0 equals the word size.  */
268 #if defined (__AVR__)
269
270 #if W_TYPE_SIZE == 16
271 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
272 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
273 #define COUNT_LEADING_ZEROS_0 16
274 #endif /* W_TYPE_SIZE == 16 */
275
276 #if W_TYPE_SIZE == 32
277 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
278 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
279 #define COUNT_LEADING_ZEROS_0 32
280 #endif /* W_TYPE_SIZE == 32 */
281
282 #if W_TYPE_SIZE == 64
283 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
284 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
285 #define COUNT_LEADING_ZEROS_0 64
286 #endif /* W_TYPE_SIZE == 64 */
287
288 #endif /* defined (__AVR__) */
289
/* CRIS: count_leading_zeros maps to __builtin_clz from architecture
   version 3 on; count_trailing_zeros maps to __builtin_ctz only from
   version 8 on.  No COUNT_LEADING_ZEROS_0 is defined here.  */
290 #if defined (__CRIS__) && __CRIS_arch_version >= 3
291 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
292 #if __CRIS_arch_version >= 8
293 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
294 #endif
295 #endif /* __CRIS__ */
296
/* HP PA-RISC with 32-bit words.  */
297 #if defined (__hppa) && W_TYPE_SIZE == 32
/* add_ssaaaa: add on the low words (%4, %5) into %1, then addc on the
   high words (%2, %3) into %0.  The destination is the last operand of
   each instruction.  sl is early-clobber because it is written before
   the high-word inputs are read.  */
298 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
299   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
300            : "=r" ((USItype) (sh)),                                     \
301              "=&r" ((USItype) (sl))                                     \
302            : "%rM" ((USItype) (ah)),                                    \
303              "rM" ((USItype) (bh)),                                     \
304              "%rM" ((USItype) (al)),                                    \
305              "rM" ((USItype) (bl)))
/* sub_ddmmss: sub on the low words, then subb on the high words, with
   the same operand layout as add_ssaaaa.  */
306 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
307   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
308            : "=r" ((USItype) (sh)),                                     \
309              "=&r" ((USItype) (sl))                                     \
310            : "rM" ((USItype) (ah)),                                     \
311              "rM" ((USItype) (bh)),                                     \
312              "rM" ((USItype) (al)),                                     \
313              "rM" ((USItype) (bl)))
/* umul_ppmm (only when _PA_RISC1_1 is defined): xmpyu writes a UDItype
   result, using the "x" constraint for all three operands.  The result
   is split through a union whose first struct member (__w1) becomes the
   high word and whose second (__w0) becomes the low word.  */
314 #if defined (_PA_RISC1_1)
315 #define umul_ppmm(w1, w0, u, v) \
316   do {                                                                  \
317     union                                                               \
318       {                                                                 \
319         UDItype __f;                                                    \
320         struct {USItype __w1, __w0;} __w1w0;                            \
321       } __t;                                                            \
322     __asm__ ("xmpyu %1,%2,%0"                                           \
323              : "=x" (__t.__f)                                           \
324              : "x" ((USItype) (u)),                                     \
325                "x" ((USItype) (v)));                                    \
326     (w1) = __t.__w1w0.__w1;                                             \
327     (w0) = __t.__w1w0.__w0;                                             \
328      } while (0)
329 #define UMUL_TIME 8
330 #else
331 #define UMUL_TIME 30
332 #endif
333 #define UDIV_TIME 40
/* count_leading_zeros: a binary search written with extru instructions.
   %0 (count) starts at 1; %1 is a scratch copy of x (the input is tied
   to the __tmp output through the "1" constraint, so the asm may modify
   it).  As the inline asm comments describe, each step tests whether the
   upper 16, 8, 4 and then 2 bits of the remaining field are zero; if so
   the width is added to the count, otherwise the value is shifted down.
   Finally the last remaining bit is extracted and subtracted from the
   count.  */
334 #define count_leading_zeros(count, x) \
335   do {                                                                  \
336     USItype __tmp;                                                      \
337     __asm__ (                                                           \
338        "ldi             1,%0\n"                                         \
339 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
340 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
341 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
342 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
343 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
344 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
345 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
346 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
347 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
348 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
349 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
350 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
351 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
352 "       sub             %0,%1,%0                ; Subtract it.\n"       \
353         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
354   } while (0)
355 #endif
356
/* IBM 370 / S/390 / MVS with 32-bit words.  Only the signed primitives
   smul_ppmm and sdiv_qrnnd are provided, in two flavours selected by
   __zarch__.  */
357 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
358 #if !defined (__zarch__)
/* Non-zarch smul_ppmm: the product is formed in a DItype operand (%0).
   lr first copies m0 into %N0, then mr multiplies by m1.  The result is
   split through a union whose first struct member is the high word.
   NOTE(review): presumably %N0 names the second register of the pair
   holding operand 0 -- confirm against the GCC s390 operand modifiers.  */
359 #define smul_ppmm(xh, xl, m0, m1) \
360   do {                                                                  \
361     union {DItype __ll;                                                 \
362            struct {USItype __h, __l;} __i;                              \
363           } __x;                                                        \
364     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
365              : "=&r" (__x.__ll)                                         \
366              : "r" (m0), "r" (m1));                                     \
367     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
368   } while (0)
/* Non-zarch sdiv_qrnnd: n1:n0 is assembled in the union, dr divides it
   in place by d (the input is tied to the output with "0"), and the
   quotient is then read from the low member and the remainder from the
   high member.  */
369 #define sdiv_qrnnd(q, r, n1, n0, d) \
370   do {                                                                  \
371     union {DItype __ll;                                                 \
372            struct {USItype __h, __l;} __i;                              \
373           } __x;                                                        \
374     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
375     __asm__ ("dr %0,%2"                                                 \
376              : "=r" (__x.__ll)                                          \
377              : "0" (__x.__ll), "r" (d));                                \
378     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
379   } while (0)
380 #else
/* zarch smul_ppmm: the operands are pinned to registers "0" and "1"
   with explicit register variables, and the instruction names %r0
   literally.  m0 is loaded into __r1; afterwards __r0 supplies the high
   word and __r1 the low word.  */
381 #define smul_ppmm(xh, xl, m0, m1) \
382   do {                                                                  \
383     register SItype __r0 __asm__ ("0");                                 \
384     register SItype __r1 __asm__ ("1") = (m0);                          \
385                                                                         \
386     __asm__ ("mr\t%%r0,%3"                                              \
387              : "=r" (__r0), "=r" (__r1)                                 \
388              : "r"  (__r1),  "r" (m1));                                 \
389     (xh) = __r0; (xl) = __r1;                                           \
390   } while (0)
391
/* zarch sdiv_qrnnd: n1 goes into register "0" and n0 into register "1";
   after dr the quotient is read from __r1 and the remainder from __r0.  */
392 #define sdiv_qrnnd(q, r, n1, n0, d) \
393   do {                                                                  \
394     register SItype __r0 __asm__ ("0") = (n1);                          \
395     register SItype __r1 __asm__ ("1") = (n0);                          \
396                                                                         \
397     __asm__ ("dr\t%%r0,%4"                                              \
398              : "=r" (__r0), "=r" (__r1)                                 \
399              : "r" (__r0), "r" (__r1), "r" (d));                        \
400     (q) = __r1; (r) = __r0;                                             \
401   } while (0)
402 #endif /* __zarch__ */
403 #endif
404
/* 32-bit x86.  The asm templates use GCC's {att|intel} dialect
   alternatives, so each instruction is spelled for both syntaxes.  */
405 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* add_ssaaaa: add on the low words, then adc on the high words.  ah and
   al are tied to the outputs with matching constraints ("0", "1"), so
   the instructions update them in place.  sl is early-clobber because
   it is written before bh is read.  No "cc" clobber is listed.  */
406 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
407   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
408            : "=r" ((USItype) (sh)),                                     \
409              "=&r" ((USItype) (sl))                                     \
410            : "%0" ((USItype) (ah)),                                     \
411              "g" ((USItype) (bh)),                                      \
412              "%1" ((USItype) (al)),                                     \
413              "g" ((USItype) (bl)))
/* sub_ddmmss: sub on the low words, then sbb on the high words.  Same
   layout as add_ssaaaa, but the minuend operands carry no '%' modifier.  */
414 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
415   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
416            : "=r" ((USItype) (sh)),                                     \
417              "=&r" ((USItype) (sl))                                     \
418            : "0" ((USItype) (ah)),                                      \
419              "g" ((USItype) (bh)),                                      \
420              "1" ((USItype) (al)),                                      \
421              "g" ((USItype) (bl)))
/* umul_ppmm: a single mul.  u is tied to the "a" output, which receives
   the low word w0; the high word w1 is the "d" output.  */
422 #define umul_ppmm(w1, w0, u, v) \
423   __asm__ ("mul{l} %3"                                                  \
424            : "=a" ((USItype) (w0)),                                     \
425              "=d" ((USItype) (w1))                                      \
426            : "%0" ((USItype) (u)),                                      \
427              "rm" ((USItype) (v)))
/* udiv_qrnnd: a single div.  n0 is tied to the "a" operand and n1 to
   the "d" operand; the quotient is the "a" output and the remainder the
   "d" output.  */
428 #define udiv_qrnnd(q, r, n1, n0, dv) \
429   __asm__ ("div{l} %4"                                                  \
430            : "=a" ((USItype) (q)),                                      \
431              "=d" ((USItype) (r))                                       \
432            : "0" ((USItype) (n0)),                                      \
433              "1" ((USItype) (n1)),                                      \
434              "rm" ((USItype) (dv)))
/* Bit counting goes through the compiler builtins.  No
   COUNT_LEADING_ZEROS_0 is defined in this section.  */
435 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
436 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
437 #define UMUL_TIME 40
438 #define UDIV_TIME 40
439 #endif /* 80x86 */
440
/* x86 with 64-bit words.  The guard also accepts __i386__ when
   W_TYPE_SIZE is 64.  The macros mirror the 32-bit x86 ones, with the q
   instruction suffix, UDItype operands and "rme" in place of "g".  */
441 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
/* add_ssaaaa: add on the low words, then adc on the high words.  ah and
   al are tied to the outputs by matching constraints; sl is
   early-clobber because it is written before bh is read.  */
442 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
443   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
444            : "=r" ((UDItype) (sh)),                                     \
445              "=&r" ((UDItype) (sl))                                     \
446            : "%0" ((UDItype) (ah)),                                     \
447              "rme" ((UDItype) (bh)),                                    \
448              "%1" ((UDItype) (al)),                                     \
449              "rme" ((UDItype) (bl)))
/* sub_ddmmss: sub on the low words, then sbb on the high words.  */
450 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
451   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
452            : "=r" ((UDItype) (sh)),                                     \
453              "=&r" ((UDItype) (sl))                                     \
454            : "0" ((UDItype) (ah)),                                      \
455              "rme" ((UDItype) (bh)),                                    \
456              "1" ((UDItype) (al)),                                      \
457              "rme" ((UDItype) (bl)))
/* umul_ppmm: a single mul.  u is tied to the "a" output (low word w0);
   the high word w1 is the "d" output.  */
458 #define umul_ppmm(w1, w0, u, v) \
459   __asm__ ("mul{q} %3"                                                  \
460            : "=a" ((UDItype) (w0)),                                     \
461              "=d" ((UDItype) (w1))                                      \
462            : "%0" ((UDItype) (u)),                                      \
463              "rm" ((UDItype) (v)))
/* udiv_qrnnd: a single div.  n0 is tied to "a" and n1 to "d"; the
   quotient is the "a" output and the remainder the "d" output.  */
464 #define udiv_qrnnd(q, r, n1, n0, dv) \
465   __asm__ ("div{q} %4"                                                  \
466            : "=a" ((UDItype) (q)),                                      \
467              "=d" ((UDItype) (r))                                       \
468            : "0" ((UDItype) (n0)),                                      \
469              "1" ((UDItype) (n1)),                                      \
470              "rm" ((UDItype) (dv)))
/* Bit counting uses the long long builtins.  No COUNT_LEADING_ZEROS_0
   is defined in this section.  */
471 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
472 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
473 #define UMUL_TIME 40
474 #define UDIV_TIME 40
475 #endif /* x86_64 */
476
/* Intel i960 with 32-bit words.  Both macros are GNU statement
   expressions built around the emul instruction, which writes a UDItype
   result.  */
477 #if defined (__i960__) && W_TYPE_SIZE == 32
/* umul_ppmm: the UDItype product is split through a union whose first
   struct member (__l) is the low word and whose second (__h) is the
   high word.  */
478 #define umul_ppmm(w1, w0, u, v) \
479   ({union {UDItype __ll;                                                \
480            struct {USItype __l, __h;} __i;                              \
481           } __xx;                                                       \
482   __asm__ ("emul        %2,%1,%0"                                       \
483            : "=d" (__xx.__ll)                                           \
484            : "%dI" ((USItype) (u)),                                     \
485              "dI" ((USItype) (v)));                                     \
486   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* __umulsidi3: the same multiplication, yielding the whole UDItype
   product as the value of the statement expression.  */
487 #define __umulsidi3(u, v) \
488   ({UDItype __w;                                                        \
489     __asm__ ("emul      %2,%1,%0"                                       \
490              : "=d" (__w)                                               \
491              : "%dI" ((USItype) (u)),                                   \
492                "dI" ((USItype) (v)));                                   \
493     __w; })
494 #endif /* __i960__ */
495
/* IA-64 with 64-bit words.  */
496 #if defined (__ia64) && W_TYPE_SIZE == 64
497 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
498    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
499    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
500    register, which takes an extra cycle.  */
/* sub_ddmmss in plain C: the low difference is computed into a
   temporary first and stored last, and the high difference borrows one
   when al < bl.
   NOTE(review): al and bl each appear twice in the expansion, so
   arguments with side effects are evaluated twice.  */
501 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
502   do {                                                                  \
503     UWtype __x;                                                         \
504     __x = (al) - (bl);                                                  \
505     if ((al) < (bl))                                                    \
506       (sh) = (ah) - (bh) - 1;                                           \
507     else                                                                \
508       (sh) = (ah) - (bh);                                               \
509     (sl) = __x;                                                         \
510   } while (0)
511
512 /* Do both product parts in assembly, since that gives better code with
513    all gcc versions.  Some callers will just use the upper part, and in
514    that situation we waste an instruction, but not any cycles.  */
/* umul_ppmm: xma.hu produces ph and xma.l produces pl, both with f0 as
   the addend operand.  All operands use the "f" constraint; ph is
   early-clobber because it is written before the second instruction
   reads the inputs.  */
515 #define umul_ppmm(ph, pl, m0, m1)                                       \
516   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
517            : "=&f" (ph), "=f" (pl)                                      \
518            : "f" (m0), "f" (m1))
/* count_leading_zeros: mux1 @rev and czx1.l derive a byte position _a
   from x; _c = (_a - 1) << 3 is the matching bit offset (a multiple of
   8).  After shifting x right by _c, the remaining tests narrow by 4, 2
   and 1 bits, and the count is W_TYPE_SIZE - 1 - _c.
   NOTE(review): presumably mux1 @rev reverses the byte order and czx1.l
   returns the index of the first zero byte -- confirm against the
   Itanium instruction set manual.  */
519 #define count_leading_zeros(count, x)                                   \
520   do {                                                                  \
521     UWtype _x = (x), _y, _a, _c;                                        \
522     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
523     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
524     _c = (_a - 1) << 3;                                                 \
525     _x >>= _c;                                                          \
526     if (_x >= 1 << 4)                                                   \
527       _x >>= 4, _c += 4;                                                \
528     if (_x >= 1 << 2)                                                   \
529       _x >>= 2, _c += 2;                                                \
530     _c += _x >> 1;                                                      \
531     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
532   } while (0)
533 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
534    based, and we don't need a special case for x==0 here */
/* count_trailing_zeros: (x - 1) & ~x has a one exactly in each position
   below the lowest set bit of x, so its population count is the number
   of trailing zeros.  */
535 #define count_trailing_zeros(count, x)                                  \
536   do {                                                                  \
537     UWtype __ctz_x = (x);                                               \
538     __asm__ ("popcnt %0 = %1"                                           \
539              : "=r" (count)                                             \
540              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
541   } while (0)
542 #define UMUL_TIME 14
543 #endif
544
/* Renesas M32R with 32-bit words.  Both macros issue cmp %0,%0 first
   (per the inline comment, to clear the condition bit), then an
   addx/subx on the low words followed by one on the high words.  ah and
   al are tied to the outputs with matching constraints, so the
   instructions update them in place, and "cbit" is declared clobbered.  */
545 #if defined (__M32R__) && W_TYPE_SIZE == 32
546 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
547   /* The cmp clears the condition bit.  */ \
548   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
549            : "=r" ((USItype) (sh)),                                     \
550              "=&r" ((USItype) (sl))                                     \
551            : "0" ((USItype) (ah)),                                      \
552              "r" ((USItype) (bh)),                                      \
553              "1" ((USItype) (al)),                                      \
554              "r" ((USItype) (bl))                                       \
555            : "cbit")
556 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
557   /* The cmp clears the condition bit.  */ \
558   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
559            : "=r" ((USItype) (sh)),                                     \
560              "=&r" ((USItype) (sl))                                     \
561            : "0" ((USItype) (ah)),                                      \
562              "r" ((USItype) (bh)),                                      \
563              "1" ((USItype) (al)),                                      \
564              "r" ((USItype) (bl))                                       \
565            : "cbit")
566 #endif /* __M32R__ */
567
568 #if defined (__mc68000__) && W_TYPE_SIZE == 32
/* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  The first instruction adds
   BL into the low word (operand 1, preloaded with AL); the second (addx)
   adds BH into the high word (operand 0, preloaded with AH).  SL is an
   earlyclobber output because it is written before BH is read.  The "%"
   markers let the compiler swap the commutative operand pairs.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\t"                                           \
           "addx%.l %3,%0"                                              \
           : "=d" ((USItype) (sh)), "=&d" ((USItype) (sl))              \
           : "%0" ((USItype) (ah)), "d" ((USItype) (bh)),               \
             "%1" ((USItype) (al)), "g" ((USItype) (bl)))
/* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  The first instruction
   subtracts BL from the low word (operand 1, preloaded with AL); the
   second (subx) subtracts BH from the high word (operand 0, preloaded with
   AH).  SL is an earlyclobber output because it is written before BH is
   read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\t"                                           \
           "subx%.l %3,%0"                                              \
           : "=d" ((USItype) (sh)), "=&d" ((USItype) (sl))              \
           : "0" ((USItype) (ah)), "d" ((USItype) (bh)),                \
             "1" ((USItype) (al)), "g" ((USItype) (bl)))
585
586 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
587 #if (defined (__mc68020__) && !defined (__mc68060__))
588 #define umul_ppmm(w1, w0, u, v) \
589   __asm__ ("mulu%.l %3,%1:%0"                                           \
590            : "=d" ((USItype) (w0)),                                     \
591              "=d" ((USItype) (w1))                                      \
592            : "%0" ((USItype) (u)),                                      \
593              "dmi" ((USItype) (v)))
594 #define UMUL_TIME 45
595 #define udiv_qrnnd(q, r, n1, n0, d) \
596   __asm__ ("divu%.l %4,%1:%0"                                           \
597            : "=d" ((USItype) (q)),                                      \
598              "=d" ((USItype) (r))                                       \
599            : "0" ((USItype) (n0)),                                      \
600              "1" ((USItype) (n1)),                                      \
601              "dmi" ((USItype) (d)))
602 #define UDIV_TIME 90
603 #define sdiv_qrnnd(q, r, n1, n0, d) \
604   __asm__ ("divs%.l %4,%1:%0"                                           \
605            : "=d" ((USItype) (q)),                                      \
606              "=d" ((USItype) (r))                                       \
607            : "0" ((USItype) (n0)),                                      \
608              "1" ((USItype) (n1)),                                      \
609              "dmi" ((USItype) (d)))
610
611 #elif defined (__mcoldfire__) /* not mc68020 */
612
613 #define umul_ppmm(xh, xl, a, b) \
614   __asm__ ("| Inlined umul_ppmm\n"                                      \
615            "    move%.l %2,%/d0\n"                                      \
616            "    move%.l %3,%/d1\n"                                      \
617            "    move%.l %/d0,%/d2\n"                                    \
618            "    swap    %/d0\n"                                         \
619            "    move%.l %/d1,%/d3\n"                                    \
620            "    swap    %/d1\n"                                         \
621            "    move%.w %/d2,%/d4\n"                                    \
622            "    mulu    %/d3,%/d4\n"                                    \
623            "    mulu    %/d1,%/d2\n"                                    \
624            "    mulu    %/d0,%/d3\n"                                    \
625            "    mulu    %/d0,%/d1\n"                                    \
626            "    move%.l %/d4,%/d0\n"                                    \
627            "    clr%.w  %/d0\n"                                         \
628            "    swap    %/d0\n"                                         \
629            "    add%.l  %/d0,%/d2\n"                                    \
630            "    add%.l  %/d3,%/d2\n"                                    \
631            "    jcc     1f\n"                                           \
632            "    add%.l  %#65536,%/d1\n"                                 \
633            "1:  swap    %/d2\n"                                         \
634            "    moveq   %#0,%/d0\n"                                     \
635            "    move%.w %/d2,%/d0\n"                                    \
636            "    move%.w %/d4,%/d2\n"                                    \
637            "    move%.l %/d2,%1\n"                                      \
638            "    add%.l  %/d1,%/d0\n"                                    \
639            "    move%.l %/d0,%0"                                        \
640            : "=g" ((USItype) (xh)),                                     \
641              "=g" ((USItype) (xl))                                      \
642            : "g" ((USItype) (a)),                                       \
643              "g" ((USItype) (b))                                        \
644            : "d0", "d1", "d2", "d3", "d4")
645 #define UMUL_TIME 100
646 #define UDIV_TIME 400
647 #else /* not ColdFire */
648 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
649 #define umul_ppmm(xh, xl, a, b) \
650   __asm__ ("| Inlined umul_ppmm\n"                                      \
651            "    move%.l %2,%/d0\n"                                      \
652            "    move%.l %3,%/d1\n"                                      \
653            "    move%.l %/d0,%/d2\n"                                    \
654            "    swap    %/d0\n"                                         \
655            "    move%.l %/d1,%/d3\n"                                    \
656            "    swap    %/d1\n"                                         \
657            "    move%.w %/d2,%/d4\n"                                    \
658            "    mulu    %/d3,%/d4\n"                                    \
659            "    mulu    %/d1,%/d2\n"                                    \
660            "    mulu    %/d0,%/d3\n"                                    \
661            "    mulu    %/d0,%/d1\n"                                    \
662            "    move%.l %/d4,%/d0\n"                                    \
663            "    eor%.w  %/d0,%/d0\n"                                    \
664            "    swap    %/d0\n"                                         \
665            "    add%.l  %/d0,%/d2\n"                                    \
666            "    add%.l  %/d3,%/d2\n"                                    \
667            "    jcc     1f\n"                                           \
668            "    add%.l  %#65536,%/d1\n"                                 \
669            "1:  swap    %/d2\n"                                         \
670            "    moveq   %#0,%/d0\n"                                     \
671            "    move%.w %/d2,%/d0\n"                                    \
672            "    move%.w %/d4,%/d2\n"                                    \
673            "    move%.l %/d2,%1\n"                                      \
674            "    add%.l  %/d1,%/d0\n"                                    \
675            "    move%.l %/d0,%0"                                        \
676            : "=g" ((USItype) (xh)),                                     \
677              "=g" ((USItype) (xl))                                      \
678            : "g" ((USItype) (a)),                                       \
679              "g" ((USItype) (b))                                        \
680            : "d0", "d1", "d2", "d3", "d4")
681 #define UMUL_TIME 100
682 #define UDIV_TIME 400
683
684 #endif /* not mc68020 */
685
/* The '020, '030, '040 and '060 have bit-field instructions.
   The CPU32 presents itself as a 68020 but lacks them.  */
688 #if defined (__mc68020__) && !defined (__mcpu32__)
689 #define count_leading_zeros(count, x) \
690   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
691            : "=d" ((USItype) (count))                                   \
692            : "od" ((USItype) (x)), "n" (0))
693 /* Some ColdFire architectures have a ff1 instruction supported via
694    __builtin_clz. */
695 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
696 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
697 #define COUNT_LEADING_ZEROS_0 32
698 #endif
699 #endif /* mc68000 */
700
701 #if defined (__m88000__) && W_TYPE_SIZE == 32
702 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
703   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
704            : "=r" ((USItype) (sh)),                                     \
705              "=&r" ((USItype) (sl))                                     \
706            : "%rJ" ((USItype) (ah)),                                    \
707              "rJ" ((USItype) (bh)),                                     \
708              "%rJ" ((USItype) (al)),                                    \
709              "rJ" ((USItype) (bl)))
710 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
711   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
712            : "=r" ((USItype) (sh)),                                     \
713              "=&r" ((USItype) (sl))                                     \
714            : "rJ" ((USItype) (ah)),                                     \
715              "rJ" ((USItype) (bh)),                                     \
716              "rJ" ((USItype) (al)),                                     \
717              "rJ" ((USItype) (bl)))
/* Store in COUNT the result of the ff1 instruction applied to X, XORed
   with 31.  NOTE(review): presumably ff1 yields the index of the most
   significant set bit, which makes COUNT the number of leading zeros --
   confirm against the m88k manual.  X is evaluated once.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    USItype __ff1_out;                                                  \
    __asm__ ("ff1 %0,%1" : "=r" (__ff1_out) : "r" ((USItype) (x)));     \
    (count) = 31 ^ __ff1_out;                                           \
  } while (0)
726 #define COUNT_LEADING_ZEROS_0 63 /* sic */
727 #if defined (__mc88110__)
728 #define umul_ppmm(wh, wl, u, v) \
729   do {                                                                  \
730     union {UDItype __ll;                                                \
731            struct {USItype __h, __l;} __i;                              \
732           } __xx;                                                       \
733     __asm__ ("mulu.d    %0,%1,%2"                                       \
734              : "=r" (__xx.__ll)                                         \
735              : "r" ((USItype) (u)),                                     \
736                "r" ((USItype) (v)));                                    \
737     (wh) = __xx.__i.__h;                                                \
738     (wl) = __xx.__i.__l;                                                \
739   } while (0)
740 #define udiv_qrnnd(q, r, n1, n0, d) \
741   ({union {UDItype __ll;                                                \
742            struct {USItype __h, __l;} __i;                              \
743           } __xx;                                                       \
744   USItype __q;                                                          \
745   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
746   __asm__ ("divu.d %0,%1,%2"                                            \
747            : "=r" (__q)                                                 \
748            : "r" (__xx.__ll),                                           \
749              "r" ((USItype) (d)));                                      \
750   (r) = (n0) - __q * (d); (q) = __q; })
751 #define UMUL_TIME 5
752 #define UDIV_TIME 25
753 #else
754 #define UMUL_TIME 17
755 #define UDIV_TIME 150
756 #endif /* __mc88110__ */
757 #endif /* __m88000__ */
758
/* MN10300 / AM33.  The inline assembly uses the __asm__ spelling, like the
   rest of this file, so that the header also works when the plain `asm'
   keyword is disabled (-ansi, -std=c99, ...).  */
#if defined (__mn10300__)
# if defined (__AM33__)
/* AM33: leading-zero count via the compiler builtin.  */
#  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
/* (w1,w0) = u * v, unsigned; both result words are general registers.  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
/* (w1,w0) = u * v, signed.  */
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
/* (w1,w0) = u * v, unsigned.  W0 shares a data register with U; W1 comes
   back through the "z" constraint register.  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
/* (w1,w0) = u * v, signed; same operand layout as umul_ppmm.  */
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* (sh,sl) = (ah,al) + (bh,bl), done as one double-word addition through
   DWunion (declared outside this section; its .ll member overlays the
   .s.low/.s.high pair).  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __s, __a, __b;                      \
    __a.s.low = (al); __a.s.high = (ah);        \
    __b.s.low = (bl); __b.s.high = (bh);        \
    __s.ll = __a.ll + __b.ll;                   \
    (sl) = __s.s.low; (sh) = __s.s.high;        \
  } while (0)
/* (sh,sl) = (ah,al) - (bh,bl), done as one double-word subtraction.  */
# define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __s, __a, __b;                      \
    __a.s.low = (al); __a.s.high = (ah);        \
    __b.s.low = (bl); __b.s.high = (bh);        \
    __s.ll = __a.ll - __b.ll;                   \
    (sl) = __s.s.low; (sh) = __s.s.high;        \
  } while (0)
/* Unsigned divide of (nh,nl) by d: NL is passed in and Q returned in the
   same register; NH is passed in and R returned in the "z" register.  */
# define udiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
/* Signed counterpart of udiv_qrnnd, same operand layout.  */
# define sdiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif
795
796 #if defined (__mips__) && W_TYPE_SIZE == 32
/* (w1,w0) = u * v: full 32x32->64 unsigned multiply written in plain C.
   Both factors are truncated to USItype, the product is formed in
   UDItype, and its high and low halves are stored in W1 and W0.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UDItype __prod = (UDItype) (USItype) (u);                           \
    __prod *= (USItype) (v);                                            \
    (w1) = (USItype) (__prod >> 32);                                    \
    (w0) = (USItype) __prod;                                            \
  } while (0)
803 #define UMUL_TIME 10
804 #define UDIV_TIME 100
805
806 #if (__mips == 32 || __mips == 64) && ! __mips16
807 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
808 #define COUNT_LEADING_ZEROS_0 32
809 #endif
810 #endif /* __mips__ */
811
812 #if defined (__ns32000__) && W_TYPE_SIZE == 32
813 #define umul_ppmm(w1, w0, u, v) \
814   ({union {UDItype __ll;                                                \
815            struct {USItype __l, __h;} __i;                              \
816           } __xx;                                                       \
817   __asm__ ("meid %2,%0"                                                 \
818            : "=g" (__xx.__ll)                                           \
819            : "%0" ((USItype) (u)),                                      \
820              "g" ((USItype) (v)));                                      \
821   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
822 #define __umulsidi3(u, v) \
823   ({UDItype __w;                                                        \
824     __asm__ ("meid %2,%0"                                               \
825              : "=g" (__w)                                               \
826              : "%0" ((USItype) (u)),                                    \
827                "g" ((USItype) (v)));                                    \
828     __w; })
829 #define udiv_qrnnd(q, r, n1, n0, d) \
830   ({union {UDItype __ll;                                                \
831            struct {USItype __l, __h;} __i;                              \
832           } __xx;                                                       \
833   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
834   __asm__ ("deid %2,%0"                                                 \
835            : "=g" (__xx.__ll)                                           \
836            : "0" (__xx.__ll),                                           \
837              "g" ((USItype) (d)));                                      \
838   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
839 #define count_trailing_zeros(count,x) \
840   do {                                                                  \
841     __asm__ ("ffsd     %2,%0"                                           \
842             : "=r" ((USItype) (count))                                  \
843             : "0" ((USItype) 0),                                        \
844               "r" ((USItype) (x)));                                     \
845   } while (0)
846 #endif /* __ns32000__ */
847
848 /* FIXME: We should test _IBMR2 here when we add assembly support for the
849    system vendor compilers.
850    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
851    enough, since that hits ARM and m68k too.  */
852 #if (defined (_ARCH_PPC)        /* AIX */                               \
853      || defined (_ARCH_PWR)     /* AIX */                               \
854      || defined (_ARCH_COM)     /* AIX */                               \
855      || defined (__powerpc__)   /* gcc */                               \
856      || defined (__POWERPC__)   /* BEOS */                              \
857      || defined (__ppc__)       /* Darwin */                            \
858      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
859      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
860          && CPU_FAMILY == PPC)                                                \
861      ) && W_TYPE_SIZE == 32
862 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
863   do {                                                                  \
864     if (__builtin_constant_p (bh) && (bh) == 0)                         \
865       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
866              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
867     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
868       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
869              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
870     else                                                                \
871       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
872              : "=r" (sh), "=&r" (sl)                                    \
873              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
874   } while (0)
875 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
876   do {                                                                  \
877     if (__builtin_constant_p (ah) && (ah) == 0)                         \
878       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
879                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
880     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
881       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
882                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
883     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
884       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
885                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
886     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
887       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
888                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
889     else                                                                \
890       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
891                : "=r" (sh), "=&r" (sl)                                  \
892                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
893   } while (0)
894 #define count_leading_zeros(count, x) \
895   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
896 #define COUNT_LEADING_ZEROS_0 32
897 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
898   || defined (__ppc__)                                                    \
899   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
900   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
901          && CPU_FAMILY == PPC)
/* (ph,pl) = m0 * m1, unsigned 32x32->64.  mulhwu produces the high word;
   the low word is the ordinary truncating C product.  M0 and M1 are each
   evaluated exactly once: the asm reads the USItype copies, not the raw
   macro arguments, so arguments with side effects or of a different width
   are handled correctly.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    USItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
908 #define UMUL_TIME 15
/* (ph,pl) = m0 * m1, signed 32x32->64.  mulhw produces the high word; the
   low word is the ordinary C product of the SItype copies.  M0 and M1 are
   each evaluated exactly once: the asm reads the local copies rather than
   the raw macro arguments.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    SItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
915 #define SMUL_TIME 14
916 #define UDIV_TIME 120
917 #elif defined (_ARCH_PWR)
918 #define UMUL_TIME 8
919 #define smul_ppmm(xh, xl, m0, m1) \
920   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
921 #define SMUL_TIME 4
922 #define sdiv_qrnnd(q, r, nh, nl, d) \
923   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
924 #define UDIV_TIME 100
925 #endif
926 #endif /* 32-bit POWER architecture variants.  */
927
928 /* We should test _IBMR2 here when we add assembly support for the system
929    vendor compilers.  */
930 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
931 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
932   do {                                                                  \
933     if (__builtin_constant_p (bh) && (bh) == 0)                         \
934       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
935              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
936     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
937       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
938              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
939     else                                                                \
940       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
941              : "=r" (sh), "=&r" (sl)                                    \
942              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
943   } while (0)
944 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
945   do {                                                                  \
946     if (__builtin_constant_p (ah) && (ah) == 0)                         \
947       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
948                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
949     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
950       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
951                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
952     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
953       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
954                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
955     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
956       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
957                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
958     else                                                                \
959       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
960                : "=r" (sh), "=&r" (sl)                                  \
961                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
962   } while (0)
963 #define count_leading_zeros(count, x) \
964   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
965 #define COUNT_LEADING_ZEROS_0 64
/* (ph,pl) = m0 * m1, unsigned 64x64->128.  mulhdu produces the high word;
   the low word is the ordinary truncating C product.  M0 and M1 are each
   evaluated exactly once: the asm reads the UDItype copies, not the raw
   macro arguments.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
972 #define UMUL_TIME 15
/* (ph,pl) = m0 * m1, signed 64x64->128.  mulhd produces the high word; the
   low word is the ordinary C product of the DItype copies.  M0 and M1 are
   each evaluated exactly once: the asm reads the local copies rather than
   the raw macro arguments.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    DItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
979 #define SMUL_TIME 14  /* ??? */
980 #define UDIV_TIME 120 /* ??? */
981 #endif /* 64-bit PowerPC.  */
982
983 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
984 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
985   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
986            : "=r" ((USItype) (sh)),                                     \
987              "=&r" ((USItype) (sl))                                     \
988            : "%0" ((USItype) (ah)),                                     \
989              "r" ((USItype) (bh)),                                      \
990              "%1" ((USItype) (al)),                                     \
991              "r" ((USItype) (bl)))
992 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
993   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
994            : "=r" ((USItype) (sh)),                                     \
995              "=&r" ((USItype) (sl))                                     \
996            : "0" ((USItype) (ah)),                                      \
997              "r" ((USItype) (bh)),                                      \
998              "1" ((USItype) (al)),                                      \
999              "r" ((USItype) (bl)))
1000 #define umul_ppmm(ph, pl, m0, m1) \
1001   do {                                                                  \
1002     USItype __m0 = (m0), __m1 = (m1);                                   \
1003     __asm__ (                                                           \
1004        "s       r2,r2\n"                                                \
1005 "       mts     r10,%2\n"                                               \
1006 "       m       r2,%3\n"                                                \
1007 "       m       r2,%3\n"                                                \
1008 "       m       r2,%3\n"                                                \
1009 "       m       r2,%3\n"                                                \
1010 "       m       r2,%3\n"                                                \
1011 "       m       r2,%3\n"                                                \
1012 "       m       r2,%3\n"                                                \
1013 "       m       r2,%3\n"                                                \
1014 "       m       r2,%3\n"                                                \
1015 "       m       r2,%3\n"                                                \
1016 "       m       r2,%3\n"                                                \
1017 "       m       r2,%3\n"                                                \
1018 "       m       r2,%3\n"                                                \
1019 "       m       r2,%3\n"                                                \
1020 "       m       r2,%3\n"                                                \
1021 "       m       r2,%3\n"                                                \
1022 "       cas     %0,r2,r0\n"                                             \
1023 "       mfs     r10,%1"                                                 \
1024              : "=r" ((USItype) (ph)),                                   \
1025                "=r" ((USItype) (pl))                                    \
1026              : "%r" (__m0),                                             \
1027                 "r" (__m1)                                              \
1028              : "r2");                                                   \
1029     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1030              + (((SItype) __m1 >> 31) & __m0));                         \
1031   } while (0)
1032 #define UMUL_TIME 20
1033 #define UDIV_TIME 200
/* Count the leading zeros of the 32-bit value X into COUNT using two
   alternative clz invocations: if any of the upper 16 bits is set, clz is
   applied to the upper half; otherwise it is applied to the value itself
   and 16 is added.  X is converted to USItype and evaluated exactly once,
   so the range test is always unsigned and arguments with side effects
   are safe.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    USItype __clz_x = (x);                                              \
    if (__clz_x >= 0x10000)                                             \
      __asm__ ("clz     %0,%1"                                          \
               : "=r" ((USItype) (count))                               \
               : "r" (__clz_x >> 16));                                  \
    else                                                                \
      {                                                                 \
        __asm__ ("clz   %0,%1"                                          \
                 : "=r" ((USItype) (count))                             \
                 : "r" (__clz_x));                                      \
        (count) += 16;                                                  \
      }                                                                 \
  } while (0)
1048 #endif
1049
1050 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1051 #ifndef __sh1__
1052 #define umul_ppmm(w1, w0, u, v) \
1053   __asm__ (                                                             \
1054        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1055            : "=r<" ((USItype)(w1)),                                     \
1056              "=r<" ((USItype)(w0))                                      \
1057            : "r" ((USItype)(u)),                                        \
1058              "r" ((USItype)(v))                                         \
1059            : "macl", "mach")
1060 #define UMUL_TIME 5
1061 #endif
1062
1063 /* This is the same algorithm as __udiv_qrnnd_c.  */
1064 #define UDIV_NEEDS_NORMALIZATION 1
1065
/* SH udiv_qrnnd: divide the double word (n1,n0) by d, producing the
   quotient in q and the remainder in r.
   The asm copies d into r5, puts n0 with its halves swapped into r4,
   derives r6 from r5 (swap.w followed by shll16) and calls the hidden
   helper __udiv_qrnnd_16 twice through the pointer in operand 5,
   swapping the halves of r4 again between the calls.  The value left in
   r1 by the first call is moved (half-swapped) into q and the value left
   by the second call is or-ed in.  n1 is tied to operand 1, whose "z"
   constraint also carries r back to the caller.  r1, r2, r4, r5, r6, pr
   and t are declared clobbered.
   NOTE(review): the instruction following each jsr presumably runs in
   the branch delay slot, so the instruction order must not be altered --
   confirm against the SH manual.  */
1066 #define udiv_qrnnd(q, r, n1, n0, d) \
1067   do {                                                                  \
1068     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1069                         __attribute__ ((visibility ("hidden")));        \
1070     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1071     __asm__ (                                                           \
1072         "mov%M4 %4,r5\n"                                                \
1073 "       swap.w %3,r4\n"                                                 \
1074 "       swap.w r5,r6\n"                                                 \
1075 "       jsr @%5\n"                                                      \
1076 "       shll16 r6\n"                                                    \
1077 "       swap.w r4,r4\n"                                                 \
1078 "       jsr @%5\n"                                                      \
1079 "       swap.w r1,%0\n"                                                 \
1080 "       or r1,%0"                                                       \
1081         : "=r" (q), "=&z" (r)                                           \
1082         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1083         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1084   } while (0)
1085
/* Cost estimate for the division above.  */
1086 #define UDIV_TIME 80
1087
/* SH sub_ddmmss: (sh,sl) = (ah,al) - (bh,bl).
   clrt clears the T bit, the first subc subtracts the low words and
   leaves the borrow in T, and the second subc subtracts the high words
   together with that borrow.  ah and al are tied to the outputs through
   the matching constraints "0" and "1".
   sl is marked earlyclobber: it is written by the first subc while bh
   (operand 4) has not been read yet, so the two must never be allocated
   to the same register.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
           : "=r" (sh), "=&r" (sl)                                      \
           : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1092
1093 #endif /* __sh__ */
1094
1095 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Widening unsigned multiply: both operands are truncated to USItype and
   the product is formed in UDItype.  The arguments are parenthesised so
   that expression arguments such as a + b are converted as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* SHmedia count_leading_zeros: x is truncated to USItype and zero-extended
   into a 64-bit temporary, nsb is executed on that temporary, and 31 is
   subtracted from the result before it is stored in count.
   COUNT_LEADING_ZEROS_0 below records that the macro yields 32 for
   x == 0.
   NOTE(review): this relies on nsb returning 31 plus the number of
   leading zero bits of the 32-bit value for a zero-extended operand --
   confirm against the SH-5 instruction reference.  */
1097 #define count_leading_zeros(count, x) \
1098   do                                                                    \
1099     {                                                                   \
1100       UDItype x_ = (USItype)(x);                                        \
1101       SItype c_;                                                        \
1102                                                                         \
1103       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1104       (count) = c_ - 31;                                                \
1105     }                                                                   \
1106   while (0)
1107 #define COUNT_LEADING_ZEROS_0 32
1108 #endif
1109
1110 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1111     && W_TYPE_SIZE == 32
/* 32-bit SPARC add_ssaaaa: (sh,sl) = (ah,al) + (bh,bl).
   addcc adds the low words into sl and sets the carry; addx adds the
   high words plus that carry into sh.  sl is earlyclobber ("=&r")
   because it is written before ah and bh are read.  The '%' modifier
   marks each a/b operand pair as commutative, which is valid for
   addition.  The condition codes are clobbered (__CLOBBER_CC).
   NOTE(review): "J" with the %r modifier presumably lets a zero operand
   be printed as %g0 and "I" admits a small signed immediate -- confirm
   against the SPARC constraint list.  */
1112 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1113   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1114            : "=r" ((USItype) (sh)),                                     \
1115              "=&r" ((USItype) (sl))                                     \
1116            : "%rJ" ((USItype) (ah)),                                    \
1117              "rI" ((USItype) (bh)),                                     \
1118              "%rJ" ((USItype) (al)),                                    \
1119              "rI" ((USItype) (bl))                                      \
1120            __CLOBBER_CC)
/* 32-bit SPARC sub_ddmmss: (sh,sl) = (ah,al) - (bh,bl).
   subcc subtracts the low words into sl and sets the borrow; subx
   subtracts the high words and that borrow into sh.  sl is earlyclobber
   because it is written before ah and bh are read.  Unlike add_ssaaaa
   the input constraints carry no '%' modifier: subtraction is not
   commutative, so the operands must not be swapped.  The condition codes
   are clobbered (__CLOBBER_CC).  */
1121 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1122   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1123            : "=r" ((USItype) (sh)),                                     \
1124              "=&r" ((USItype) (sl))                                     \
1125            : "rJ" ((USItype) (ah)),                                     \
1126              "rI" ((USItype) (bh)),                                     \
1127              "rJ" ((USItype) (al)),                                     \
1128              "rI" ((USItype) (bl))                                      \
1129            __CLOBBER_CC)
1130 #if defined (__sparc_v9__)
/* 32-bit code on a V9 CPU, umul_ppmm: (w1,w0) = u * v.
   umul places its result in a temporary pinned to %g1; srlx by 32 then
   extracts the upper half into w1, and the temporary itself (truncated
   to USItype) is stored in w0.
   The register variable is declared with __asm__ rather than the plain
   asm keyword so that the macro also expands correctly when the
   including file is compiled in a strict ISO C mode, matching every
   other asm use in this header.
   NOTE(review): the choice of %g1 presumably depends on global registers
   keeping all 64 bits in 32-bit code -- confirm.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    register USItype __g1 __asm__ ("g1");                               \
    __asm__ ("umul\t%2,%3,%1\n\t"                                       \
             "srlx\t%1, 32, %0"                                         \
             : "=r" ((USItype) (w1)),                                   \
               "=r" (__g1)                                              \
             : "r" ((USItype) (u)),                                     \
               "r" ((USItype) (v)));                                    \
    (w0) = __g1;                                                        \
  } while (0)
/* 32-bit code on a V9 CPU, udiv_qrnnd: divide (__n1,__n0) by __d.
   __n1 is moved into %y, udiv divides by __d with __n0 as its first
   operand and leaves the quotient in __q; umul then forms __q * __d and
   the final sub leaves __n0 minus that product in __r.  Both outputs are
   earlyclobber because inputs are still read after they are written.  */
1142 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1143   __asm__ ("mov\t%2,%%y\n\t"                                            \
1144            "udiv\t%3,%4,%0\n\t"                                         \
1145            "umul\t%0,%4,%1\n\t"                                         \
1146            "sub\t%3,%1,%1"                                              \
1147            : "=&r" ((USItype) (__q)),                                   \
1148              "=&r" ((USItype) (__r))                                    \
1149            : "r" ((USItype) (__n1)),                                    \
1150              "r" ((USItype) (__n0)),                                    \
1151              "r" ((USItype) (__d)))
1152 #else
1153 #if defined (__sparc_v8__)
/* SPARC V8 umul_ppmm: (w1,w0) = u * v.
   umul writes one word of the product to w0, and the other word is then
   read back from the %y register into w1.  */
1154 #define umul_ppmm(w1, w0, u, v) \
1155   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1156            : "=r" ((USItype) (w1)),                                     \
1157              "=r" ((USItype) (w0))                                      \
1158            : "r" ((USItype) (u)),                                       \
1159              "r" ((USItype) (v)))
/* SPARC V8 udiv_qrnnd: divide (__n1,__n0) by __d.
   __n1 is written to %y, followed by three nops before udiv uses it;
   udiv leaves the quotient in __q, umul forms __q * __d, and the final
   sub leaves __n0 minus that product in __r.  Both outputs are
   earlyclobber because inputs are still read after they are written.
   NOTE(review): the nops presumably cover the write-to-%y latency --
   confirm against the V8 manual.  */
1160 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1161   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1162            : "=&r" ((USItype) (__q)),                                   \
1163              "=&r" ((USItype) (__r))                                    \
1164            : "r" ((USItype) (__n1)),                                    \
1165              "r" ((USItype) (__n0)),                                    \
1166              "r" ((USItype) (__d)))
1167 #else
1168 #if defined (__sparclite__)
1169 /* This has hardware multiply but not divide.  It also has two additional
1170    instructions scan (ffs from high bit) and divscc.  */
/* SPARClite umul_ppmm: (w1,w0) = u * v, using the same umul / rd %y
   sequence as the V8 variant: umul writes w0 and %y is read into w1.  */
1171 #define umul_ppmm(w1, w0, u, v) \
1172   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1173            : "=r" ((USItype) (w1)),                                     \
1174              "=r" ((USItype) (w0))                                      \
1175            : "r" ((USItype) (u)),                                       \
1176              "r" ((USItype) (v)))
/* SPARClite udiv_qrnnd: divide (n1,n0) by d with 32 divscc steps.
   n1 is written to %y, tst %g0 sets up the condition codes, and the
   divide steps run through the scratch register %g1 until the last step
   deposits the quotient in q.  The remainder is then read from %y into r
   and, when the final step left the "less than" condition set, the
   annulled branch lets the add of d correct it.
   q and r are both earlyclobber: each is written before the final
   "add %1,%4,%1" reads d (operand 4), so neither may share a register
   with d.  %g1 and the condition codes are clobbered.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n"                                     \
"       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
"       tst     %%g0\n"                                                 \
"       divscc  %3,%4,%%g1\n"                                           \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%0\n"                                           \
"       rd      %%y,%1\n"                                               \
"       bl,a 1f\n"                                                      \
"       add     %1,%4,%1\n"                                             \
"1:     ! End of inline udiv_qrnnd"                                     \
           : "=&r" ((USItype) (q)),                                     \
             "=&r" ((USItype) (r))                                      \
           : "r" ((USItype) (n1)),                                      \
             "r" ((USItype) (n0)),                                      \
             "rI" ((USItype) (d))                                       \
           : "g1" __AND_CLOBBER_CC)
/* Cost estimate for the division above.  */
#define UDIV_TIME 37
/* SPARClite count_leading_zeros: a single scan instruction on x (with the
   constant 1 as second operand) stores its result in count.  */
1224 #define count_leading_zeros(count, x) \
1225   do {                                                                  \
1226   __asm__ ("scan %1,1,%0"                                               \
1227            : "=r" ((USItype) (count))                                   \
1228            : "r" ((USItype) (x)));                                      \
1229   } while (0)
1230 /* Early sparclites return 63 for an argument of 0, but they warn that future
1231    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1232    undefined.  */
1233 #else
1234 /* SPARC without integer multiplication and divide instructions.
1235    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm built from multiply-step instructions: (w1,w0) = u * v.
   u is written to %y; %o5 receives u masked with the sign of v (sra by
   31, then and), which is the term later added back to the high word.
   andcc clears %g1 and the condition codes, 32 mulscc steps against v
   follow, and one more mulscc with 0 finishes the sequence.  w1 is
   %g1 + %o5 and w0 is read from %y.  %g1, %o5 and the condition codes
   are clobbered.  The '%' on u marks the u/v pair as commutative.  */
1236 #define umul_ppmm(w1, w0, u, v) \
1237   __asm__ ("! Inlined umul_ppmm\n"                                      \
1238 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1239 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1240 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1241 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1242 "       mulscc  %%g1,%3,%%g1\n"                                         \
1243 "       mulscc  %%g1,%3,%%g1\n"                                         \
1244 "       mulscc  %%g1,%3,%%g1\n"                                         \
1245 "       mulscc  %%g1,%3,%%g1\n"                                         \
1246 "       mulscc  %%g1,%3,%%g1\n"                                         \
1247 "       mulscc  %%g1,%3,%%g1\n"                                         \
1248 "       mulscc  %%g1,%3,%%g1\n"                                         \
1249 "       mulscc  %%g1,%3,%%g1\n"                                         \
1250 "       mulscc  %%g1,%3,%%g1\n"                                         \
1251 "       mulscc  %%g1,%3,%%g1\n"                                         \
1252 "       mulscc  %%g1,%3,%%g1\n"                                         \
1253 "       mulscc  %%g1,%3,%%g1\n"                                         \
1254 "       mulscc  %%g1,%3,%%g1\n"                                         \
1255 "       mulscc  %%g1,%3,%%g1\n"                                         \
1256 "       mulscc  %%g1,%3,%%g1\n"                                         \
1257 "       mulscc  %%g1,%3,%%g1\n"                                         \
1258 "       mulscc  %%g1,%3,%%g1\n"                                         \
1259 "       mulscc  %%g1,%3,%%g1\n"                                         \
1260 "       mulscc  %%g1,%3,%%g1\n"                                         \
1261 "       mulscc  %%g1,%3,%%g1\n"                                         \
1262 "       mulscc  %%g1,%3,%%g1\n"                                         \
1263 "       mulscc  %%g1,%3,%%g1\n"                                         \
1264 "       mulscc  %%g1,%3,%%g1\n"                                         \
1265 "       mulscc  %%g1,%3,%%g1\n"                                         \
1266 "       mulscc  %%g1,%3,%%g1\n"                                         \
1267 "       mulscc  %%g1,%3,%%g1\n"                                         \
1268 "       mulscc  %%g1,%3,%%g1\n"                                         \
1269 "       mulscc  %%g1,%3,%%g1\n"                                         \
1270 "       mulscc  %%g1,%3,%%g1\n"                                         \
1271 "       mulscc  %%g1,%3,%%g1\n"                                         \
1272 "       mulscc  %%g1,%3,%%g1\n"                                         \
1273 "       mulscc  %%g1,%3,%%g1\n"                                         \
1274 "       mulscc  %%g1,0,%%g1\n"                                          \
1275 "       add     %%g1,%%o5,%0\n"                                         \
1276 "       rd      %%y,%1"                                                 \
1277            : "=r" ((USItype) (w1)),                                     \
1278              "=r" ((USItype) (w0))                                      \
1279            : "%rI" ((USItype) (u)),                                     \
1280              "r" ((USItype) (v))                                                \
1281            : "g1", "o5" __AND_CLOBBER_CC)
1282 #define UMUL_TIME 39            /* 39 instructions */
1283 /* It's quite necessary to add this much assembler for the sparc.
1284    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Bit-serial udiv_qrnnd: divide (__n1,__n0) by __d.
   Through the matching constraints operand 0 (__q) starts out holding
   __n0 and operand 1 (__r) starts out holding __n1; operand 2 is __d.
   %g1 is loaded with 32 and decremented once per step.  Each step
   compares operand 1 with __d, shifts the (operand 1, operand 0) pair
   left by one bit through the carry, and shifts a new bit into the low
   end of operand 0.  The closing xnor with 0 complements operand 0 to
   give the final quotient.  %g1 and the condition codes are clobbered.
   NOTE(review): the instructions written with an extra leading space
   after a branch appear to be delay-slot instructions, so the order of
   the lines is significant -- confirm before editing.  */
1285 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1286   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1287 "       mov     32,%%g1\n"                                              \
1288 "       subcc   %1,%2,%%g0\n"                                           \
1289 "1:     bcs     5f\n"                                                   \
1290 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1291 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1292 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1293 "       subcc   %%g1,1,%%g1\n"                                          \
1294 "2:     bne     1b\n"                                                   \
1295 "        subcc  %1,%2,%%g0\n"                                           \
1296 "       bcs     3f\n"                                                   \
1297 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1298 "       b       3f\n"                                                   \
1299 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1300 "4:     sub     %1,%2,%1\n"                                             \
1301 "5:     addxcc  %1,%1,%1\n"                                             \
1302 "       bcc     2b\n"                                                   \
1303 "        subcc  %%g1,1,%%g1\n"                                          \
1304 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1305 "       bne     4b\n"                                                   \
1306 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1307 "       sub     %1,%2,%1\n"                                             \
1308 "3:     xnor    %0,0,%0\n"                                              \
1309 "       ! End of inline udiv_qrnnd"                                     \
1310            : "=&r" ((USItype) (__q)),                                   \
1311              "=&r" ((USItype) (__r))                                    \
1312            : "r" ((USItype) (__d)),                                     \
1313              "1" ((USItype) (__n1)),                                    \
1314              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1315 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1316 #endif /* __sparclite__ */
1317 #endif /* __sparc_v8__ */
1318 #endif /* __sparc_v9__ */
1319 #endif /* sparc32 */
1320
1321 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1322     && W_TYPE_SIZE == 64
/* 64-bit SPARC add_ssaaaa: (sh,sl) = (ah,al) + (bh,bl).
   addcc adds the low words into sl and sets the condition codes; the
   high words are added into sh without carry; movcs on %xcc then sets
   the local __carry (initially 0) to 1 when the 64-bit carry is set, and
   the final add folds __carry into sh.  sl is earlyclobber because it is
   written before ah and bh are read.  The '%' modifier marks each a/b
   pair as commutative, which is valid for addition.  */
1323 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1324   do {                                                                  \
1325     UDItype __carry = 0;                                                \
1326     __asm__ ("addcc\t%r5,%6,%1\n\t"                                     \
1327              "add\t%r3,%4,%0\n\t"                                       \
1328              "movcs\t%%xcc, 1, %2\n\t"                                  \
1329              "add\t%0, %2, %0"                                          \
1330              : "=r" ((UDItype)(sh)),                                    \
1331                "=&r" ((UDItype)(sl)),                                   \
1332                "+r" (__carry)                                           \
1333              : "%rJ" ((UDItype)(ah)),                                   \
1334                "rI" ((UDItype)(bh)),                                    \
1335                "%rJ" ((UDItype)(al)),                                   \
1336                "rI" ((UDItype)(bl))                                     \
1337              __CLOBBER_CC);                                             \
1338   } while (0)
1339
/* 64-bit SPARC sub_ddmmss: (sh,sl) = (ah,al) - (bh,bl).
   subcc subtracts the low words into sl and sets the condition codes;
   the high words are subtracted into sh without borrow; movcs on %xcc
   then sets the local __carry (initially 0) to 1 when the 64-bit borrow
   is set, and the final sub removes __carry from sh.  sl is earlyclobber
   because it is written before ah and bh are read.
   The minuend operands deliberately carry no '%' (commutative) modifier:
   subtraction is not commutative, and with the modifier the compiler
   would be free to exchange ah with bh or al with bl and compute the
   wrong difference.  This matches the 32-bit SPARC sub_ddmmss.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UDItype __carry = 0;                                                \
    __asm__ ("subcc\t%r5,%6,%1\n\t"                                     \
             "sub\t%r3,%4,%0\n\t"                                       \
             "movcs\t%%xcc, 1, %2\n\t"                                  \
             "sub\t%0, %2, %0"                                          \
             : "=r" ((UDItype)(sh)),                                    \
               "=&r" ((UDItype)(sl)),                                   \
               "+r" (__carry)                                           \
             : "rJ" ((UDItype)(ah)),                                    \
               "rI" ((UDItype)(bh)),                                    \
               "rJ" ((UDItype)(al)),                                    \
               "rI" ((UDItype)(bl))                                     \
             __CLOBBER_CC);                                             \
  } while (0)
1356
/* 64-bit SPARC umul_ppmm: (wh,wl) = u * v, assembled from mulx (which
   keeps only the low 64 bits of each product) on 32-bit halves.
   Operand 6 is u and operand 7 is v; tmp1..tmp4 are operands 2..5.
   Reading the sequence: the low half of v times u is put in wl; the
   partial products of the upper/lower halves are formed in the
   temporaries; the two cross products are summed with addcc, and when
   that sum carries out of 64 bits a constant built as 0x80000000 doubled
   is kept in tmp1 (movcc zeroes it otherwise) and added into wh at the
   end.  wl finally receives the low product plus the shifted cross
   product.  All temporaries and wl are earlyclobber; the condition codes
   are clobbered.  */
1357 #define umul_ppmm(wh, wl, u, v)                                         \
1358   do {                                                                  \
1359           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1360           __asm__ __volatile__ (                                        \
1361                    "srl %7,0,%3\n\t"                                    \
1362                    "mulx %3,%6,%1\n\t"                                  \
1363                    "srlx %6,32,%2\n\t"                                  \
1364                    "mulx %2,%3,%4\n\t"                                  \
1365                    "sllx %4,32,%5\n\t"                                  \
1366                    "srl %6,0,%3\n\t"                                    \
1367                    "sub %1,%5,%5\n\t"                                   \
1368                    "srlx %5,32,%5\n\t"                                  \
1369                    "addcc %4,%5,%4\n\t"                                 \
1370                    "srlx %7,32,%5\n\t"                                  \
1371                    "mulx %3,%5,%3\n\t"                                  \
1372                    "mulx %2,%5,%5\n\t"                                  \
1373                    "sethi %%hi(0x80000000),%2\n\t"                      \
1374                    "addcc %4,%3,%4\n\t"                                 \
1375                    "srlx %4,32,%4\n\t"                                  \
1376                    "add %2,%2,%2\n\t"                                   \
1377                    "movcc %%xcc,%%g0,%2\n\t"                            \
1378                    "addcc %5,%4,%5\n\t"                                 \
1379                    "sllx %3,32,%3\n\t"                                  \
1380                    "add %1,%3,%1\n\t"                                   \
1381                    "add %5,%2,%0"                                       \
1382            : "=r" ((UDItype)(wh)),                                      \
1383              "=&r" ((UDItype)(wl)),                                     \
1384              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1385            : "r" ((UDItype)(u)),                                        \
1386              "r" ((UDItype)(v))                                         \
1387            __CLOBBER_CC);                                               \
1388   } while (0)
/* Cost estimates for this target.  */
1389 #define UMUL_TIME 96
1390 #define UDIV_TIME 230
1391 #endif /* sparc64 */
1392
1393 #if defined (__vax__) && W_TYPE_SIZE == 32
/* VAX add_ssaaaa: (sh,sl) = (ah,al) + (bh,bl).
   addl2 adds bl into sl (which starts as al through the "%1" matching
   constraint) and adwc adds bh plus the carry into sh (which starts as
   ah).  sl is earlyclobber because it is written before bh is read.  */
1394 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1395   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1396            : "=g" ((USItype) (sh)),                                     \
1397              "=&g" ((USItype) (sl))                                     \
1398            : "%0" ((USItype) (ah)),                                     \
1399              "g" ((USItype) (bh)),                                      \
1400              "%1" ((USItype) (al)),                                     \
1401              "g" ((USItype) (bl)))
/* VAX sub_ddmmss: (sh,sl) = (ah,al) - (bh,bl).
   subl2 subtracts bl from sl (which starts as al) and sbwc subtracts bh
   plus the borrow from sh (which starts as ah).  sl is earlyclobber
   because it is written before bh is read; no operand is marked
   commutative.  */
1402 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1403   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1404            : "=g" ((USItype) (sh)),                                     \
1405              "=&g" ((USItype) (sl))                                     \
1406            : "0" ((USItype) (ah)),                                      \
1407              "g" ((USItype) (bh)),                                      \
1408              "1" ((USItype) (al)),                                      \
1409              "g" ((USItype) (bl)))
/* VAX umul_ppmm: (xh,xl) = m0 * m1 as an unsigned product.
   emul forms the double-word product in the union member __ll, whose
   __h/__l halves are then copied to xh/xl.  The last statement adds m1
   to xh when the top bit of m0 is set and m0 when the top bit of m1 is
   set, which converts the signed product produced by emul into the
   unsigned one.  m0 and m1 are evaluated once, into locals.  */
1410 #define umul_ppmm(xh, xl, m0, m1) \
1411   do {                                                                  \
1412     union {                                                             \
1413         UDItype __ll;                                                   \
1414         struct {USItype __l, __h;} __i;                                 \
1415       } __xx;                                                           \
1416     USItype __m0 = (m0), __m1 = (m1);                                   \
1417     __asm__ ("emul %1,%2,$0,%0"                                         \
1418              : "=r" (__xx.__ll)                                         \
1419              : "g" (__m0),                                              \
1420                "g" (__m1));                                             \
1421     (xh) = __xx.__i.__h;                                                \
1422     (xl) = __xx.__i.__l;                                                \
1423     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1424              + (((SItype) __m1 >> 31) & __m0));                         \
1425   } while (0)
/* VAX sdiv_qrnnd: signed division of the double word (n1,n0) by d.
   The two halves are packed into a DItype through the union and a single
   ediv instruction stores the quotient in q and the remainder in r.  */
1426 #define sdiv_qrnnd(q, r, n1, n0, d) \
1427   do {                                                                  \
1428     union {DItype __ll;                                                 \
1429            struct {SItype __l, __h;} __i;                               \
1430           } __xx;                                                       \
1431     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1432     __asm__ ("ediv %3,%2,%0,%1"                                         \
1433              : "=g" (q), "=g" (r)                                       \
1434              : "g" (__xx.__ll), "g" (d));                               \
1435   } while (0)
1436 #endif /* __vax__ */
1437
1438 #ifdef _TMS320C6X
/* C6X add_ssaaaa: (sh,sl) = (ah,al) + (bh,bl).
   addu adds al and bl into the double-word temporary __ll; its low 32
   bits become sl, and its bits above 32 (the carry) are added to ah and
   bh to form sh.  */
1439 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1440   do                                                                    \
1441     {                                                                   \
1442       UDItype __ll;                                                     \
1443       __asm__ ("addu .l1 %1, %2, %0"                                    \
1444                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1445       (sl) = (USItype)__ll;                                             \
1446       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1447     }                                                                   \
1448   while (0)
1449
1450 #ifdef _TMS320C6400_PLUS
/* Widening unsigned multiply: both operands are truncated to USItype and
   the product is formed in UDItype.  The arguments are parenthesised so
   that expression arguments such as a + b are converted as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* C6400+ umul_ppmm: (w1,w0) = u * v.  The operands are truncated to
   USItype, multiplied in UDItype, and the product is split into its
   upper (w1) and lower (w0) 32-bit halves.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UDItype __prod;                                                     \
    __prod = (UDItype) (USItype) (u) * (USItype) (v);                   \
    (w1) = (USItype) (__prod >> 32);                                    \
    (w0) = (USItype) (__prod);                                          \
  } while (0)
1458 #endif  /* _TMS320C6400_PLUS */
1459
/* C6X bit counting is delegated to compiler builtins.  No
   COUNT_LEADING_ZEROS_0 is defined here, so callers get no documented
   result for x == 0.  */
1460 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
/* count_trailing_zeros is only provided when _TMS320C6400 is defined.  */
1461 #ifdef _TMS320C6400
1462 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1463 #endif
/* Cost estimates for this target.  */
1464 #define UMUL_TIME 4
1465 #define UDIV_TIME 40
1466 #endif /* _TMS320C6X */
1467
1468 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1469 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1470    to expand builtin functions depending on what configuration features
1471    are available.  This avoids library calls when the operation can be
1472    performed in-line.  */
/* Xtensa umul_ppmm: (w1,w0) = u * v.  The double-word product returned by
   __builtin_umulsidi3 is stored in a DWunion and its high and low words
   are copied out.  The output arguments are parenthesised, like in the
   other umul_ppmm variants, so that any lvalue expression can be passed
   safely.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __w;                                                        \
    __w.ll = __builtin_umulsidi3 (u, v);                                \
    (w1) = __w.s.high;                                                  \
    (w0) = __w.s.low;                                                   \
  } while (0)
/* Thin wrappers: the widening multiply and the bit counts are forwarded
   unchanged to the corresponding compiler builtins.  */
1480 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1481 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1482 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1483 #endif /* __xtensa__ */
1484
1485 #if defined xstormy16
/* NOTE(review): the count_leading_zeros macro that follows calls
   __clzhi2, not this function; this declaration looks like a leftover --
   confirm whether anything still references it.  */
1486 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* xstormy16 count_leading_zeros: walk x from its most significant
   16-bit chunk downwards, adding the __clzhi2 result of each chunk to
   count, and stop at the first chunk that is not entirely zero.  For
   x == 0 every chunk contributes 16, giving W_TYPE_SIZE, which is what
   COUNT_LEADING_ZEROS_0 advertises.
   The locals use reserved-style names so that they cannot capture a
   caller argument that happens to be called size or c.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __clz_size;                                               \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __clz_size = W_TYPE_SIZE;                       \
           __clz_size;                                                  \
           __clz_size -= 16)                                            \
        {                                                               \
          UHItype __clz_part;                                           \
                                                                        \
          __clz_part = __clzhi2 ((x) >> (__clz_size - 16));             \
          (count) += __clz_part;                                        \
          if (__clz_part != 16)                                         \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1505 #endif
1506
1507 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* Z8000 (16-bit word) add_ssaaaa: (sh,sl) = (ah,al) + (bh,bl).
   add adds bl into sl (which starts as al through the matching
   constraint) and adc adds bh plus the carry into sh (which starts as
   ah).  sl is earlyclobber because it is written before bh is read.  */
1508 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1509   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1510            : "=r" ((unsigned int)(sh)),                                 \
1511              "=&r" ((unsigned int)(sl))                                 \
1512            : "%0" ((unsigned int)(ah)),                                 \
1513              "r" ((unsigned int)(bh)),                                  \
1514              "%1" ((unsigned int)(al)),                                 \
1515              "rQR" ((unsigned int)(bl)))
/* Z8000 (16-bit word) sub_ddmmss: (sh,sl) = (ah,al) - (bh,bl).
   sub subtracts bl from sl (which starts as al) and sbc subtracts bh
   plus the borrow from sh (which starts as ah).  sl is earlyclobber
   because it is written before bh is read; no operand is marked
   commutative.  */
1516 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1517   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1518            : "=r" ((unsigned int)(sh)),                                 \
1519              "=&r" ((unsigned int)(sl))                                 \
1520            : "0" ((unsigned int)(ah)),                                  \
1521              "r" ((unsigned int)(bh)),                                  \
1522              "1" ((unsigned int)(al)),                                  \
1523              "rQR" ((unsigned int)(bl)))
/* Z8000 (16-bit word) umul_ppmm: (xh,xl) = m0 * m1 as an unsigned product.
   mult leaves the double-word product in the __h/__l pair of the union,
   which is copied to xh/xl.  The last statement adds m1 to xh when bit
   15 of m0 is set and m0 when bit 15 of m1 is set, converting the signed
   product into the unsigned one.  m0 and m1 are evaluated once, into
   locals.  */
1524 #define umul_ppmm(xh, xl, m0, m1) \
1525   do {                                                                  \
1526     union {long int __ll;                                               \
1527            struct {unsigned int __h, __l;} __i;                         \
1528           } __xx;                                                       \
1529     unsigned int __m0 = (m0), __m1 = (m1);                              \
1530     __asm__ ("mult      %S0,%H3"                                        \
1531              : "=r" (__xx.__i.__h),                                     \
1532                "=r" (__xx.__i.__l)                                      \
1533              : "%1" (__m0),                                             \
1534                "rQR" (__m1));                                           \
1535     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1536     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1537              + (((signed int) __m1 >> 15) & __m0));                     \
1538   } while (0)
1539 #endif /* __z8000__ */
1540
1541 #endif /* __GNUC__ */
1542
1543 /* If this machine has no inline assembler, use C macros.  */
1544
1545 #if !defined (add_ssaaaa)
/* Portable add_ssaaaa: (sh,sl) = (ah,al) + (bh,bl).
   The low words are summed first; the sum wrapped around exactly when it
   is smaller than al, and that comparison result is the carry added into
   the high word.  sl is stored last so that it may alias al.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UWtype __lo_sum;                                                    \
    __lo_sum = (al) + (bl);                                             \
    (sh) = (ah) + (bh) + (__lo_sum < (al));                             \
    (sl) = __lo_sum;                                                    \
  } while (0)
1553 #endif
1554
#if !defined (sub_ddmmss)
/* sub_ddmmss(sh, sl, ah, al, bh, bl) -- generic C fallback.
   Subtracts the two-word value (BH,BL) from (AH,AL), storing the low
   word of the difference in SL and the high word in SH.  A borrow out of
   the low word is recovered by checking whether the low difference, once
   stored in a UWtype, is greater than AL.  Note that AL is evaluated
   twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __x = (al) - (bl);           /* low word, may wrap */        \
    int __borrow = __x > (al);          /* 1 iff the low word wrapped */\
    (sh) = (ah) - (bh) - __borrow;                                      \
    (sl) = __x;                                                         \
  } while (0)
#endif
1564
/* If umul_ppmm is missing but smul_ppmm is available, build the unsigned
   multiply from the signed one: take the high word produced by smul_ppmm
   and add V to it when the top bit of U is set, and U when the top bit of
   V is set.  The low word is stored into W0 by smul_ppmm itself.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __umul_hi;                                                   \
    UWtype __umul_u = (u);                                              \
    UWtype __umul_v = (v);                                              \
                                                                        \
    smul_ppmm (__umul_hi, w0, __umul_u, __umul_v);                      \
    /* -(x >> (W_TYPE_SIZE - 1)) is all ones iff the top bit is set.  */\
    __umul_hi += -(__umul_u >> (W_TYPE_SIZE - 1)) & __umul_v;           \
    __umul_hi += -(__umul_v >> (W_TYPE_SIZE - 1)) & __umul_u;           \
    (w1) = __umul_hi;                                                   \
  } while (0)
#endif
1577
/* If we still don't have umul_ppmm, define it using plain C.
   U and V are split into half-words, the four half-word products are
   formed in UWtype, and the two middle products are folded into the high
   word W1 and the low word W0, propagating the single carry that the sum
   of the middle products can generate.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UHWtype __ul = __ll_lowpart (u), __uh = __ll_highpart (u);          \
    UHWtype __vl = __ll_lowpart (v), __vh = __ll_highpart (v);          \
    UWtype __x0 = (UWtype) __ul * __vl;         /* low  * low  */       \
    UWtype __x1 = (UWtype) __ul * __vh;         /* low  * high */       \
    UWtype __x2 = (UWtype) __uh * __vl;         /* high * low  */       \
    UWtype __x3 = (UWtype) __uh * __vh;         /* high * high */       \
                                                                        \
    __x1 += __ll_highpart (__x0);       /* this can't give carry */     \
    __x1 += __x2;                       /* but this indeed can */       \
    /* On carry, add it at the position it has within the high word.  */\
    __x3 += (__x1 < __x2) ? __ll_B : 0;                                 \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
#endif
1604
#if !defined (__umulsidi3)
/* __umulsidi3(u, v) -- multiply the words U and V and yield the result as
   one double-word value.  umul_ppmm stores its high and low result words
   into the `s.high' and `s.low' members of a DWunion; the value of the
   statement expression is that union's `ll' member.  */
#define __umulsidi3(u, v) \
  ({                                                                    \
    DWunion __w;                                                        \
                                                                        \
    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
    __w.ll;                                                             \
  })
#endif
1611
/* Define this unconditionally, so it can be used for debugging.
   __udiv_qrnnd_c(q, r, n1, n0, d) divides the two-word number (N1,N0) by
   D in plain C, producing the quotient one half-word digit at a time:
   each digit is first estimated by dividing by the high half of D and is
   then decremented at most twice while the partial remainder is too
   small.  The quotient is stored in Q and the remainder in R.  D is
   evaluated several times.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __d1 = __ll_highpart (d);                                    \
    UWtype __d0 = __ll_lowpart (d);                                     \
    UWtype __q1, __q0, __r1, __r0, __m;                                 \
                                                                        \
    /* High quotient digit.  */                                         \
    __r1 = (n1) % __d1;                                                 \
    __q1 = (n1) / __d1;                                                 \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1 -= 1;                                                      \
        __r1 += (d);                                                    \
        /* __r1 >= d means the addition above did not carry.  */        \
        if (__r1 >= (d) && __r1 < __m)                                  \
          {                                                             \
            __q1 -= 1;                                                  \
            __r1 += (d);                                                \
          }                                                             \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low quotient digit, same scheme on the new partial remainder.  */\
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0 -= 1;                                                      \
        __r0 += (d);                                                    \
        if (__r0 >= (d) && __r0 < __m)                                  \
          {                                                             \
            __q0 -= 1;                                                  \
            __r0 += (d);                                                \
          }                                                             \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)
1649
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).
   The value returned by __udiv_w_sdiv is stored in Q; the word it writes
   through its first (pointer) argument -- presumably the remainder -- is
   collected in a temporary and then copied to R.  The temporary is a
   UWtype, matching the word type used by the other generic macros in
   this file, so that it is a full word even on configurations where a
   word is wider than USItype.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif
1660
/* Nothing has provided udiv_qrnnd: fall back to the C implementation
   __udiv_qrnnd_c and set UDIV_NEEDS_NORMALIZATION to 1 for it.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1666
#if !defined (count_leading_zeros)
/* count_leading_zeros(count, x) -- generic C fallback.
   Finds a shift amount __a such that X >> __a can be used as an index
   into __clz_tab, then stores
   W_TYPE_SIZE - (__clz_tab[X >> __a] + __a) in COUNT.  For words of at
   most 32 bits __a is picked by comparing X against three thresholds
   spaced __BITS4 bits apart; for wider words X is scanned one byte at a
   time from the top until a non-zero byte is found.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __xr = (x);                                                  \
    UWtype __a;                                                         \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        if (__xr < ((UWtype)1<<__BITS4))                                \
          __a = 0;                                                      \
        else if (__xr < ((UWtype)1<<2*__BITS4))                         \
          __a = __BITS4;                                                \
        else if (__xr < ((UWtype)1<<3*__BITS4))                         \
          __a = 2*__BITS4;                                              \
        else                                                            \
          __a = 3*__BITS4;                                              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        __a = W_TYPE_SIZE - 8;                                          \
        while (__a > 0 && ((__xr >> __a) & 0xff) == 0)                  \
          __a -= 8;                                                     \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
  } while (0)
/* Value the macro above stores in COUNT when X is 0 (given that
   __clz_tab[0] is 0).  */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1690
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
   X & -X keeps only the lowest set bit of X; the leading-zero count of
   that value, subtracted from W_TYPE_SIZE - 1, is stored in COUNT.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_x = (x);                                               \
    UWtype __ctz_lsb = __ctz_x & -__ctz_x;                              \
    UWtype __ctz_c;                                                     \
                                                                        \
    count_leading_zeros (__ctz_c, __ctz_lsb);                           \
    (count) = (W_TYPE_SIZE - 1) - __ctz_c;                              \
  } while (0)
#endif
1702
/* Default UDIV_NEEDS_NORMALIZATION to 0 when no earlier definition has
   set it.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif