OSDN Git Service

gcc/
[pf3gnuchains/gcc-fork.git] / gcc / longlong.h
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4    Free Software Foundation, Inc.
5
6    This file is part of the GNU C Library.
7
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
12
13    In addition to the permissions in the GNU Lesser General Public
14    License, the Free Software Foundation gives you unlimited
15    permission to link the compiled version of this file into
16    combinations with other programs, and to distribute those
17    combinations without any restriction coming from the use of this
18    file.  (The Lesser General Public License restrictions do apply in
19    other respects; for example, they cover modification of the file,
20    and distribution when not linked into a combine executable.)
21
22    The GNU C Library is distributed in the hope that it will be useful,
23    but WITHOUT ANY WARRANTY; without even the implied warranty of
24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25    Lesser General Public License for more details.
26
27    You should have received a copy of the GNU Lesser General Public
28    License along with the GNU C Library; if not, write to the Free
29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
30    MA 02110-1301, USA.  */
31
32 /* You have to define the following before including this file:
33
34    UWtype -- An unsigned type, default type for operations (typically a "word")
35    UHWtype -- An unsigned type, at least half the size of UWtype.
36    UDWtype -- An unsigned type, at least twice as large as UWtype
37    W_TYPE_SIZE -- size in bits of UWtype
38
39    UQItype -- Unsigned 8 bit type.
40    SItype, USItype -- Signed and unsigned 32 bit types.
41    DItype, UDItype -- Signed and unsigned 64 bit types.
42
43    On a 32 bit machine UWtype should typically be USItype;
44    on a 64 bit machine, UWtype should typically be UDItype.  */
45
/* Half-word helpers built on W_TYPE_SIZE and UWtype.
   __BITS4 is a quarter of the word width in bits.
   __ll_B is 1 shifted left by half the word width.
   __ll_lowpart(t) masks T (cast to UWtype) with __ll_B - 1;
   __ll_highpart(t) shifts T (cast to UWtype) right by half the word width.  */
46 #define __BITS4 (W_TYPE_SIZE / 4)
47 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
48 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
49 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
50
/* Defaults used when the includer did not define W_TYPE_SIZE: a 32-bit
   word, with UWtype and UHWtype both mapped to USItype and UDWtype mapped
   to UDItype.  The SI/DI types themselves must still be provided by the
   includer.  */
51 #ifndef W_TYPE_SIZE
52 #define W_TYPE_SIZE     32
53 #define UWtype          USItype
54 #define UHWtype         USItype
55 #define UDWtype         UDItype
56 #endif
57
58 /* Used in glibc only.  */
/* When attribute_hidden is not already defined it expands to nothing, so
   the declaration below is a plain extern.  */
59 #ifndef attribute_hidden
60 #define attribute_hidden
61 #endif
62
/* 256-entry byte table defined elsewhere; the Alpha count_leading_zeros
   fallback in this file indexes it.  */
63 extern const UQItype __clz_tab[256] attribute_hidden;
64
65 /* Define auxiliary asm macros.
66
67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
69    word product in HIGH_PROD and LOW_PROD.
70
71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
72    UDWtype product.  This is just a variant of umul_ppmm.
73
74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
75    denominator) divides a UDWtype, composed by the UWtype integers
76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
78    than DENOMINATOR for correct operation.  If, in addition, the most
79    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
80    UDIV_NEEDS_NORMALIZATION is defined to 1.
81
82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
84    is rounded towards 0.
85
86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
87    msb to the first nonzero bit in the UWtype X.  This is the number of
88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
90
91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
92    from the least significant end.
93
94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
96    by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
98    (i.e. carry out) is not stored anywhere, and is lost.
99
100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
102    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
104    and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
105    and is lost.
106
107    If any of these macros are left undefined for a particular CPU,
108    C macros are used.  */
109
110 /* The CPUs come in alphabetical order below.
111
112    Please add support for more CPUs here, or improve the current support
113    for the CPUs below!
114    (E.g. WE32100, IBM360.)  */
115
116 #if defined (__GNUC__) && !defined (NO_ASM)
117
118 /* We sometimes need to clobber "cc" with gcc2, but that would not be
119    understood by gcc1.  Use cpp to avoid major code duplication.  */
/* __CLOBBER_CC starts a clobber list containing only "cc";
   __AND_CLOBBER_CC appends "cc" to an existing clobber list.  Both expand
   to nothing when __GNUC__ < 2.  */
120 #if __GNUC__ < 2
121 #define __CLOBBER_CC
122 #define __AND_CLOBBER_CC
123 #else /* __GNUC__ >= 2 */
124 #define __CLOBBER_CC : "cc"
125 #define __AND_CLOBBER_CC , "cc"
126 #endif /* __GNUC__ < 2 */
127
/* Alpha, 64-bit words only.  */
128 #if defined (__alpha) && W_TYPE_SIZE == 64
/* umul_ppmm: copies both operands into locals (each argument is evaluated
   once), takes the high word from __builtin_alpha_umulh and the low word
   from an ordinary UDItype multiply.  */
129 #define umul_ppmm(ph, pl, m0, m1) \
130   do {                                                                  \
131     UDItype __m0 = (m0), __m1 = (m1);                                   \
132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
133     (pl) = __m0 * __m1;                                                 \
134   } while (0)
135 #define UMUL_TIME 46
/* udiv_qrnnd: delegates to the external __udiv_qrnnd, which returns the
   quotient and stores the remainder through its first (pointer) argument.
   Not available when LONGLONG_STANDALONE is defined.  */
136 #ifndef LONGLONG_STANDALONE
137 #define udiv_qrnnd(q, r, n1, n0, d) \
138   do { UDItype __r;                                                     \
139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
140     (r) = __r;                                                          \
141   } while (0)
142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
143 #define UDIV_TIME 220
144 #endif /* LONGLONG_STANDALONE */
/* With __alpha_cix__ defined the counts come straight from the compiler
   builtins, and COUNT_LEADING_ZEROS_0 advertises 64 as the result for a
   zero input.  */
145 #ifdef __alpha_cix__
146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
148 #define COUNT_LEADING_ZEROS_0 64
149 #else
/* Fallback count_leading_zeros: the cmpbge result (inverted with ^ 0xff)
   is looked up in __clz_tab to pick a byte index __a, that byte is
   extracted with extbl, and a second __clz_tab lookup plus __a*8 gives the
   bit position that is subtracted from 64.
   NOTE(review): presumably cmpbge (0, x) yields a per-byte "is zero" mask,
   so __a is the most significant nonzero byte -- confirm against the Alpha
   builtin documentation.  */
150 #define count_leading_zeros(COUNT,X) \
151   do {                                                                  \
152     UDItype __xr = (X), __t, __a;                                       \
153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
155     __t = __builtin_alpha_extbl (__xr, __a);                            \
156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
157   } while (0)
/* Fallback count_trailing_zeros: isolates the lowest set bit of the
   inverted cmpbge mask, converts that one-hot value to a byte index with
   the 0xCC/0xF0/0xAA mask tests, extracts that byte, then repeats the same
   one-hot-to-index conversion on the byte's lowest set bit.  The result is
   byte index * 8 plus bit index within the byte.  */
158 #define count_trailing_zeros(COUNT,X) \
159   do {                                                                  \
160     UDItype __xr = (X), __t, __a;                                       \
161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
162     __t = ~__t & -~__t;                                                 \
163     __a = ((__t & 0xCC) != 0) * 2;                                      \
164     __a += ((__t & 0xF0) != 0) * 4;                                     \
165     __a += ((__t & 0xAA) != 0);                                         \
166     __t = __builtin_alpha_extbl (__xr, __a);                            \
167     __a <<= 3;                                                          \
168     __t &= -__t;                                                        \
169     __a += ((__t & 0xCC) != 0) * 2;                                     \
170     __a += ((__t & 0xF0) != 0) * 4;                                     \
171     __a += ((__t & 0xAA) != 0);                                         \
172     (COUNT) = __a;                                                      \
173   } while (0)
174 #endif /* __alpha_cix__ */
175 #endif /* __alpha */
176
/* ARC, 32-bit words only.  */
177 #if defined (__arc__) && W_TYPE_SIZE == 32
/* add_ssaaaa: add.f on the low words (%4 + %5 -> %1) followed by adc on
   the high words (%2, %3 -> %0).  sl is early-clobber ("=&r") because it
   is written before the high-word inputs are read.  */
178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
180            : "=r" ((USItype) (sh)),                                     \
181              "=&r" ((USItype) (sl))                                     \
182            : "%r" ((USItype) (ah)),                                     \
183              "rIJ" ((USItype) (bh)),                                    \
184              "%r" ((USItype) (al)),                                     \
185              "rIJ" ((USItype) (bl)))
/* sub_ddmmss: same shape with sub.f / sbc.  The inputs are not marked
   commutative ("%") here, unlike the addition above.  */
186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
188            : "=r" ((USItype) (sh)),                                     \
189              "=&r" ((USItype) (sl))                                     \
190            : "r" ((USItype) (ah)),                                      \
191              "rIJ" ((USItype) (bh)),                                    \
192              "r" ((USItype) (al)),                                      \
193              "rIJ" ((USItype) (bl)))
194 /* Call libgcc routine.  */
/* umul_ppmm: obtains the double-word product from __umulsidi3 and splits
   it through a DWunion (a type declared outside this file's visible part).
   Note that w1 and w0 are not parenthesized in the expansion.  */
195 #define umul_ppmm(w1, w0, u, v) \
196 do {                                                                    \
197   DWunion __w;                                                          \
198   __w.ll = __umulsidi3 (u, v);                                          \
199   w1 = __w.s.high;                                                      \
200   w0 = __w.s.low;                                                       \
201 } while (0)
/* Defining __umulsidi3 as itself makes "#ifdef __umulsidi3" style checks
   succeed while still resolving to the function declared below.  */
202 #define __umulsidi3 __umulsidi3
203 UDItype __umulsidi3 (USItype, USItype);
204 #endif
205
/* ARM (non-Thumb), 32-bit words only.  */
206 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
/* add_ssaaaa: adds on the low words (%4 + %5 -> %1), then adc on the high
   words (%2, %3 -> %0).  The condition codes are declared clobbered via
   __CLOBBER_CC.  */
207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
209            : "=r" ((USItype) (sh)),                                     \
210              "=&r" ((USItype) (sl))                                     \
211            : "%r" ((USItype) (ah)),                                     \
212              "rI" ((USItype) (bh)),                                     \
213              "%r" ((USItype) (al)),                                     \
214              "rI" ((USItype) (bl)) __CLOBBER_CC)
/* sub_ddmmss: subs on the low words, sbc on the high words; same operand
   layout as add_ssaaaa but without commutative markers.  */
215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
217            : "=r" ((USItype) (sh)),                                     \
218              "=&r" ((USItype) (sl))                                     \
219            : "r" ((USItype) (ah)),                                      \
220              "rI" ((USItype) (bh)),                                     \
221              "r" ((USItype) (al)),                                      \
222              "rI" ((USItype) (bl)) __CLOBBER_CC)
/* umul_ppmm: builds the 32x32->64 product from four 16x16 "mul"
   instructions.  Each operand is split into a high half (lsr #16) and a
   low half (bic of the shifted-back high half); the two cross products are
   summed, a carry out of that sum adds 65536 to the high word, and the
   cross sum is then folded into the low (lsl #16) and high (lsr #16)
   result words.
   This macro expands to a bare brace block followed by ';', not to
   do { } while (0), so it does not behave as a single statement in an
   unbraced if/else.  */
223 #define umul_ppmm(xh, xl, a, b) \
224 {register USItype __t0, __t1, __t2;                                     \
225   __asm__ ("%@ Inlined umul_ppmm\n"                                     \
226            "    mov     %2, %5, lsr #16\n"                              \
227            "    mov     %0, %6, lsr #16\n"                              \
228            "    bic     %3, %5, %2, lsl #16\n"                          \
229            "    bic     %4, %6, %0, lsl #16\n"                          \
230            "    mul     %1, %3, %4\n"                                   \
231            "    mul     %4, %2, %4\n"                                   \
232            "    mul     %3, %0, %3\n"                                   \
233            "    mul     %0, %2, %0\n"                                   \
234            "    adds    %3, %4, %3\n"                                   \
235            "    addcs   %0, %0, #65536\n"                               \
236            "    adds    %1, %1, %3, lsl #16\n"                          \
237            "    adc     %0, %0, %3, lsr #16"                            \
238            : "=&r" ((USItype) (xh)),                                    \
239              "=r" ((USItype) (xl)),                                     \
240              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
241            : "r" ((USItype) (a)),                                       \
242              "r" ((USItype) (b)) __CLOBBER_CC );}
243 #define UMUL_TIME 20
244 #define UDIV_TIME 100
245 #endif /* __arm__ */
246
/* Applies to every __arm__ build: unlike the section above, this guard
   checks neither __thumb__ nor W_TYPE_SIZE.  COUNT_LEADING_ZEROS_0
   advertises 32 as the result for a zero input.  */
247 #if defined(__arm__)
248 /* Let gcc decide how best to implement count_leading_zeros.  */
249 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
250 #define COUNT_LEADING_ZEROS_0 32
251 #endif
252
/* AVR with 32-bit words: both counts use the "long" builtins
   (__builtin_clzl / __builtin_ctzl), and COUNT_LEADING_ZEROS_0 advertises
   32 as the result for a zero input.  */
253 #if defined (__AVR__) && W_TYPE_SIZE == 32
254 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
255 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
256 #define COUNT_LEADING_ZEROS_0 32
257 #endif /* defined (__AVR__) && W_TYPE_SIZE == 32 */
258
/* CRIS: count_leading_zeros is provided from architecture version 3 on,
   count_trailing_zeros only from version 8 on.  Both map directly to the
   compiler builtins; COUNT_LEADING_ZEROS_0 is not defined here.  */
259 #if defined (__CRIS__) && __CRIS_arch_version >= 3
260 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
261 #if __CRIS_arch_version >= 8
262 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
263 #endif
264 #endif /* __CRIS__ */
265
/* HP PA-RISC, 32-bit words only.  */
266 #if defined (__hppa) && W_TYPE_SIZE == 32
/* add_ssaaaa: add on the low words (%4 + %5 -> %1), then addc on the high
   words (%2, %3 -> %0).  The destination is the last operand in this
   assembler syntax.  */
267 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
268   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
269            : "=r" ((USItype) (sh)),                                     \
270              "=&r" ((USItype) (sl))                                     \
271            : "%rM" ((USItype) (ah)),                                    \
272              "rM" ((USItype) (bh)),                                     \
273              "%rM" ((USItype) (al)),                                    \
274              "rM" ((USItype) (bl)))
/* sub_ddmmss: sub on the low words, subb on the high words.  */
275 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
276   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
277            : "=r" ((USItype) (sh)),                                     \
278              "=&r" ((USItype) (sl))                                     \
279            : "rM" ((USItype) (ah)),                                     \
280              "rM" ((USItype) (bh)),                                     \
281              "rM" ((USItype) (al)),                                     \
282              "rM" ((USItype) (bl)))
/* umul_ppmm exists only when _PA_RISC1_1 is defined: xmpyu writes a
   UDItype result into the "x"-constrained operand, and the union then
   splits it with __w1 (first member) as the high word and __w0 as the
   low word.  */
283 #if defined (_PA_RISC1_1)
284 #define umul_ppmm(w1, w0, u, v) \
285   do {                                                                  \
286     union                                                               \
287       {                                                                 \
288         UDItype __f;                                                    \
289         struct {USItype __w1, __w0;} __w1w0;                            \
290       } __t;                                                            \
291     __asm__ ("xmpyu %1,%2,%0"                                           \
292              : "=x" (__t.__f)                                           \
293              : "x" ((USItype) (u)),                                     \
294                "x" ((USItype) (v)));                                    \
295     (w1) = __t.__w1w0.__w1;                                             \
296     (w0) = __t.__w1w0.__w0;                                             \
297      } while (0)
298 #define UMUL_TIME 8
299 #else
300 #define UMUL_TIME 30
301 #endif
302 #define UDIV_TIME 40
/* count_leading_zeros: a binary search written with nullifying extru
   instructions, as described by the per-line assembler comments.  The
   count starts at 1; 16, 8, 4 and 2 are added as successively narrower
   upper fields test zero, and the final extracted bit is subtracted.
   __tmp is tied to X through the "1" matching constraint, so the asm works
   on a copy of X.  */
303 #define count_leading_zeros(count, x) \
304   do {                                                                  \
305     USItype __tmp;                                                      \
306     __asm__ (                                                           \
307        "ldi             1,%0\n"                                         \
308 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
309 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
310 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
311 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
312 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
313 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
314 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
315 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
316 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
317 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
318 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
319 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
320 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
321 "       sub             %0,%1,%0                ; Subtract it.\n"       \
322         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
323   } while (0)
324 #endif
325
326 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
327 #if !defined (__zarch__)
/* smul_ppmm (non-z/Architecture variant): signed multiply of M0 by M1.
   The product lives in a DItype operand overlaid by a {high, low} pair of
   USItype words.  "lr" first copies M0 into %N0, then "mr" multiplies by
   M1.  The output is early-clobber ("=&r") because it is written by "lr"
   before M1 is read.
   NOTE(review): %N0 presumably names the second (odd) register of the
   pair holding operand 0 -- confirm against the s390 backend operand
   modifiers.  */
328 #define smul_ppmm(xh, xl, m0, m1) \
329   do {                                                                  \
330     union {DItype __ll;                                                 \
331            struct {USItype __h, __l;} __i;                              \
332           } __x;                                                        \
333     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
334              : "=&r" (__x.__ll)                                         \
335              : "r" (m0), "r" (m1));                                     \
336     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
337   } while (0)
/* sdiv_qrnnd (non-z/Architecture variant): signed divide of the
   double-word N1:N0 by D.  N1 is stored in the high half and N0 in the low
   half of a DItype operand, "dr" divides it in place (the "0" matching
   constraint ties input and output), and afterwards the quotient is read
   from the low half and the remainder from the high half.  */
338 #define sdiv_qrnnd(q, r, n1, n0, d) \
339   do {                                                                  \
340     union {DItype __ll;                                                 \
341            struct {USItype __h, __l;} __i;                              \
342           } __x;                                                        \
343     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
344     __asm__ ("dr %0,%2"                                                 \
345              : "=r" (__x.__ll)                                          \
346              : "0" (__x.__ll), "r" (d));                                \
347     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
348   } while (0)
349 #else
350 #define smul_ppmm(xh, xl, m0, m1) \
351   do {                                                                  \
352     register SItype r0 __asm__ ("0");                                   \
353     register SItype r1 __asm__ ("1") = m0;                              \
354                                                                         \
355     __asm__ ("mr\t%%r0,%3"                                              \
356              : "=r" (r0), "=r" (r1)                                     \
357              : "r"  (r1),  "r" (m1));                                   \
358     (xh) = r0; (xl) = r1;                                               \
359   } while (0)
360 #define sdiv_qrnnd(q, r, n1, n0, d) \
361   do {                                                                  \
362     register SItype r0 __asm__ ("0") = n0;                              \
363     register SItype r1 __asm__ ("1") = n1;                              \
364                                                                         \
365     __asm__ ("dr\t%%r0,%3"                                              \
366              : "=r" (r0), "=r" (r1)                                     \
367              : "r" (r0), "r" (r1), "r" (d));                            \
368     (q) = r0; (r) = r1;                                                 \
369   } while (0)
370 #endif /* __zarch__ */
371 #endif
372
/* 32-bit x86.  The "{att|intel}" braces in the templates select between
   the two assembler dialects.  */
373 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* add_ssaaaa: add on the low words, adc on the high words.  The "%0" and
   "%1" matching constraints place AH and AL in the same registers as SH
   and SL, so the two-operand instructions accumulate in place.  */
374 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
375   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
376            : "=r" ((USItype) (sh)),                                     \
377              "=&r" ((USItype) (sl))                                     \
378            : "%0" ((USItype) (ah)),                                     \
379              "g" ((USItype) (bh)),                                      \
380              "%1" ((USItype) (al)),                                     \
381              "g" ((USItype) (bl)))
/* sub_ddmmss: sub on the low words, sbb on the high words; same operand
   tying as above but without the commutative marker.  */
382 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
383   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
384            : "=r" ((USItype) (sh)),                                     \
385              "=&r" ((USItype) (sl))                                     \
386            : "0" ((USItype) (ah)),                                      \
387              "g" ((USItype) (bh)),                                      \
388              "1" ((USItype) (al)),                                      \
389              "g" ((USItype) (bl)))
/* umul_ppmm: single "mul" with U in the "a" register; the low word of the
   product comes back in "a" (W0) and the high word in "d" (W1).  */
390 #define umul_ppmm(w1, w0, u, v) \
391   __asm__ ("mul{l} %3"                                                  \
392            : "=a" ((USItype) (w0)),                                     \
393              "=d" ((USItype) (w1))                                      \
394            : "%0" ((USItype) (u)),                                      \
395              "rm" ((USItype) (v)))
/* udiv_qrnnd: single "div" with N0 in the "a" register and N1 in the "d"
   register; the quotient comes back in "a" (Q) and the remainder in "d"
   (R).  */
396 #define udiv_qrnnd(q, r, n1, n0, dv) \
397   __asm__ ("div{l} %4"                                                  \
398            : "=a" ((USItype) (q)),                                      \
399              "=d" ((USItype) (r))                                       \
400            : "0" ((USItype) (n0)),                                      \
401              "1" ((USItype) (n1)),                                      \
402              "rm" ((USItype) (dv)))
/* Bit counts are left to the compiler builtins; COUNT_LEADING_ZEROS_0 is
   not defined in this section.  */
403 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
404 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
405 #define UMUL_TIME 40
406 #define UDIV_TIME 40
407 #endif /* 80x86 */
408
/* x86 with 64-bit words.  Same structure as the 32-bit x86 section, using
   the "q"-suffixed instructions and UDItype operands.  */
409 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
/* add_ssaaaa: add on the low words, adc on the high words, accumulating
   in place through the "%0"/"%1" matching constraints.  */
410 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
411   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
412            : "=r" ((UDItype) (sh)),                                     \
413              "=&r" ((UDItype) (sl))                                     \
414            : "%0" ((UDItype) (ah)),                                     \
415              "rme" ((UDItype) (bh)),                                    \
416              "%1" ((UDItype) (al)),                                     \
417              "rme" ((UDItype) (bl)))
/* sub_ddmmss: sub on the low words, sbb on the high words.  */
418 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
419   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
420            : "=r" ((UDItype) (sh)),                                     \
421              "=&r" ((UDItype) (sl))                                     \
422            : "0" ((UDItype) (ah)),                                      \
423              "rme" ((UDItype) (bh)),                                    \
424              "1" ((UDItype) (al)),                                      \
425              "rme" ((UDItype) (bl)))
/* umul_ppmm: single "mul" with U in the "a" register; low word returned
   in "a" (W0), high word in "d" (W1).  */
426 #define umul_ppmm(w1, w0, u, v) \
427   __asm__ ("mul{q} %3"                                                  \
428            : "=a" ((UDItype) (w0)),                                     \
429              "=d" ((UDItype) (w1))                                      \
430            : "%0" ((UDItype) (u)),                                      \
431              "rm" ((UDItype) (v)))
/* udiv_qrnnd: single "div" with N0 in "a" and N1 in "d"; quotient
   returned in "a" (Q), remainder in "d" (R).  */
432 #define udiv_qrnnd(q, r, n1, n0, dv) \
433   __asm__ ("div{q} %4"                                                  \
434            : "=a" ((UDItype) (q)),                                      \
435              "=d" ((UDItype) (r))                                       \
436            : "0" ((UDItype) (n0)),                                      \
437              "1" ((UDItype) (n1)),                                      \
438              "rm" ((UDItype) (dv)))
/* Bit counts use the "long" builtins.
   NOTE(review): this assumes "long" is 64 bits wide on every target that
   reaches this section -- confirm for ABIs where long is 32 bits.  */
439 #define count_leading_zeros(count, x)   ((count) = __builtin_clzl (x))
440 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzl (x))
441 #define UMUL_TIME 40
442 #define UDIV_TIME 40
443 #endif /* x86_64 */
444
/* Intel i960, 32-bit words only.  Both macros are GNU statement
   expressions wrapping a single "emul".  */
445 #if defined (__i960__) && W_TYPE_SIZE == 32
/* umul_ppmm: the UDItype result is overlaid by a {low, high} pair of
   USItype words (note the member order: __l first), from which W1 takes
   __h and W0 takes __l.  */
446 #define umul_ppmm(w1, w0, u, v) \
447   ({union {UDItype __ll;                                                \
448            struct {USItype __l, __h;} __i;                              \
449           } __xx;                                                       \
450   __asm__ ("emul        %2,%1,%0"                                       \
451            : "=d" (__xx.__ll)                                           \
452            : "%dI" ((USItype) (u)),                                     \
453              "dI" ((USItype) (v)));                                     \
454   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* __umulsidi3: same instruction, yielding the whole UDItype product as
   the value of the statement expression.  */
455 #define __umulsidi3(u, v) \
456   ({UDItype __w;                                                        \
457     __asm__ ("emul      %2,%1,%0"                                       \
458              : "=d" (__w)                                               \
459              : "%dI" ((USItype) (u)),                                   \
460                "dI" ((USItype) (v)));                                   \
461     __w; })
462 #endif /* __i960__ */
463
/* IA-64, 64-bit words only.  */
464 #if defined (__ia64) && W_TYPE_SIZE == 64
465 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
466    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
467    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
468    register, which takes an extra cycle.  */
/* sub_ddmmss is plain C.  The low difference is computed into a local
   first and stored to SL last, so SL may be the same object as AL or BL.
   AL and BL are each evaluated twice.  */
469 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
470   do {                                                                  \
471     UWtype __x;                                                         \
472     __x = (al) - (bl);                                                  \
473     if ((al) < (bl))                                                    \
474       (sh) = (ah) - (bh) - 1;                                           \
475     else                                                                \
476       (sh) = (ah) - (bh);                                               \
477     (sl) = __x;                                                         \
478   } while (0)
479
480 /* Do both product parts in assembly, since that gives better code with
481    all gcc versions.  Some callers will just use the upper part, and in
482    that situation we waste an instruction, but not any cycles.  */
/* umul_ppmm: xma.hu produces PH and xma.l produces PL, both with f0 as
   the addend; all operands use the "f" constraint.  PH is early-clobber
   because it is written before the second instruction reads M0 and M1.  */
483 #define umul_ppmm(ph, pl, m0, m1)                                       \
484   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
485            : "=&f" (ph), "=f" (pl)                                      \
486            : "f" (m0), "f" (m1))
/* count_leading_zeros: mux1 @rev and czx1.l are used to derive a byte
   index _a, X is shifted right by (_a - 1) * 8, and the remaining byte is
   narrowed with compare-and-shift steps of 4 and 2 bits plus a final
   1-bit term.  _c accumulates the bit position, and the result is
   W_TYPE_SIZE - 1 - _c.
   NOTE(review): presumably _a identifies the most significant nonzero
   byte of X -- confirm against the IA-64 mux1/czx1 definitions.  */
487 #define count_leading_zeros(count, x)                                   \
488   do {                                                                  \
489     UWtype _x = (x), _y, _a, _c;                                        \
490     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
491     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
492     _c = (_a - 1) << 3;                                                 \
493     _x >>= _c;                                                          \
494     if (_x >= 1 << 4)                                                   \
495       _x >>= 4, _c += 4;                                                \
496     if (_x >= 1 << 2)                                                   \
497       _x >>= 2, _c += 2;                                                \
498     _c += _x >> 1;                                                      \
499     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
500   } while (0)
501 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
502    based, and we don't need a special case for x==0 here */
/* count_trailing_zeros: (x - 1) & ~x has a one in exactly the bit
   positions below the lowest set bit of x, so its population count is the
   number of trailing zeros.  */
503 #define count_trailing_zeros(count, x)                                  \
504   do {                                                                  \
505     UWtype __ctz_x = (x);                                               \
506     __asm__ ("popcnt %0 = %1"                                           \
507              : "=r" (count)                                             \
508              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
509   } while (0)
510 #define UMUL_TIME 14
511 #endif
512
/* M32R, 32-bit words only.  Both macros first execute "cmp %0,%0" to
   clear the condition bit, then chain two addx/subx instructions (low
   words first, then high words).  AH and AL are tied to the SH and SL
   registers through the "0"/"1" matching constraints, and "cbit" is
   declared clobbered.  */
513 #if defined (__M32R__) && W_TYPE_SIZE == 32
514 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
515   /* The cmp clears the condition bit.  */ \
516   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
517            : "=r" ((USItype) (sh)),                                     \
518              "=&r" ((USItype) (sl))                                     \
519            : "0" ((USItype) (ah)),                                      \
520              "r" ((USItype) (bh)),                                      \
521              "1" ((USItype) (al)),                                      \
522              "r" ((USItype) (bl))                                       \
523            : "cbit")
524 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
525   /* The cmp clears the condition bit.  */ \
526   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
527            : "=r" ((USItype) (sh)),                                     \
528              "=&r" ((USItype) (sl))                                     \
529            : "0" ((USItype) (ah)),                                      \
530              "r" ((USItype) (bh)),                                      \
531              "1" ((USItype) (al)),                                      \
532              "r" ((USItype) (bl))                                       \
533            : "cbit")
534 #endif /* __M32R__ */
535
536 #if defined (__mc68000__) && W_TYPE_SIZE == 32
/* add_ssaaaa (m68k): add on the low words (%5 into %1), then addx on the
   high words (%3 into %0).  AH and AL are tied to the SH and SL data
   registers through the "%0"/"%1" matching constraints; BH must be in a
   data register ("d") while BL may be any general operand ("g").  */
537 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
538   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
539            : "=d" ((USItype) (sh)),                                     \
540              "=&d" ((USItype) (sl))                                     \
541            : "%0" ((USItype) (ah)),                                     \
542              "d" ((USItype) (bh)),                                      \
543              "%1" ((USItype) (al)),                                     \
544              "g" ((USItype) (bl)))
/* sub_ddmmss (m68k): sub on the low words (%5 from %1), then subx on the
   high words (%3 from %0).  Same operand tying as add_ssaaaa, without the
   commutative marker.  */
545 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
546   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
547            : "=d" ((USItype) (sh)),                                     \
548              "=&d" ((USItype) (sl))                                     \
549            : "0" ((USItype) (ah)),                                      \
550              "d" ((USItype) (bh)),                                      \
551              "1" ((USItype) (al)),                                      \
552              "g" ((USItype) (bl)))
553
554 /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r; the '060 is excluded by the test below.  */
555 #if (defined (__mc68020__) && !defined (__mc68060__))
/* mulu.l writes the register pair %1:%0, i.e. w1:w0.  w0 starts out tied to u
   (marked commutative with v); v is the explicit source operand.  */
556 #define umul_ppmm(w1, w0, u, v) \
557   __asm__ ("mulu%.l %3,%1:%0"                                           \
558            : "=d" ((USItype) (w0)),                                     \
559              "=d" ((USItype) (w1))                                      \
560            : "%0" ((USItype) (u)),                                      \
561              "dmi" ((USItype) (v)))
562 #define UMUL_TIME 45
/* divu.l operates on the register pair %1:%0, which is pre-loaded with n1:n0
   through the tied inputs, and takes d as the divisor.  q is read back from
   %0 and r from %1.  */
563 #define udiv_qrnnd(q, r, n1, n0, d) \
564   __asm__ ("divu%.l %4,%1:%0"                                           \
565            : "=d" ((USItype) (q)),                                      \
566              "=d" ((USItype) (r))                                       \
567            : "0" ((USItype) (n0)),                                      \
568              "1" ((USItype) (n1)),                                      \
569              "dmi" ((USItype) (d)))
570 #define UDIV_TIME 90
/* Same operand layout as udiv_qrnnd, but using divs.l: the pair %1:%0 is
   pre-loaded with n1:n0, d is the divisor, q comes back in %0 and r in %1.  */
571 #define sdiv_qrnnd(q, r, n1, n0, d) \
572   __asm__ ("divs%.l %4,%1:%0"                                           \
573            : "=d" ((USItype) (q)),                                      \
574              "=d" ((USItype) (r))                                       \
575            : "0" ((USItype) (n0)),                                      \
576              "1" ((USItype) (n1)),                                      \
577              "dmi" ((USItype) (d)))
578
579 #elif defined (__mcoldfire__) /* not mc68020 */
580
/* ColdFire umul_ppmm.  a and b are copied into d0/d1, partial products are
   formed with four mulu instructions on the original and swapped halves, and
   the pieces are recombined; the result is stored to xl (%1) and xh (%0).
   d0-d4 are scratch and are listed as clobbers.  The sequence matches the
   non-ColdFire variant except that clr.w is used where that one uses eor.w.  */
581 #define umul_ppmm(xh, xl, a, b) \
582   __asm__ ("| Inlined umul_ppmm\n"                                      \
583            "    move%.l %2,%/d0\n"                                      \
584            "    move%.l %3,%/d1\n"                                      \
585            "    move%.l %/d0,%/d2\n"                                    \
586            "    swap    %/d0\n"                                         \
587            "    move%.l %/d1,%/d3\n"                                    \
588            "    swap    %/d1\n"                                         \
589            "    move%.w %/d2,%/d4\n"                                    \
590            "    mulu    %/d3,%/d4\n"                                    \
591            "    mulu    %/d1,%/d2\n"                                    \
592            "    mulu    %/d0,%/d3\n"                                    \
593            "    mulu    %/d0,%/d1\n"                                    \
594            "    move%.l %/d4,%/d0\n"                                    \
595            "    clr%.w  %/d0\n"                                         \
596            "    swap    %/d0\n"                                         \
597            "    add%.l  %/d0,%/d2\n"                                    \
598            "    add%.l  %/d3,%/d2\n"                                    \
599            "    jcc     1f\n"                                           \
600            "    add%.l  %#65536,%/d1\n"                                 \
601            "1:  swap    %/d2\n"                                         \
602            "    moveq   %#0,%/d0\n"                                     \
603            "    move%.w %/d2,%/d0\n"                                    \
604            "    move%.w %/d4,%/d2\n"                                    \
605            "    move%.l %/d2,%1\n"                                      \
606            "    add%.l  %/d1,%/d0\n"                                    \
607            "    move%.l %/d0,%0"                                        \
608            : "=g" ((USItype) (xh)),                                     \
609              "=g" ((USItype) (xl))                                      \
610            : "g" ((USItype) (a)),                                       \
611              "g" ((USItype) (b))                                        \
612            : "d0", "d1", "d2", "d3", "d4")
613 #define UMUL_TIME 100
614 #define UDIV_TIME 400
615 #else /* not ColdFire */
616 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
/* Fallback umul_ppmm for m68k parts handled by neither branch above.  a and b
   are copied into d0/d1, partial products are formed with four mulu
   instructions on the original and swapped halves, and the pieces are
   recombined (the jcc skips the add of 65536 when the middle sum did not
   carry).  The result is stored to xl (%1) and xh (%0); d0-d4 are scratch and
   are listed as clobbers.  */
617 #define umul_ppmm(xh, xl, a, b) \
618   __asm__ ("| Inlined umul_ppmm\n"                                      \
619            "    move%.l %2,%/d0\n"                                      \
620            "    move%.l %3,%/d1\n"                                      \
621            "    move%.l %/d0,%/d2\n"                                    \
622            "    swap    %/d0\n"                                         \
623            "    move%.l %/d1,%/d3\n"                                    \
624            "    swap    %/d1\n"                                         \
625            "    move%.w %/d2,%/d4\n"                                    \
626            "    mulu    %/d3,%/d4\n"                                    \
627            "    mulu    %/d1,%/d2\n"                                    \
628            "    mulu    %/d0,%/d3\n"                                    \
629            "    mulu    %/d0,%/d1\n"                                    \
630            "    move%.l %/d4,%/d0\n"                                    \
631            "    eor%.w  %/d0,%/d0\n"                                    \
632            "    swap    %/d0\n"                                         \
633            "    add%.l  %/d0,%/d2\n"                                    \
634            "    add%.l  %/d3,%/d2\n"                                    \
635            "    jcc     1f\n"                                           \
636            "    add%.l  %#65536,%/d1\n"                                 \
637            "1:  swap    %/d2\n"                                         \
638            "    moveq   %#0,%/d0\n"                                     \
639            "    move%.w %/d2,%/d0\n"                                    \
640            "    move%.w %/d4,%/d2\n"                                    \
641            "    move%.l %/d2,%1\n"                                      \
642            "    add%.l  %/d1,%/d0\n"                                    \
643            "    move%.l %/d0,%0"                                        \
644            : "=g" ((USItype) (xh)),                                     \
645              "=g" ((USItype) (xl))                                      \
646            : "g" ((USItype) (a)),                                       \
647              "g" ((USItype) (b))                                        \
648            : "d0", "d1", "d2", "d3", "d4")
649 #define UMUL_TIME 100
650 #define UDIV_TIME 400
651
652 #endif /* not mc68020 */
653
654 /* The '020, '030, '040 and '060 have bitfield insns.
655    cpu32 disguises as a 68020, but lacks them.  */
656 #if defined (__mc68020__) && !defined (__mcpu32__)
/* bfffo on x, writing the result to count.  Operand 2 is the constant 0 and
   is printed (with the %b modifier) as both the bit-field offset and the
   bit-field width.  NOTE(review): a width field of 0 presumably selects the
   full 32 bits -- confirm against the m68k manual.  */
657 #define count_leading_zeros(count, x) \
658   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
659            : "=d" ((USItype) (count))                                   \
660            : "od" ((USItype) (x)), "n" (0))
661 /* Some ColdFire architectures have a ff1 instruction supported via
662    __builtin_clz. */
663 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
/* Expression macro: assigns __builtin_clz (x) to count.
   COUNT_LEADING_ZEROS_0 states the count for x == 0 as 32.  NOTE(review):
   this relies on the target instruction's result for 0, not on a documented
   __builtin_clz guarantee -- confirm.  */
664 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
665 #define COUNT_LEADING_ZEROS_0 32
666 #endif
667 #endif /* mc68000 */
668
669 #if defined (__m88000__) && W_TYPE_SIZE == 32
/* m88k two-word add.  addu.co forms sl = al + bl, then addu.ci forms
   sh = ah + bh.  NOTE(review): .co/.ci presumably mean carry-out/carry-in --
   confirm.  All four inputs accept a register or a "J" constant, and sl is
   earlyclobber because it is written before ah/bh are read.  */
670 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
671   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
672            : "=r" ((USItype) (sh)),                                     \
673              "=&r" ((USItype) (sl))                                     \
674            : "%rJ" ((USItype) (ah)),                                    \
675              "rJ" ((USItype) (bh)),                                     \
676              "%rJ" ((USItype) (al)),                                    \
677              "rJ" ((USItype) (bl)))
/* m88k two-word subtract.  subu.co forms sl = al - bl, then subu.ci forms
   sh = ah - bh.  Same operand layout as add_ssaaaa, but the inputs are not
   marked commutative.  */
678 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
679   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
680            : "=r" ((USItype) (sh)),                                     \
681              "=&r" ((USItype) (sl))                                     \
682            : "rJ" ((USItype) (ah)),                                     \
683              "rJ" ((USItype) (bh)),                                     \
684              "rJ" ((USItype) (al)),                                     \
685              "rJ" ((USItype) (bl)))
/* Runs ff1 on x into a temporary and stores (result ^ 31) in count.  The XOR
   turns a bit index counted from the low end into a count of leading zeros.  */
686 #define count_leading_zeros(count, x) \
687   do {                                                                  \
688     USItype __cbtmp;                                                    \
689     __asm__ ("ff1 %0,%1"                                                \
690              : "=r" (__cbtmp)                                           \
691              : "r" ((USItype) (x)));                                    \
692     (count) = __cbtmp ^ 31;                                             \
693   } while (0)
694 #define COUNT_LEADING_ZEROS_0 63 /* sic */
695 #if defined (__mc88110__)
/* mc88110 only.  mulu.d writes a double-word product into __xx.__ll; the
   union overlays it with a {__h, __l} struct from which wh and wl are then
   copied.  */
696 #define umul_ppmm(wh, wl, u, v) \
697   do {                                                                  \
698     union {UDItype __ll;                                                \
699            struct {USItype __h, __l;} __i;                              \
700           } __xx;                                                       \
701     __asm__ ("mulu.d    %0,%1,%2"                                       \
702              : "=r" (__xx.__ll)                                         \
703              : "r" ((USItype) (u)),                                     \
704                "r" ((USItype) (v)));                                    \
705     (wh) = __xx.__i.__h;                                                \
706     (wl) = __xx.__i.__l;                                                \
707   } while (0)
/* mc88110 only.  n1:n0 is packed into a double word through the union and
   divided by d with divu.d, which yields only the quotient here; the
   remainder is then rebuilt in C as n0 - q * d.
   NOTE(review): n0 and d each appear twice in the expansion, so arguments
   with side effects would be evaluated more than once.  */
708 #define udiv_qrnnd(q, r, n1, n0, d) \
709   ({union {UDItype __ll;                                                \
710            struct {USItype __h, __l;} __i;                              \
711           } __xx;                                                       \
712   USItype __q;                                                          \
713   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
714   __asm__ ("divu.d %0,%1,%2"                                            \
715            : "=r" (__q)                                                 \
716            : "r" (__xx.__ll),                                           \
717              "r" ((USItype) (d)));                                      \
718   (r) = (n0) - __q * (d); (q) = __q; })
719 #define UMUL_TIME 5
720 #define UDIV_TIME 25
721 #else
/* Timing estimates used when __mc88110__ is not defined (no umul_ppmm or
   udiv_qrnnd is provided in this branch).  */
722 #define UMUL_TIME 17
723 #define UDIV_TIME 150
724 #endif /* __mc88110__ */
725 #endif /* __m88000__ */
726
727 #if defined (__mn10300__)
728 # if defined (__AM33__)
/* AM33: expression macro that assigns __builtin_clz (X) to COUNT.  */
729 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
/* AM33 unsigned multiply: the four-operand mulu takes v (%3) and u (%2) and
   writes w1 (%1) and w0 (%0).  Spelled __asm__ rather than asm so the macro
   still expands correctly when the including file is compiled with -ansi or
   a strict -std= option, matching the other ports in this file.  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
/* AM33 signed multiply: same operand layout as umul_ppmm, using mul.
   Spelled __asm__ rather than asm so the macro still expands correctly when
   the including file is compiled with -ansi or a strict -std= option.  */
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
734 # else
/* Non-AM33 unsigned multiply: two nops, then mulu of v (%3) into w0 (%0,
   tied to u); w1 is returned through the "z" register class.  Spelled
   __asm__ rather than asm so the macro still expands correctly when the
   including file is compiled with -ansi or a strict -std= option.  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
/* Non-AM33 signed multiply: two nops, then mul of v (%3) into w0 (%0, tied
   to u); w1 is returned through the "z" register class.  Spelled __asm__
   rather than asm so the macro still expands correctly when the including
   file is compiled with -ansi or a strict -std= option.  */
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
739 # endif
/* Two-word add written in plain C: {ah,al} and {bh,bl} are packed into
   DWunion temporaries, added as a single double-word value, and the halves
   of the sum are copied out to sl and sh.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __a;                                \
    DWunion __b;                                \
    __a.s.low = (al);                           \
    __a.s.high = (ah);                          \
    __b.s.low = (bl);                           \
    __b.s.high = (bh);                          \
    __a.ll += __b.ll;                           \
    (sl) = __a.s.low;                           \
    (sh) = __a.s.high;                          \
  } while (0)
/* Two-word subtract written in plain C: {ah,al} and {bh,bl} are packed into
   DWunion temporaries, subtracted as a single double-word value, and the
   halves of the difference are copied out to sl and sh.  */
# define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __a;                                \
    DWunion __b;                                \
    __a.s.low = (al);                           \
    __a.s.high = (ah);                          \
    __b.s.low = (bl);                           \
    __b.s.high = (bh);                          \
    __a.ll -= __b.ll;                           \
    (sl) = __a.s.low;                           \
    (sh) = __a.s.high;                          \
  } while (0)
/* divu of d (%2) into %0, which is pre-loaded with nl and returns q; nh is
   tied to the "z" register class operand that returns r.  Spelled __asm__
   rather than asm so the macro still expands correctly when the including
   file is compiled with -ansi or a strict -std= option.  */
# define udiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
/* Signed counterpart of udiv_qrnnd using div: %0 is pre-loaded with nl and
   returns q; nh is tied to the "z" register class operand that returns r.
   Spelled __asm__ rather than asm so the macro still expands correctly when
   the including file is compiled with -ansi or a strict -std= option.  */
# define sdiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
762 #endif
763
764 #if defined (__mips__) && W_TYPE_SIZE == 32
/* 32x32->64 unsigned multiply in plain C: u is widened to UDItype and
   multiplied by v, then the product is split into its high word (w1) and
   low word (w0).  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UDItype __x = (USItype) (u);                                        \
    __x *= (USItype) (v);                                               \
    (w1) = (USItype) (__x >> 32);                                       \
    (w0) = (USItype) __x;                                               \
  } while (0)
#define UMUL_TIME 10
#define UDIV_TIME 100
773
774 #if (__mips == 32 || __mips == 64) && ! __mips16
/* Only defined when __mips is 32 or 64 and __mips16 is not set (see the
   enclosing #if).  Expression macro that assigns __builtin_clz (X) to COUNT;
   COUNT_LEADING_ZEROS_0 states the count for X == 0 as 32.  */
775 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
776 #define COUNT_LEADING_ZEROS_0 32
777 #endif
778 #endif /* __mips__ */
779
780 #if defined (__ns32000__) && W_TYPE_SIZE == 32
/* meid multiplies v into the double-word operand __xx.__ll, whose input is
   tied to u.  The union overlays the result with a {__l, __h} struct, from
   which w1 (high) and w0 (low) are copied.  */
781 #define umul_ppmm(w1, w0, u, v) \
782   ({union {UDItype __ll;                                                \
783            struct {USItype __l, __h;} __i;                              \
784           } __xx;                                                       \
785   __asm__ ("meid %2,%0"                                                 \
786            : "=g" (__xx.__ll)                                           \
787            : "%0" ((USItype) (u)),                                      \
788              "g" ((USItype) (v)));                                      \
789   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* Statement expression that yields the whole UDItype product of u and v,
   computed with the same meid instruction as umul_ppmm.  */
790 #define __umulsidi3(u, v) \
791   ({UDItype __w;                                                        \
792     __asm__ ("meid %2,%0"                                               \
793              : "=g" (__w)                                               \
794              : "%0" ((USItype) (u)),                                    \
795                "g" ((USItype) (v)));                                    \
796     __w; })
/* n1:n0 is packed into __xx through the {__l, __h} overlay and divided in
   place by d with deid.  Afterwards the low half holds the remainder and the
   high half the quotient.  */
797 #define udiv_qrnnd(q, r, n1, n0, d) \
798   ({union {UDItype __ll;                                                \
799            struct {USItype __l, __h;} __i;                              \
800           } __xx;                                                       \
801   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
802   __asm__ ("deid %2,%0"                                                 \
803            : "=g" (__xx.__ll)                                           \
804            : "0" (__xx.__ll),                                           \
805              "g" ((USItype) (d)));                                      \
806   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
/* ffsd on x with the result in count.  count is both output and (through the
   tied "0" input) pre-loaded with 0 before the instruction runs.  */
807 #define count_trailing_zeros(count,x) \
808   do {                                                                  \
809     __asm__ ("ffsd     %2,%0"                                           \
810             : "=r" ((USItype) (count))                                  \
811             : "0" ((USItype) 0),                                        \
812               "r" ((USItype) (x)));                                     \
813   } while (0)
814 #endif /* __ns32000__ */
815
816 /* FIXME: We should test _IBMR2 here when we add assembly support for the
817    system vendor compilers.
818    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
819    enough, since that hits ARM and m68k too.  */
820 #if (defined (_ARCH_PPC)        /* AIX */                               \
821      || defined (_ARCH_PWR)     /* AIX */                               \
822      || defined (_ARCH_COM)     /* AIX */                               \
823      || defined (__powerpc__)   /* gcc */                               \
824      || defined (__POWERPC__)   /* BEOS */                              \
825      || defined (__ppc__)       /* Darwin */                            \
826      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
827      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
828          && CPU_FAMILY == PPC)                                                \
829      ) && W_TYPE_SIZE == 32
/* 32-bit POWER/PowerPC two-word add.  The low words are always added with a
   carrying add (bl may be an "I" immediate, selected by %I).  The high word
   has three forms:
     - bh is a compile-time 0:        addze on ah;
     - bh is a compile-time all-ones: addme on ah;
     - otherwise:                     adde of ah and bh.
   Each {x|y} pair gives two alternative mnemonic spellings.  */
830 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
831   do {                                                                  \
832     if (__builtin_constant_p (bh) && (bh) == 0)                         \
833       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
834              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
835     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
836       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
837              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
838     else                                                                \
839       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
840              : "=r" (sh), "=&r" (sl)                                    \
841              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
842   } while (0)
/* 32-bit POWER/PowerPC two-word subtract.  The low word is always formed
   with a carrying subtract-from (al may be an "I" immediate, selected by
   %I).  The high word has five forms:
     - ah is a compile-time 0:        subfze on bh;
     - ah is a compile-time all-ones: subfme on bh;
     - bh is a compile-time 0:        addme on ah;
     - bh is a compile-time all-ones: addze on ah;
     - otherwise:                     subfe of bh from ah.
   Each {x|y} pair gives two alternative mnemonic spellings.  */
843 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
844   do {                                                                  \
845     if (__builtin_constant_p (ah) && (ah) == 0)                         \
846       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
847                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
848     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
849       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
850                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
851     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
852       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
853                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
854     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
855       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
856                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
857     else                                                                \
858       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
859                : "=r" (sh), "=&r" (sl)                                  \
860                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
861   } while (0)
/* Single cntlz/cntlzw instruction from x into count.  COUNT_LEADING_ZEROS_0
   states the count for x == 0 as 32.  */
862 #define count_leading_zeros(count, x) \
863   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
864 #define COUNT_LEADING_ZEROS_0 32
865 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
866   || defined (__ppc__)                                                    \
867   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
868   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
869          && CPU_FAMILY == PPC)
870 #define umul_ppmm(ph, pl, m0, m1) \
871   do {                                                                  \
872     USItype __m0 = (m0), __m1 = (m1);                                   \
873     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));      \
874     (pl) = __m0 * __m1;                                                 \
875   } while (0)
876 #define UMUL_TIME 15
877 #define smul_ppmm(ph, pl, m0, m1) \
878   do {                                                                  \
879     SItype __m0 = (m0), __m1 = (m1);                                    \
880     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));       \
881     (pl) = __m0 * __m1;                                                 \
882   } while (0)
883 #define SMUL_TIME 14
884 #define UDIV_TIME 120
885 #elif defined (_ARCH_PWR)
/* _ARCH_PWR branch: no umul_ppmm is defined here, only its timing estimate.
   smul_ppmm uses the single mul instruction; xh is operand 0 and xl is
   returned through the "q" register class.  NOTE(review): "q" is presumably
   the MQ register -- confirm against the rs6000 constraint list.  */
886 #define UMUL_TIME 8
887 #define smul_ppmm(xh, xl, m0, m1) \
888   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
889 #define SMUL_TIME 4
/* _ARCH_PWR branch: div of nh (%2) by d (%4) with the quotient in q; nl is
   tied to the "q"-class operand that returns r.  */
890 #define sdiv_qrnnd(q, r, nh, nl, d) \
891   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
892 #define UDIV_TIME 100
893 #endif
894 #endif /* 32-bit POWER architecture variants.  */
895
896 /* We should test _IBMR2 here when we add assembly support for the system
897    vendor compilers.  */
898 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
/* 64-bit PowerPC two-word add.  The low words are always added with a
   carrying add (bl may be an "I" immediate, selected by %I).  The high word
   has three forms:
     - bh is a compile-time 0:        addze on ah;
     - bh is a compile-time all-ones: addme on ah;
     - otherwise:                     adde of ah and bh.
   The all-ones test uses UDItype here.  */
899 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
900   do {                                                                  \
901     if (__builtin_constant_p (bh) && (bh) == 0)                         \
902       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
903              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
904     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
905       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
906              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
907     else                                                                \
908       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
909              : "=r" (sh), "=&r" (sl)                                    \
910              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
911   } while (0)
/* 64-bit PowerPC two-word subtract.  The low word is always formed with a
   carrying subtract-from (al may be an "I" immediate, selected by %I).  The
   high word has five forms:
     - ah is a compile-time 0:        subfze on bh;
     - ah is a compile-time all-ones: subfme on bh;
     - bh is a compile-time 0:        addme on ah;
     - bh is a compile-time all-ones: addze on ah;
     - otherwise:                     subfe of bh from ah.
   The all-ones tests use UDItype here.  */
912 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
913   do {                                                                  \
914     if (__builtin_constant_p (ah) && (ah) == 0)                         \
915       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
916                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
917     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
918       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
919                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
920     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
921       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
922                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
923     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
924       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
925                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
926     else                                                                \
927       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
928                : "=r" (sh), "=&r" (sl)                                  \
929                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
930   } while (0)
/* Single cntlzd instruction from x into count.  COUNT_LEADING_ZEROS_0 states
   the count for x == 0 as 64.  */
931 #define count_leading_zeros(count, x) \
932   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
933 #define COUNT_LEADING_ZEROS_0 64
934 #define umul_ppmm(ph, pl, m0, m1) \
935   do {                                                                  \
936     UDItype __m0 = (m0), __m1 = (m1);                                   \
937     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));      \
938     (pl) = __m0 * __m1;                                                 \
939   } while (0)
940 #define UMUL_TIME 15
941 #define smul_ppmm(ph, pl, m0, m1) \
942   do {                                                                  \
943     DItype __m0 = (m0), __m1 = (m1);                                    \
944     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));       \
945     (pl) = __m0 * __m1;                                                 \
946   } while (0)
947 #define SMUL_TIME 14  /* ??? */
948 #define UDIV_TIME 120 /* ??? */
949 #endif /* 64-bit PowerPC.  */
950
951 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
/* RT/ROMP two-word add.  "a" adds bl into the low word (tied to al), then
   "ae" adds bh into the high word (tied to ah).  sl is earlyclobber because
   it is written before bh is read.  */
952 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
953   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
954            : "=r" ((USItype) (sh)),                                     \
955              "=&r" ((USItype) (sl))                                     \
956            : "%0" ((USItype) (ah)),                                     \
957              "r" ((USItype) (bh)),                                      \
958              "%1" ((USItype) (al)),                                     \
959              "r" ((USItype) (bl)))
/* RT/ROMP two-word subtract.  "s" subtracts bl from the low word (tied to
   al), then "se" subtracts bh from the high word (tied to ah).  */
960 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
961   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
962            : "=r" ((USItype) (sh)),                                     \
963              "=&r" ((USItype) (sl))                                     \
964            : "0" ((USItype) (ah)),                                      \
965              "r" ((USItype) (bh)),                                      \
966              "1" ((USItype) (al)),                                      \
967              "r" ((USItype) (bl)))
/* RT/ROMP 32x32->64 multiply.  r2 is cleared, __m0 is moved to r10 with mts,
   and sixteen "m r2,%3" steps against __m1 are issued.  The results are then
   copied out: ph from r2 (cas) and pl from r10 (mfs).  r2 is listed as a
   clobber.
   The closing C statement adds __m1 to ph when __m0 has its top bit set, and
   __m0 when __m1 has its top bit set.  NOTE(review): this is presumably the
   signed-to-unsigned correction of the product, and it assumes >> on SItype
   is an arithmetic shift -- confirm.  */
968 #define umul_ppmm(ph, pl, m0, m1) \
969   do {                                                                  \
970     USItype __m0 = (m0), __m1 = (m1);                                   \
971     __asm__ (                                                           \
972        "s       r2,r2\n"                                                \
973 "       mts     r10,%2\n"                                               \
974 "       m       r2,%3\n"                                                \
975 "       m       r2,%3\n"                                                \
976 "       m       r2,%3\n"                                                \
977 "       m       r2,%3\n"                                                \
978 "       m       r2,%3\n"                                                \
979 "       m       r2,%3\n"                                                \
980 "       m       r2,%3\n"                                                \
981 "       m       r2,%3\n"                                                \
982 "       m       r2,%3\n"                                                \
983 "       m       r2,%3\n"                                                \
984 "       m       r2,%3\n"                                                \
985 "       m       r2,%3\n"                                                \
986 "       m       r2,%3\n"                                                \
987 "       m       r2,%3\n"                                                \
988 "       m       r2,%3\n"                                                \
989 "       m       r2,%3\n"                                                \
990 "       cas     %0,r2,r0\n"                                             \
991 "       mfs     r10,%1"                                                 \
992              : "=r" ((USItype) (ph)),                                   \
993                "=r" ((USItype) (pl))                                    \
994              : "%r" (__m0),                                             \
995                 "r" (__m1)                                              \
996              : "r2");                                                   \
997     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
998              + (((SItype) __m1 >> 31) & __m0));                         \
999   } while (0)
1000 #define UMUL_TIME 20
1001 #define UDIV_TIME 200
/* Two-step count.  For x >= 0x10000, clz is applied to x >> 16; otherwise
   clz is applied to x itself and 16 is added to the result.
   NOTE(review): this suggests clz only examines a 16-bit quantity -- confirm.
   x is expanded more than once, so it should have no side effects.  */
1002 #define count_leading_zeros(count, x) \
1003   do {                                                                  \
1004     if ((x) >= 0x10000)                                                 \
1005       __asm__ ("clz     %0,%1"                                          \
1006                : "=r" ((USItype) (count))                               \
1007                : "r" ((USItype) (x) >> 16));                            \
1008     else                                                                \
1009       {                                                                 \
1010         __asm__ ("clz   %0,%1"                                          \
1011                  : "=r" ((USItype) (count))                             \
1012                  : "r" ((USItype) (x)));                                        \
1013         (count) += 16;                                                  \
1014       }                                                                 \
1015   } while (0)
1016 #endif
1017
1018 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1019 #ifndef __sh1__
/* Not defined for __sh1__ (see the enclosing #ifndef).  dmulu.l multiplies u
   by v; the result is then copied out with sts, w0 from macl and w1 from
   mach.  Both macl and mach are listed as clobbers.  */
1020 #define umul_ppmm(w1, w0, u, v) \
1021   __asm__ (                                                             \
1022        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1023            : "=r<" ((USItype)(w1)),                                     \
1024              "=r<" ((USItype)(w0))                                      \
1025            : "r" ((USItype)(u)),                                        \
1026              "r" ((USItype)(v))                                         \
1027            : "macl", "mach")
1028 #define UMUL_TIME 5
1029 #endif
1030
1031 /* This is the same algorithm as __udiv_qrnnd_c.  */
1032 #define UDIV_NEEDS_NORMALIZATION 1
1033
/* Declares the hidden-visibility helper __udiv_qrnnd_16 locally and calls it
   twice through operand 5 (jsr @%5), with d in r5 and a swapped copy of d in
   r6.  q is assembled at the end from r1 (swap.w then or).  r is the "z"
   class output, tied on input to n1.  The fixed registers r1, r2, r4, r5 and
   r6, plus pr and t, are listed as clobbers.  */
1034 #define udiv_qrnnd(q, r, n1, n0, d) \
1035   do {                                                                  \
1036     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1037                         __attribute__ ((visibility ("hidden")));        \
1038     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1039     __asm__ (                                                           \
1040         "mov%M4 %4,r5\n"                                                \
1041 "       swap.w %3,r4\n"                                                 \
1042 "       swap.w r5,r6\n"                                                 \
1043 "       jsr @%5\n"                                                      \
1044 "       shll16 r6\n"                                                    \
1045 "       swap.w r4,r4\n"                                                 \
1046 "       jsr @%5\n"                                                      \
1047 "       swap.w r1,%0\n"                                                 \
1048 "       or r1,%0"                                                       \
1049         : "=r" (q), "=&z" (r)                                           \
1050         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1051         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1052   } while (0)
1053
1054 #define UDIV_TIME 80
1055
/* SH two-word subtract.  clrt clears T, the first subc subtracts bl from the
   low word (tied to al), and the second subc subtracts bh from the high word
   (tied to ah).  T is listed as clobbered.
   sl is marked earlyclobber: it is written by the first subc while bh is
   still to be read by the second, so it must not share a register with bh.
   This matches the "=&r" used for sl by the other ports in this file.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
           : "=r" (sh), "=&r" (sl)                                      \
           : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1060
1061 #endif /* __sh__ */
1062
1063 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* 32x32->64 unsigned multiply in plain C.  Both arguments are parenthesized
   so that the USItype casts apply to the whole argument expression.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* count_leading_zeros (count, x) for SHmedia with a 32-bit word.  x is
   truncated to USItype and zero-extended into the 64-bit temporary x_,
   the nsb instruction is applied to it, and 31 is subtracted from the
   instruction's result to produce count.

   NOTE(review): presumably nsb yields the number of redundant sign bits
   of the 64-bit register, i.e. 31 + clz for a zero-extended 32-bit value
   -- confirm against the SHmedia instruction reference.

   COUNT_LEADING_ZEROS_0 is the value this macro produces for x == 0.  */
1065 #define count_leading_zeros(count, x) \
1066   do                                                                    \
1067     {                                                                   \
1068       UDItype x_ = (USItype)(x);                                        \
1069       SItype c_;                                                        \
1070                                                                         \
1071       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1072       (count) = c_ - 31;                                                \
1073     }                                                                   \
1074   while (0)
1075 #define COUNT_LEADING_ZEROS_0 32
1076 #endif
1077
1078 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1079     && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for 32-bit SPARC: two-word addition
   (sh, sl) = (ah, al) + (bh, bl).  addcc adds the low words (operands 4
   and 5) into sl and sets the condition codes; addx then adds the high
   words (operands 2 and 3) plus the carry into sh.  sl is an earlyclobber
   output because it is written before the high-word inputs are read.  The
   "%" modifiers mark each input pair as commutative.  */
1080 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1081   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1082            : "=r" ((USItype) (sh)),                                     \
1083              "=&r" ((USItype) (sl))                                     \
1084            : "%rJ" ((USItype) (ah)),                                    \
1085              "rI" ((USItype) (bh)),                                     \
1086              "%rJ" ((USItype) (al)),                                    \
1087              "rI" ((USItype) (bl))                                      \
1088            __CLOBBER_CC)
/* sub_ddmmss (sh, sl, ah, al, bh, bl) for 32-bit SPARC: two-word
   subtraction (sh, sl) = (ah, al) - (bh, bl).  subcc subtracts the low
   words (operand 5 from operand 4) into sl and sets the condition codes;
   subx then subtracts the high words (operand 3 from operand 2) together
   with the borrow into sh.  sl is an earlyclobber output because it is
   written before the high-word inputs are read.  Unlike the addition
   macro, the inputs are not marked commutative.  */
1089 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1090   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1091            : "=r" ((USItype) (sh)),                                     \
1092              "=&r" ((USItype) (sl))                                     \
1093            : "rJ" ((USItype) (ah)),                                     \
1094              "rI" ((USItype) (bh)),                                     \
1095              "rJ" ((USItype) (al)),                                     \
1096              "rI" ((USItype) (bl))                                      \
1097            __CLOBBER_CC)
1098 #if defined (__sparc_v8__)
/* umul_ppmm (w1, w0, u, v) for SPARC v8: umul multiplies u by v and
   writes its register result to w0; the following rd copies the %y
   register into w1.
   NOTE(review): presumably w0 is the low word of the product and %y the
   high word -- confirm against the SPARC v8 manual.  */
1099 #define umul_ppmm(w1, w0, u, v) \
1100   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1101            : "=r" ((USItype) (w1)),                                     \
1102              "=r" ((USItype) (w0))                                      \
1103            : "r" ((USItype) (u)),                                       \
1104              "r" ((USItype) (v)))
/* udiv_qrnnd (q, r, n1, n0, d) for SPARC v8.  The sequence is:
     mov   n1 -> %y, followed by three nops;
     udiv  n0, d -> q;
     umul  q, d -> r;
     sub   n0 - r -> r.
   So r is computed as n0 - q * d in word arithmetic.  Both outputs are
   earlyclobber because inputs are still read after q and r have been
   written.
   NOTE(review): presumably udiv divides the double word %y:n0 by d, which
   is why n1 is moved into %y first -- confirm against the SPARC v8
   manual.  */
1105 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1106   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1107            : "=&r" ((USItype) (__q)),                                   \
1108              "=&r" ((USItype) (__r))                                    \
1109            : "r" ((USItype) (__n1)),                                    \
1110              "r" ((USItype) (__n0)),                                    \
1111              "r" ((USItype) (__d)))
1112 #else
1113 #if defined (__sparclite__)
1114 /* This has hardware multiply but not divide.  It also has two additional
1115    instructions scan (ffs from high bit) and divscc.  */
/* umul_ppmm (w1, w0, u, v) for sparclite: umul multiplies u by v and
   writes its register result to w0; the following rd copies the %y
   register into w1.  The instruction sequence is the same as the
   __sparc_v8__ variant of this macro.  */
1116 #define umul_ppmm(w1, w0, u, v) \
1117   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1118            : "=r" ((USItype) (w1)),                                     \
1119              "=r" ((USItype) (w0))                                      \
1120            : "r" ((USItype) (u)),                                       \
1121              "r" ((USItype) (v)))
/* udiv_qrnnd (q, r, n1, n0, d) for sparclite, built from the divscc
   divide-step instruction.  n1 is written to %y, tst clears the condition
   codes, and then 32 divscc steps are issued: the first takes n0, the
   intermediate ones chain through %g1, and the last one writes q.  The
   remainder is read back from %y into r.  The final "bl,a 1f" with the
   add in the following slot adds d to r on the "less than" condition.

   %g1 is used as scratch and is listed as a clobber together with the
   condition codes.

   NOTE(review): the outputs are plain "=r" (no earlyclobber), yet operand
   4 (d) is still read by the last add after q and r have been written --
   confirm the compiler can never assign d the same register as an
   output.  */
1122 #define udiv_qrnnd(q, r, n1, n0, d) \
1123   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1124 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1125 "       tst     %%g0\n"                                                 \
1126 "       divscc  %3,%4,%%g1\n"                                           \
1127 "       divscc  %%g1,%4,%%g1\n"                                         \
1128 "       divscc  %%g1,%4,%%g1\n"                                         \
1129 "       divscc  %%g1,%4,%%g1\n"                                         \
1130 "       divscc  %%g1,%4,%%g1\n"                                         \
1131 "       divscc  %%g1,%4,%%g1\n"                                         \
1132 "       divscc  %%g1,%4,%%g1\n"                                         \
1133 "       divscc  %%g1,%4,%%g1\n"                                         \
1134 "       divscc  %%g1,%4,%%g1\n"                                         \
1135 "       divscc  %%g1,%4,%%g1\n"                                         \
1136 "       divscc  %%g1,%4,%%g1\n"                                         \
1137 "       divscc  %%g1,%4,%%g1\n"                                         \
1138 "       divscc  %%g1,%4,%%g1\n"                                         \
1139 "       divscc  %%g1,%4,%%g1\n"                                         \
1140 "       divscc  %%g1,%4,%%g1\n"                                         \
1141 "       divscc  %%g1,%4,%%g1\n"                                         \
1142 "       divscc  %%g1,%4,%%g1\n"                                         \
1143 "       divscc  %%g1,%4,%%g1\n"                                         \
1144 "       divscc  %%g1,%4,%%g1\n"                                         \
1145 "       divscc  %%g1,%4,%%g1\n"                                         \
1146 "       divscc  %%g1,%4,%%g1\n"                                         \
1147 "       divscc  %%g1,%4,%%g1\n"                                         \
1148 "       divscc  %%g1,%4,%%g1\n"                                         \
1149 "       divscc  %%g1,%4,%%g1\n"                                         \
1150 "       divscc  %%g1,%4,%%g1\n"                                         \
1151 "       divscc  %%g1,%4,%%g1\n"                                         \
1152 "       divscc  %%g1,%4,%%g1\n"                                         \
1153 "       divscc  %%g1,%4,%%g1\n"                                         \
1154 "       divscc  %%g1,%4,%%g1\n"                                         \
1155 "       divscc  %%g1,%4,%%g1\n"                                         \
1156 "       divscc  %%g1,%4,%%g1\n"                                         \
1157 "       divscc  %%g1,%4,%0\n"                                           \
1158 "       rd      %%y,%1\n"                                               \
1159 "       bl,a 1f\n"                                                      \
1160 "       add     %1,%4,%1\n"                                             \
1161 "1:     ! End of inline udiv_qrnnd"                                     \
1162            : "=r" ((USItype) (q)),                                      \
1163              "=r" ((USItype) (r))                                       \
1164            : "r" ((USItype) (n1)),                                      \
1165              "r" ((USItype) (n0)),                                      \
1166              "rI" ((USItype) (d))                                       \
1167            : "g1" __AND_CLOBBER_CC)
1168 #define UDIV_TIME 37
/* count_leading_zeros (count, x) for sparclite: a single scan instruction
   applied to x with the immediate 1, with its result stored in count.
   COUNT_LEADING_ZEROS_0 is deliberately not defined for this target; the
   comment after the macro explains why.  */
1169 #define count_leading_zeros(count, x) \
1170   do {                                                                  \
1171   __asm__ ("scan %1,1,%0"                                               \
1172            : "=r" ((USItype) (count))                                   \
1173            : "r" ((USItype) (x)));                                      \
1174   } while (0)
1175 /* Early sparclites return 63 for an argument of 0, but they warn that future
1176    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1177    undefined.  */
1178 #else
1179 /* SPARC without integer multiplication and divide instructions.
1180    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm (w1, w0, u, v) built from the mulscc multiply-step
   instruction.  u is written to %y; %o5 is set to u masked by the sign of
   v (sra v,31 then and with u); andcc clears %g1 and the condition codes.
   32 mulscc steps with v follow, then one final step with 0.  w1 is
   %g1 + %o5 and w0 is read back from %y.

   NOTE(review): the %o5 term is presumably the correction that turns the
   signed result of the mulscc sequence into an unsigned high word --
   confirm against the SPARC manual.

   %g1 and %o5 are scratch registers and are listed as clobbers together
   with the condition codes.  The setup instructions are marked "Don't
   move this insn" because they fill the slots after the write to %y.  */
1181 #define umul_ppmm(w1, w0, u, v) \
1182   __asm__ ("! Inlined umul_ppmm\n"                                      \
1183 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1184 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1185 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1186 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1187 "       mulscc  %%g1,%3,%%g1\n"                                         \
1188 "       mulscc  %%g1,%3,%%g1\n"                                         \
1189 "       mulscc  %%g1,%3,%%g1\n"                                         \
1190 "       mulscc  %%g1,%3,%%g1\n"                                         \
1191 "       mulscc  %%g1,%3,%%g1\n"                                         \
1192 "       mulscc  %%g1,%3,%%g1\n"                                         \
1193 "       mulscc  %%g1,%3,%%g1\n"                                         \
1194 "       mulscc  %%g1,%3,%%g1\n"                                         \
1195 "       mulscc  %%g1,%3,%%g1\n"                                         \
1196 "       mulscc  %%g1,%3,%%g1\n"                                         \
1197 "       mulscc  %%g1,%3,%%g1\n"                                         \
1198 "       mulscc  %%g1,%3,%%g1\n"                                         \
1199 "       mulscc  %%g1,%3,%%g1\n"                                         \
1200 "       mulscc  %%g1,%3,%%g1\n"                                         \
1201 "       mulscc  %%g1,%3,%%g1\n"                                         \
1202 "       mulscc  %%g1,%3,%%g1\n"                                         \
1203 "       mulscc  %%g1,%3,%%g1\n"                                         \
1204 "       mulscc  %%g1,%3,%%g1\n"                                         \
1205 "       mulscc  %%g1,%3,%%g1\n"                                         \
1206 "       mulscc  %%g1,%3,%%g1\n"                                         \
1207 "       mulscc  %%g1,%3,%%g1\n"                                         \
1208 "       mulscc  %%g1,%3,%%g1\n"                                         \
1209 "       mulscc  %%g1,%3,%%g1\n"                                         \
1210 "       mulscc  %%g1,%3,%%g1\n"                                         \
1211 "       mulscc  %%g1,%3,%%g1\n"                                         \
1212 "       mulscc  %%g1,%3,%%g1\n"                                         \
1213 "       mulscc  %%g1,%3,%%g1\n"                                         \
1214 "       mulscc  %%g1,%3,%%g1\n"                                         \
1215 "       mulscc  %%g1,%3,%%g1\n"                                         \
1216 "       mulscc  %%g1,%3,%%g1\n"                                         \
1217 "       mulscc  %%g1,%3,%%g1\n"                                         \
1218 "       mulscc  %%g1,%3,%%g1\n"                                         \
1219 "       mulscc  %%g1,0,%%g1\n"                                          \
1220 "       add     %%g1,%%o5,%0\n"                                         \
1221 "       rd      %%y,%1"                                                 \
1222            : "=r" ((USItype) (w1)),                                     \
1223              "=r" ((USItype) (w0))                                      \
1224            : "%rI" ((USItype) (u)),                                     \
1225              "r" ((USItype) (v))                                                \
1226            : "g1", "o5" __AND_CLOBBER_CC)
1227 #define UMUL_TIME 39            /* 39 instructions */
1228 /* It's quite necessary to add this much assembler for the sparc.
1229    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* udiv_qrnnd (__q, __r, __n1, __n0, __d): a software division loop.
   Operand 0 (the quotient register) starts out holding __n0 and operand 1
   (the remainder register) starts out holding __n1, through the "0" and
   "1" matching constraints; operand 2 is __d.  %g1 is loaded with 32 and
   decremented once per step, so the loop runs 32 times.  Each step shifts
   a bit into the low end of operand 0 with addxcc/addcc and conditionally
   subtracts __d from operand 1; the instructions indented by one extra
   space sit directly after a branch.  The closing "xnor %0,0,%0" leaves
   the bitwise complement of the accumulated bits in __q.  %g1 and the
   condition codes are clobbered.  */
1230 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1231   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1232 "       mov     32,%%g1\n"                                              \
1233 "       subcc   %1,%2,%%g0\n"                                           \
1234 "1:     bcs     5f\n"                                                   \
1235 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1236 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1237 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1238 "       subcc   %%g1,1,%%g1\n"                                          \
1239 "2:     bne     1b\n"                                                   \
1240 "        subcc  %1,%2,%%g0\n"                                           \
1241 "       bcs     3f\n"                                                   \
1242 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1243 "       b       3f\n"                                                   \
1244 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1245 "4:     sub     %1,%2,%1\n"                                             \
1246 "5:     addxcc  %1,%1,%1\n"                                             \
1247 "       bcc     2b\n"                                                   \
1248 "        subcc  %%g1,1,%%g1\n"                                          \
1249 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1250 "       bne     4b\n"                                                   \
1251 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1252 "       sub     %1,%2,%1\n"                                             \
1253 "3:     xnor    %0,0,%0\n"                                              \
1254 "       ! End of inline udiv_qrnnd"                                     \
1255            : "=&r" ((USItype) (__q)),                                   \
1256              "=&r" ((USItype) (__r))                                    \
1257            : "r" ((USItype) (__d)),                                     \
1258              "1" ((USItype) (__n1)),                                    \
1259              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1260 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1261 #endif /* __sparclite__ */
1262 #endif /* __sparc_v8__ */
1263 #endif /* sparc32 */
1264
1265 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1266     && W_TYPE_SIZE == 64
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for 64-bit SPARC: two-word addition
   (sh, sl) = (ah, al) + (bh, bl) on UDItype words.  addcc adds the low
   words into sl and sets the condition codes; a plain add forms the
   high-word sum in sh; "bcs,a,pn %xcc" then tests the carry of the 64-bit
   condition codes, and the instruction in its slot adds 1 to sh.
   NOTE(review): the ",a" annul bit presumably suppresses that increment
   when the branch is not taken -- confirm against the SPARC v9 manual.
   sl is an earlyclobber output because it is written before the
   high-word inputs are read.  */
1267 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1268   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1269            "add %r2,%3,%0\n\t"                                          \
1270            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1271            "add %0, 1, %0\n"                                            \
1272            "1:"                                                         \
1273            : "=r" ((UDItype)(sh)),                                      \
1274              "=&r" ((UDItype)(sl))                                      \
1275            : "%rJ" ((UDItype)(ah)),                                     \
1276              "rI" ((UDItype)(bh)),                                      \
1277              "%rJ" ((UDItype)(al)),                                     \
1278              "rI" ((UDItype)(bl))                                       \
1279            __CLOBBER_CC)
1280
/* sub_ddmmss (sh, sl, ah, al, bh, bl) for 64-bit SPARC: two-word
   subtraction (sh, sl) = (ah, al) - (bh, bl) on UDItype words.  subcc
   subtracts the low words into sl and sets the condition codes; a plain
   sub forms the high-word difference in sh; "bcs,a,pn %xcc" then tests
   the borrow in the 64-bit condition codes, and the instruction in its
   slot subtracts 1 from sh.
   NOTE(review): the ",a" annul bit presumably suppresses that decrement
   when the branch is not taken -- confirm against the SPARC v9 manual.
   sl is an earlyclobber output because it is written before the
   high-word inputs are read.  */
1281 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1282   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1283            "sub %r2,%3,%0\n\t"                                          \
1284            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1285            "sub %0, 1, %0\n\t"                                          \
1286            "1:"                                                         \
1287            : "=r" ((UDItype)(sh)),                                      \
1288              "=&r" ((UDItype)(sl))                                      \
1289            : "rJ" ((UDItype)(ah)),                                      \
1290              "rI" ((UDItype)(bh)),                                      \
1291              "rJ" ((UDItype)(al)),                                      \
1292              "rI" ((UDItype)(bl))                                       \
1293            __CLOBBER_CC)
1294
/* umul_ppmm (wh, wl, u, v) for 64-bit SPARC: wh:wl = u * v, a 64x64
   multiply assembled from mulx instructions applied to the 32-bit halves
   of the operands (extracted with srl ...,0 and srlx ...,32).  The
   partial products are combined with addcc, and a carry word is built in
   operand 2 with sethi/add/movcc before the final adds.  tmp1..tmp4 are
   scratch registers; they and wl are earlyclobber outputs because they
   are written while u and v are still being read.  The asm is marked
   volatile and clobbers the condition codes.
   UMUL_TIME and UDIV_TIME are the cost estimates for this target.  */
1295 #define umul_ppmm(wh, wl, u, v)                                         \
1296   do {                                                                  \
1297           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1298           __asm__ __volatile__ (                                        \
1299                    "srl %7,0,%3\n\t"                                    \
1300                    "mulx %3,%6,%1\n\t"                                  \
1301                    "srlx %6,32,%2\n\t"                                  \
1302                    "mulx %2,%3,%4\n\t"                                  \
1303                    "sllx %4,32,%5\n\t"                                  \
1304                    "srl %6,0,%3\n\t"                                    \
1305                    "sub %1,%5,%5\n\t"                                   \
1306                    "srlx %5,32,%5\n\t"                                  \
1307                    "addcc %4,%5,%4\n\t"                                 \
1308                    "srlx %7,32,%5\n\t"                                  \
1309                    "mulx %3,%5,%3\n\t"                                  \
1310                    "mulx %2,%5,%5\n\t"                                  \
1311                    "sethi %%hi(0x80000000),%2\n\t"                      \
1312                    "addcc %4,%3,%4\n\t"                                 \
1313                    "srlx %4,32,%4\n\t"                                  \
1314                    "add %2,%2,%2\n\t"                                   \
1315                    "movcc %%xcc,%%g0,%2\n\t"                            \
1316                    "addcc %5,%4,%5\n\t"                                 \
1317                    "sllx %3,32,%3\n\t"                                  \
1318                    "add %1,%3,%1\n\t"                                   \
1319                    "add %5,%2,%0"                                       \
1320            : "=r" ((UDItype)(wh)),                                      \
1321              "=&r" ((UDItype)(wl)),                                     \
1322              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1323            : "r" ((UDItype)(u)),                                        \
1324              "r" ((UDItype)(v))                                         \
1325            __CLOBBER_CC);                                               \
1326   } while (0)
1327 #define UMUL_TIME 96
1328 #define UDIV_TIME 230
1329 #endif /* sparc64 */
1330
1331 #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for VAX: two-word addition
   (sh, sl) = (ah, al) + (bh, bl).  ah and al are tied to the output
   operands ("%0" and "%1"), so addl2 adds bl into the operand already
   holding al and adwc then adds bh, with carry, into the operand already
   holding ah.  The "g" constraints allow general operands; sl is
   earlyclobber because it is written before bh is read.  */
1332 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1333   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1334            : "=g" ((USItype) (sh)),                                     \
1335              "=&g" ((USItype) (sl))                                     \
1336            : "%0" ((USItype) (ah)),                                     \
1337              "g" ((USItype) (bh)),                                      \
1338              "%1" ((USItype) (al)),                                     \
1339              "g" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl) for VAX: two-word subtraction
   (sh, sl) = (ah, al) - (bh, bl).  ah and al are tied to the output
   operands ("0" and "1"), so subl2 subtracts bl from the operand already
   holding al and sbwc then subtracts bh, with borrow, from the operand
   already holding ah.  sl is earlyclobber because it is written before bh
   is read.  */
1340 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1341   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1342            : "=g" ((USItype) (sh)),                                     \
1343              "=&g" ((USItype) (sl))                                     \
1344            : "0" ((USItype) (ah)),                                      \
1345              "g" ((USItype) (bh)),                                      \
1346              "1" ((USItype) (al)),                                      \
1347              "g" ((USItype) (bl)))
/* umul_ppmm (xh, xl, m0, m1) for VAX.  m0 and m1 are copied once into
   the locals __m0 and __m1.  emul multiplies them (with an addend of 0)
   into the double-word __xx.__ll, whose halves are then read back through
   the overlaid struct: __l into xl and __h into xh.  Finally xh is
   adjusted by adding __m1 when __m0 has its top bit set and __m0 when
   __m1 has its top bit set (the arithmetic shift by 31 yields an all-ones
   or all-zero mask).
   NOTE(review): that adjustment is presumably what converts emul's
   signed product into the unsigned high word -- confirm against the VAX
   architecture reference.  */
1348 #define umul_ppmm(xh, xl, m0, m1) \
1349   do {                                                                  \
1350     union {                                                             \
1351         UDItype __ll;                                                   \
1352         struct {USItype __l, __h;} __i;                                 \
1353       } __xx;                                                           \
1354     USItype __m0 = (m0), __m1 = (m1);                                   \
1355     __asm__ ("emul %1,%2,$0,%0"                                         \
1356              : "=r" (__xx.__ll)                                         \
1357              : "g" (__m0),                                              \
1358                "g" (__m1));                                             \
1359     (xh) = __xx.__i.__h;                                                \
1360     (xl) = __xx.__i.__l;                                                \
1361     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1362              + (((SItype) __m1 >> 31) & __m0));                         \
1363   } while (0)
/* sdiv_qrnnd (q, r, n1, n0, d) for VAX: signed division of a double word
   by a word.  n1 and n0 are stored into the high and low halves of the
   union __xx, and ediv is then given d and the combined double word
   __xx.__ll, writing its two results to q and r.
   Note that n1 and n0 are used unparenthesized in the assignments.  */
1364 #define sdiv_qrnnd(q, r, n1, n0, d) \
1365   do {                                                                  \
1366     union {DItype __ll;                                                 \
1367            struct {SItype __l, __h;} __i;                               \
1368           } __xx;                                                       \
1369     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1370     __asm__ ("ediv %3,%2,%0,%1"                                         \
1371              : "=g" (q), "=g" (r)                                       \
1372              : "g" (__xx.__ll), "g" (d));                               \
1373   } while (0)
1374 #endif /* __vax__ */
1375
#if defined (__xtensa__) && W_TYPE_SIZE == 32
/* Nothing below depends on a particular Xtensa configuration.  Every
   operation is expressed through a compiler builtin, so the compiler
   decides, from the configuration features that are available, whether
   it can expand the operation in-line instead of calling a library
   routine.  */

/* Widening multiply: the double-word product of u and v.  */
#define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)

/* umul_ppmm (w1, w0, u, v): w1 receives the high word and w0 the low word
   of u * v, split out of the double-word product through DWunion.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __w;                                                        \
    __w.ll = __builtin_umulsidi3 (u, v);                                \
    (w1) = __w.s.high;                                                  \
    (w0) = __w.s.low;                                                   \
  } while (0)

/* Leading and trailing zero-bit counts, taken straight from the
   builtins.  */
#define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
#endif /* __xtensa__ */
1392
#if defined xstormy16
extern UHItype __stormy16_count_leading_zeros (UHItype);
/* count_leading_zeros (count, x) for xstormy16.  The word is examined in
   16-bit pieces, most significant piece first: each piece is handed to
   __clzhi2 and its result added to count, and the scan stops at the first
   piece that is not entirely zero (result != 16).  An all-zero x
   therefore yields W_TYPE_SIZE, which is what COUNT_LEADING_ZEROS_0
   records.

   The macro-local variables carry a "__" prefix, like the locals of the
   other macros in this file, so that a caller's `count' or `x' argument
   that happens to be named `size' or `c' is not captured.

   NOTE(review): the prototype above declares
   __stormy16_count_leading_zeros, but the macro calls __clzhi2, which
   must be declared elsewhere -- confirm which helper is intended.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __size;                                                   \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __size = W_TYPE_SIZE; __size; __size -= 16)     \
        {                                                               \
          UHItype __c;                                                  \
                                                                        \
          __c = __clzhi2 ((x) >> (__size - 16));                        \
          (count) += __c;                                               \
          if (__c != 16)                                                \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1414
1415 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for Z8000 with a 16-bit word:
   two-word addition (sh, sl) = (ah, al) + (bh, bl).  ah and al are tied
   to the output operands ("%0" and "%1"), so add adds bl into the
   register already holding al and adc then adds bh, with carry, into the
   register already holding ah.  sl is earlyclobber because it is written
   before bh is read.  */
1416 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1417   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1418            : "=r" ((unsigned int)(sh)),                                 \
1419              "=&r" ((unsigned int)(sl))                                 \
1420            : "%0" ((unsigned int)(ah)),                                 \
1421              "r" ((unsigned int)(bh)),                                  \
1422              "%1" ((unsigned int)(al)),                                 \
1423              "rQR" ((unsigned int)(bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl) for Z8000 with a 16-bit word:
   two-word subtraction (sh, sl) = (ah, al) - (bh, bl).  ah and al are
   tied to the output operands ("0" and "1"), so sub subtracts bl from the
   register already holding al and sbc then subtracts bh, with borrow,
   from the register already holding ah.  sl is earlyclobber because it is
   written before bh is read.  */
1424 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1425   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1426            : "=r" ((unsigned int)(sh)),                                 \
1427              "=&r" ((unsigned int)(sl))                                 \
1428            : "0" ((unsigned int)(ah)),                                  \
1429              "r" ((unsigned int)(bh)),                                  \
1430              "1" ((unsigned int)(al)),                                  \
1431              "rQR" ((unsigned int)(bl)))
/* umul_ppmm (xh, xl, m0, m1) for Z8000 with a 16-bit word.  m0 and m1
   are copied once into the locals __m0 and __m1; __m0 is tied to the
   low-half output operand ("%1").  mult leaves its result in the halves
   __h and __l of the union, which are copied to xh and xl.  xh is then
   adjusted by adding __m1 when __m0 has its top bit set and __m0 when
   __m1 has its top bit set (the arithmetic shift by 15 yields an all-ones
   or all-zero mask).
   NOTE(review): that adjustment is presumably what converts mult's
   signed product into the unsigned high word -- confirm against the
   Z8000 instruction reference.  */
1432 #define umul_ppmm(xh, xl, m0, m1) \
1433   do {                                                                  \
1434     union {long int __ll;                                               \
1435            struct {unsigned int __h, __l;} __i;                         \
1436           } __xx;                                                       \
1437     unsigned int __m0 = (m0), __m1 = (m1);                              \
1438     __asm__ ("mult      %S0,%H3"                                        \
1439              : "=r" (__xx.__i.__h),                                     \
1440                "=r" (__xx.__i.__l)                                      \
1441              : "%1" (__m0),                                             \
1442                "rQR" (__m1));                                           \
1443     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1444     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1445              + (((signed int) __m1 >> 15) & __m0));                     \
1446   } while (0)
1447 #endif /* __z8000__ */
1448
1449 #endif /* __GNUC__ */
1450
1451 /* If this machine has no inline assembler, use C macros.  */
1452
/* Portable add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (sh, sl) = (ah, al) + (bh, bl), used when no machine-specific version
   was defined above.  The low words are added first; the sum wrapped
   around exactly when it is smaller than al, and that carry is added into
   the high word.

   al is copied into a UWtype local so that every argument is evaluated
   exactly once and the carry test compares two UWtype values.  The
   outputs are stored only after all inputs have been read, so sh and sl
   may name the same objects as the inputs.  */
#if !defined (add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __al = (al);                                                 \
    UWtype __x = __al + (bl);                                           \
    (sh) = (ah) + (bh) + (__x < __al);                                  \
    (sl) = __x;                                                         \
  } while (0)
#endif
1462
/* Portable sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (sh, sl) = (ah, al) - (bh, bl), used when no machine-specific version
   was defined above.  The low words are subtracted first; the difference
   wrapped around exactly when it is larger than al, and that borrow is
   subtracted from the high word.

   al is copied into a UWtype local so that every argument is evaluated
   exactly once and the borrow test compares two UWtype values.  The
   outputs are stored only after all inputs have been read, so sh and sl
   may name the same objects as the inputs.  */
#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __al = (al);                                                 \
    UWtype __x = __al - (bl);                                           \
    (sh) = (ah) - (bh) - (__x > __al);                                  \
    (sl) = __x;                                                         \
  } while (0)
#endif
1472
1473 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1474    smul_ppmm.  */
/* u and v are copied once into __xm0 and __xm1, and smul_ppmm produces
   the signed product with its high word in __w1 and its low word stored
   directly into w0.  The unsigned high word is then obtained by adding
   __xm1 when the top bit of __xm0 is set and __xm0 when the top bit of
   __xm1 is set: shifting right by W_TYPE_SIZE - 1 isolates the top bit
   and negating it gives an all-ones or all-zero mask.  The local names
   are passed into smul_ppmm, so they must not collide with that macro's
   own locals.  */
1475 #if !defined (umul_ppmm) && defined (smul_ppmm)
1476 #define umul_ppmm(w1, w0, u, v)                                         \
1477   do {                                                                  \
1478     UWtype __w1;                                                        \
1479     UWtype __xm0 = (u), __xm1 = (v);                                    \
1480     smul_ppmm (__w1, w0, __xm0, __xm1);                                 \
1481     (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
1482                 + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
1483   } while (0)
1484 #endif
1485
/* If we still don't have umul_ppmm, define it using plain C.

   umul_ppmm (w1, w0, u, v) stores the high word of u * v in w1 and the
   low word in w0.  Each operand is split into half-words, the four
   half-word products are formed, and the two middle products are folded
   into the result with explicit carry handling.

   u and v are copied into UWtype locals first, so each argument is
   evaluated exactly once, as in the smul_ppmm-based variant of this
   macro.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __umul_u = (u), __umul_v = (v);                              \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
                                                                        \
    __ul = __ll_lowpart (__umul_u);                                     \
    __uh = __ll_highpart (__umul_u);                                    \
    __vl = __ll_lowpart (__umul_v);                                     \
    __vh = __ll_highpart (__umul_v);                                    \
                                                                        \
    __x0 = (UWtype) __ul * __vl;                                        \
    __x1 = (UWtype) __ul * __vh;                                        \
    __x2 = (UWtype) __uh * __vl;                                        \
    __x3 = (UWtype) __uh * __vh;                                        \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
#endif
1512
/* Generic __umulsidi3 (u, v): a statement expression that evaluates to
   the double-word product of u and v.  It lets umul_ppmm write the high
   and low words straight into the two halves of a DWunion and then
   yields the union's double-word member.  The local is named __w and its
   members are passed into umul_ppmm, so the umul_ppmm in effect must not
   use a local of the same name.  */
1513 #if !defined (__umulsidi3)
1514 #define __umulsidi3(u, v) \
1515   ({DWunion __w;                                                        \
1516     umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
1517     __w.ll; })
1518 #endif
1519
/* Define this unconditionally, so it can be used for debugging.

   __udiv_qrnnd_c (q, r, n1, n0, d) divides the two-word value n1:n0 by d
   in plain C, storing the quotient in q and the remainder in r.  The
   quotient is produced one half-word at a time: each half is first
   estimated by dividing by the high half of d, and the estimate is then
   corrected downwards (at most twice) using the low half of d.
   NOTE(review): the algorithm presumably requires n1 < d and a normalized
   d (most significant bit set) -- confirm against the header's
   introductory documentation.

   n1, n0 and d are copied into UWtype locals first so that each argument
   is evaluated exactly once; q and r are written only after all inputs
   have been read.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __udiv_n1 = (n1), __udiv_n0 = (n0), __udiv_d = (d);          \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    __d1 = __ll_highpart (__udiv_d);                                    \
    __d0 = __ll_lowpart (__udiv_d);                                     \
                                                                        \
    /* High half of the quotient.  */                                   \
    __r1 = __udiv_n1 % __d1;                                            \
    __q1 = __udiv_n1 / __d1;                                            \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (__udiv_n0);                   \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += __udiv_d;                                       \
        /* i.e. we didn't get carry when adding to __r1 */              \
        if (__r1 >= __udiv_d)                                           \
          if (__r1 < __m)                                               \
            __q1--, __r1 += __udiv_d;                                   \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low half of the quotient.  */                                    \
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (__udiv_n0);                    \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += __udiv_d;                                       \
        if (__r0 >= __udiv_d)                                           \
          if (__r0 < __m)                                               \
            __q0--, __r0 += __udiv_d;                                   \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)
1557
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).

   Q receives the value returned by __udiv_w_sdiv and R the remainder that
   it stores through its first (pointer) argument.  The remainder
   temporary is a UWtype, like every other operand of udiv_qrnnd, so that
   it is neither truncated nor passed through a mismatched pointer type
   when UWtype is wider than USItype.
   NOTE(review): assumes __udiv_w_sdiv takes a `UWtype *' for the
   remainder -- confirm for any out-of-tree definition.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif
1568
/* No udiv_qrnnd has been defined for this processor: fall back to the
   generic C routine __udiv_qrnnd_c and set UDIV_NEEDS_NORMALIZATION to 1
   for it.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1574
#ifndef count_leading_zeros
/* Generic C count_leading_zeros.  Picks a shift amount such that X
   shifted right by it fits in one byte, looks that byte up in __clz_tab,
   and stores W_TYPE_SIZE minus (table value + shift) into COUNT.
   For words of at most 32 bits the shift is chosen by comparing X against
   the quarter-word boundaries; wider words are scanned a byte at a time
   from the top.  X is evaluated once.
   NOTE(review): presumably __clz_tab[i] is the bit length of i, and
   COUNT_LEADING_ZEROS_0 is the result for a zero argument -- confirm
   against the table's definition.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __clz_word = (x);                                            \
    UWtype __clz_shift;                                                 \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        /* Skip zero bytes starting from the most significant one.  */  \
        __clz_shift = W_TYPE_SIZE - 8;                                  \
        while (__clz_shift > 0                                          \
               && ((__clz_word >> __clz_shift) & 0xff) == 0)            \
          __clz_shift -= 8;                                             \
      }                                                                 \
    else if (__clz_word < ((UWtype) 1 << __BITS4))                      \
      __clz_shift = 0;                                                  \
    else if (__clz_word < ((UWtype) 1 << 2 * __BITS4))                  \
      __clz_shift = __BITS4;                                            \
    else if (__clz_word < ((UWtype) 1 << 3 * __BITS4))                  \
      __clz_shift = 2 * __BITS4;                                        \
    else                                                                \
      __clz_shift = 3 * __BITS4;                                        \
                                                                        \
    (count) = W_TYPE_SIZE                                               \
              - (__clz_tab[__clz_word >> __clz_shift] + __clz_shift);   \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1598
#ifndef count_trailing_zeros
/* Generic count_trailing_zeros, expressed through count_leading_zeros
   (which may be an asm version or the C fallback).  `X & -X' is applied
   to isolate a single bit of X, count_leading_zeros is run on that value,
   and COUNT is set to W_TYPE_SIZE - 1 minus the result.  X is evaluated
   once.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_word = (x);                                            \
    UWtype __ctz_lowbit = __ctz_word & -__ctz_word;                     \
    UWtype __ctz_lead;                                                  \
                                                                        \
    count_leading_zeros (__ctz_lead, __ctz_lowbit);                     \
    (count) = (W_TYPE_SIZE - 1) - __ctz_lead;                           \
  } while (0)
#endif
1610
/* Default UDIV_NEEDS_NORMALIZATION to 0 when nothing earlier in the file
   has defined it.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif