OSDN Git Service

* optabs.h (enum optab_index): Add new OTI_significand.
[pf3gnuchains/gcc-fork.git] / gcc / longlong.h
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4    Free Software Foundation, Inc.
5
6    This file is part of the GNU C Library.
7
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
12
13    In addition to the permissions in the GNU Lesser General Public
14    License, the Free Software Foundation gives you unlimited
15    permission to link the compiled version of this file into
16    combinations with other programs, and to distribute those
17    combinations without any restriction coming from the use of this
18    file.  (The Lesser General Public License restrictions do apply in
19    other respects; for example, they cover modification of the file,
20    and distribution when not linked into a combine executable.)
21
22    The GNU C Library is distributed in the hope that it will be useful,
23    but WITHOUT ANY WARRANTY; without even the implied warranty of
24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25    Lesser General Public License for more details.
26
27    You should have received a copy of the GNU Lesser General Public
28    License along with the GNU C Library; if not, write to the Free
29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
30    MA 02110-1301, USA.  */
31
32 /* You have to define the following before including this file:
33
34    UWtype -- An unsigned type, default type for operations (typically a "word")
35    UHWtype -- An unsigned type, at least half the size of UWtype.
36    UDWtype -- An unsigned type, at least twice as large as UWtype.
37    W_TYPE_SIZE -- size in bits of UWtype
38
39    UQItype -- Unsigned 8 bit type.
40    SItype, USItype -- Signed and unsigned 32 bit types.
41    DItype, UDItype -- Signed and unsigned 64 bit types.
42
43    On a 32 bit machine UWtype should typically be USItype;
44    on a 64 bit machine, UWtype should typically be UDItype.  */
45
/* Fall back to a 32-bit word configuration when the includer has not
   selected one by defining W_TYPE_SIZE.  */
#if !defined (W_TYPE_SIZE)
# define W_TYPE_SIZE	32
# define UWtype		USItype
# define UHWtype	USItype
# define UDWtype	UDItype
#endif

/* A quarter of the word size, in bits.  */
#define __BITS4 (W_TYPE_SIZE / 4)

/* 2 ** (W_TYPE_SIZE / 2), as a UWtype: the weight of the upper half-word.  */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))

/* The lower and the upper half of the word T, each as a UWtype.  */
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
57
58 /* Used in glibc only.  */
59 #ifndef attribute_hidden
60 #define attribute_hidden
61 #endif
62
63 extern const UQItype __clz_tab[256] attribute_hidden;
64
65 /* Define auxiliary asm macros.
66
67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
69    word product in HIGH_PROD and LOW_PROD.
70
71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
72    UDWtype product.  This is just a variant of umul_ppmm.
73
74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
75    denominator) divides a UDWtype, composed by the UWtype integers
76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
78    than DENOMINATOR for correct operation.  If, in addition, the most
79    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
80    UDIV_NEEDS_NORMALIZATION is defined to 1.
81
82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
84    is rounded towards 0.
85
86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
87    msb to the first nonzero bit in the UWtype X.  This is the number of
88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
90
91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
92    from the least significant end.
93
94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
96    by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
98    (i.e. carry out) is not stored anywhere, and is lost.
99
100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
102    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
104    and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
105    and is lost.
106
107    If any of these macros are left undefined for a particular CPU,
108    C macros are used.  */
109
110 /* The CPUs come in alphabetical order below.
111
112    Please add support for more CPUs here, or improve the current support
113    for the CPUs below!
114    (E.g. WE32100, IBM360.)  */
115
116 #if defined (__GNUC__) && !defined (NO_ASM)
117
/* gcc2 and later sometimes need "cc" in an asm clobber list, which gcc1
   would not understand.  These two helpers let each asm be written once:
   __CLOBBER_CC starts a clobber list that holds only "cc", and
   __AND_CLOBBER_CC appends "cc" to a clobber list that already exists.
   For gcc1 both expand to nothing.  */
#if __GNUC__ >= 2
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#else /* __GNUC__ < 2 */
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#endif /* __GNUC__ >= 2 */
127
128 #if defined (__alpha) && W_TYPE_SIZE == 64
/* umul_ppmm (ph, pl, m0, m1): two-word product of M0 and M1.  Both inputs
   are copied into UDItype locals first, so each is evaluated once.  PH is
   taken from __builtin_alpha_umulh and PL from the ordinary (truncating)
   multiplication.  */
129 #define umul_ppmm(ph, pl, m0, m1) \
130   do {                                                                  \
131     UDItype __m0 = (m0), __m1 = (m1);                                   \
132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
133     (pl) = __m0 * __m1;                                                 \
134   } while (0)
135 #define UMUL_TIME 46
136 #ifndef LONGLONG_STANDALONE
/* udiv_qrnnd (q, r, n1, n0, d): delegates to the out-of-line function
   __udiv_qrnnd, which returns the quotient and stores the remainder
   through its first (pointer) argument.  Only provided when
   LONGLONG_STANDALONE is not defined.  */
137 #define udiv_qrnnd(q, r, n1, n0, d) \
138   do { UDItype __r;                                                     \
139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
140     (r) = __r;                                                          \
141   } while (0)
142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
143 #define UDIV_TIME 220
144 #endif /* LONGLONG_STANDALONE */
145 #ifdef __alpha_cix__
/* With __alpha_cix__ defined the GCC bit-scan builtins are used directly.
   COUNT_LEADING_ZEROS_0 advertises a value for count_leading_zeros of 0
   (see the description of count_leading_zeros at the top of the file).  */
146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
148 #define COUNT_LEADING_ZEROS_0 64
149 #else
/* count_leading_zeros without __alpha_cix__.  A byte mask is obtained from
   __builtin_alpha_cmpbge (0, X); its complement indexes __clz_tab to give
   the byte index __a, that byte is fetched with __builtin_alpha_extbl, and
   a second __clz_tab lookup handles the bits inside it.
   NOTE(review): __clz_tab is defined elsewhere; this presumably relies on
   __clz_tab[v] being the bit length of v -- confirm against the table.  */
150 #define count_leading_zeros(COUNT,X) \
151   do {                                                                  \
152     UDItype __xr = (X), __t, __a;                                       \
153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
155     __t = __builtin_alpha_extbl (__xr, __a);                            \
156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
157   } while (0)
/* count_trailing_zeros without __alpha_cix__.  `~__t & -~__t' keeps only
   the lowest set bit of the complemented cmpbge mask; the 0xCC / 0xF0 /
   0xAA tests turn that one-hot value into its bit index (bits 1, 2 and 0
   of the index respectively), which is used as the byte index __a.  The
   selected byte is extracted, reduced to its lowest set bit with
   `__t &= -__t', and the same one-hot-to-index step is added to __a * 8.  */
158 #define count_trailing_zeros(COUNT,X) \
159   do {                                                                  \
160     UDItype __xr = (X), __t, __a;                                       \
161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
162     __t = ~__t & -~__t;                                                 \
163     __a = ((__t & 0xCC) != 0) * 2;                                      \
164     __a += ((__t & 0xF0) != 0) * 4;                                     \
165     __a += ((__t & 0xAA) != 0);                                         \
166     __t = __builtin_alpha_extbl (__xr, __a);                            \
167     __a <<= 3;                                                          \
168     __t &= -__t;                                                        \
169     __a += ((__t & 0xCC) != 0) * 2;                                     \
170     __a += ((__t & 0xF0) != 0) * 4;                                     \
171     __a += ((__t & 0xAA) != 0);                                         \
172     (COUNT) = __a;                                                      \
173   } while (0)
174 #endif /* __alpha_cix__ */
175 #endif /* __alpha */
176
177 #if defined (__arc__) && W_TYPE_SIZE == 32
/* add_ssaaaa: the first instruction (add.f) writes SL (%1) from AL (%4)
   and BL (%5); the second (adc) writes SH (%0) from AH (%2) and BH (%3).
   SL is an earlyclobber output ("=&r") because it is written before the
   high-word inputs are read.  */
178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
180            : "=r" ((USItype) (sh)),                                     \
181              "=&r" ((USItype) (sl))                                     \
182            : "%r" ((USItype) (ah)),                                     \
183              "rIJ" ((USItype) (bh)),                                    \
184              "%r" ((USItype) (al)),                                     \
185              "rIJ" ((USItype) (bl)))
/* sub_ddmmss: same operand layout as add_ssaaaa, using sub.f for the low
   words and sbc for the high words.  The inputs are not marked commutative
   ("%") here.  */
186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
188            : "=r" ((USItype) (sh)),                                     \
189              "=&r" ((USItype) (sl))                                     \
190            : "r" ((USItype) (ah)),                                      \
191              "rIJ" ((USItype) (bh)),                                    \
192              "r" ((USItype) (al)),                                      \
193              "rIJ" ((USItype) (bl)))
194 /* Call libgcc routine: umul_ppmm obtains the double-word product from the
   out-of-line function __umulsidi3 and splits it into W1 (high) and W0
   (low) through a DWunion, a type declared outside this file.  */
195 #define umul_ppmm(w1, w0, u, v) \
196 do {                                                                    \
197   DWunion __w;                                                          \
198   __w.ll = __umulsidi3 (u, v);                                          \
199   w1 = __w.s.high;                                                      \
200   w0 = __w.s.low;                                                       \
201 } while (0)
/* Defining __umulsidi3 as itself makes the name visible to preprocessor
   `defined' tests while leaving calls unchanged.
   NOTE(review): presumably this keeps a generic fallback definition from
   replacing the function -- confirm against the rest of the file.  */
202 #define __umulsidi3 __umulsidi3
203 UDItype __umulsidi3 (USItype, USItype);
204 #endif
205
206 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
/* add_ssaaaa for ARM: `adds' writes SL (%1) from AL (%4) and BL (%5), then
   `adc' writes SH (%0) from AH (%2) and BH (%3).  SL is an earlyclobber
   output because it is written before the high-word inputs are read.  The
   condition codes are declared clobbered via __CLOBBER_CC.  */
207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
209            : "=r" ((USItype) (sh)),                                     \
210              "=&r" ((USItype) (sl))                                     \
211            : "%r" ((USItype) (ah)),                                     \
212              "rI" ((USItype) (bh)),                                     \
213              "%r" ((USItype) (al)),                                     \
214              "rI" ((USItype) (bl)) __CLOBBER_CC)
/* sub_ddmmss for ARM: same operand layout, using `subs' for the low words
   and `sbc' for the high words; inputs are not marked commutative.  */
215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
217            : "=r" ((USItype) (sh)),                                     \
218              "=&r" ((USItype) (sl))                                     \
219            : "r" ((USItype) (ah)),                                      \
220              "rI" ((USItype) (bh)),                                     \
221              "r" ((USItype) (al)),                                      \
222              "rI" ((USItype) (bl)) __CLOBBER_CC)
/* umul_ppmm (xh, xl, a, b) for ARM: builds the two-word product of A and B
   from four `mul' instructions on the 16-bit halves of the operands.
   Operands %2..%4 are scratch registers (__t0, __t1, __t2); XH, __t0 and
   __t1 are earlyclobber outputs because they are written while A and B are
   still being read.  The condition codes are declared clobbered.

   The body is wrapped in do { ... } while (0) rather than a bare { ... }
   block so that `umul_ppmm (...);' is exactly one statement and can be
   used as the body of an if that has an else branch.  */
#define umul_ppmm(xh, xl, a, b) \
  do {                                                                  \
    register USItype __t0, __t1, __t2;                                  \
    __asm__ ("%@ Inlined umul_ppmm\n"                                   \
             "    mov     %2, %5, lsr #16\n"                            \
             "    mov     %0, %6, lsr #16\n"                            \
             "    bic     %3, %5, %2, lsl #16\n"                        \
             "    bic     %4, %6, %0, lsl #16\n"                        \
             "    mul     %1, %3, %4\n"                                 \
             "    mul     %4, %2, %4\n"                                 \
             "    mul     %3, %0, %3\n"                                 \
             "    mul     %0, %2, %0\n"                                 \
             "    adds    %3, %4, %3\n"                                 \
             "    addcs   %0, %0, #65536\n"                             \
             "    adds    %1, %1, %3, lsl #16\n"                        \
             "    adc     %0, %0, %3, lsr #16"                          \
             : "=&r" ((USItype) (xh)),                                  \
               "=r" ((USItype) (xl)),                                   \
               "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                  \
             : "r" ((USItype) (a)),                                     \
               "r" ((USItype) (b)) __CLOBBER_CC );                      \
  } while (0)
#define UMUL_TIME 20
#define UDIV_TIME 100
245 #endif /* __arm__ */
246
/* This section applies to every __arm__ target: unlike the section above
   it, its guard does not test __thumb__ or W_TYPE_SIZE.  */
247 #if defined(__arm__)
248 /* Let gcc decide how best to implement count_leading_zeros.
   COUNT_LEADING_ZEROS_0 advertises a value for an input of 0 (see the
   description of count_leading_zeros at the top of the file).  */
249 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
250 #define COUNT_LEADING_ZEROS_0 32
251 #endif
252
/* CRIS: count_leading_zeros maps to __builtin_clz from architecture
   version 3 on; count_trailing_zeros maps to __builtin_ctz only from
   version 8 on.  No COUNT_LEADING_ZEROS_0 is defined here.  */
253 #if defined (__CRIS__) && __CRIS_arch_version >= 3
254 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
255 #if __CRIS_arch_version >= 8
256 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
257 #endif
258 #endif /* __CRIS__ */
259
260 #if defined (__hppa) && W_TYPE_SIZE == 32
/* add_ssaaaa: `add' writes SL (%1) from AL (%4) and BL (%5), then `addc'
   writes SH (%0) from AH (%2) and BH (%3).  SL is an earlyclobber output
   because it is written before the high-word inputs are read.  */
261 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
262   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
263            : "=r" ((USItype) (sh)),                                     \
264              "=&r" ((USItype) (sl))                                     \
265            : "%rM" ((USItype) (ah)),                                    \
266              "rM" ((USItype) (bh)),                                     \
267              "%rM" ((USItype) (al)),                                    \
268              "rM" ((USItype) (bl)))
/* sub_ddmmss: same operand layout, using `sub' for the low words and
   `subb' for the high words; inputs are not marked commutative.  */
269 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
270   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
271            : "=r" ((USItype) (sh)),                                     \
272              "=&r" ((USItype) (sl))                                     \
273            : "rM" ((USItype) (ah)),                                     \
274              "rM" ((USItype) (bh)),                                     \
275              "rM" ((USItype) (al)),                                     \
276              "rM" ((USItype) (bl)))
277 #if defined (_PA_RISC1_1)
/* umul_ppmm, only with _PA_RISC1_1: a single `xmpyu' produces a UDItype
   in an "x"-constrained operand; the two result words are read back
   through a union whose first USItype member (__w1) is used as the high
   word and whose second (__w0) as the low word.  */
278 #define umul_ppmm(w1, w0, u, v) \
279   do {                                                                  \
280     union                                                               \
281       {                                                                 \
282         UDItype __f;                                                    \
283         struct {USItype __w1, __w0;} __w1w0;                            \
284       } __t;                                                            \
285     __asm__ ("xmpyu %1,%2,%0"                                           \
286              : "=x" (__t.__f)                                           \
287              : "x" ((USItype) (u)),                                     \
288                "x" ((USItype) (v)));                                    \
289     (w1) = __t.__w1w0.__w1;                                             \
290     (w0) = __t.__w1w0.__w0;                                             \
291      } while (0)
292 #define UMUL_TIME 8
293 #else
294 #define UMUL_TIME 30
295 #endif
296 #define UDIV_TIME 40
/* count_leading_zeros: a binary search written in assembly (see the
   per-line comments inside the template).  Operand %0 is COUNT, which
   starts at 1; operand %1 is the scratch __tmp, tied to the input X by the
   "1" constraint, so X itself is not modified.  Each step tests a field of
   %1 and either shifts %1 down or adds the field width to %0; the last two
   instructions subtract the final extracted bit from %0.  */
297 #define count_leading_zeros(count, x) \
298   do {                                                                  \
299     USItype __tmp;                                                      \
300     __asm__ (                                                           \
301        "ldi             1,%0\n"                                         \
302 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
303 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
304 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
305 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
306 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
307 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
308 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
309 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
310 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
311 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
312 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
313 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
314 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
315 "       sub             %0,%1,%0                ; Subtract it.\n"       \
316         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
317   } while (0)
318 #endif
319
320 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
/* smul_ppmm (xh, xl, m0, m1): loads M0 with `lr' into %N0 of the
   double-word operand and multiplies by M1 with `mr'.  The DItype result
   is split through a union: __h (first member) becomes XH and __l becomes
   XL.  The output is earlyclobber because it is written by `lr' before M1
   is read.  Presumably a signed multiply, per the macro name and the
   DItype union member -- confirm against the instruction reference.  */
321 #define smul_ppmm(xh, xl, m0, m1) \
322   do {                                                                  \
323     union {DItype __ll;                                                 \
324            struct {USItype __h, __l;} __i;                              \
325           } __x;                                                        \
326     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
327              : "=&r" (__x.__ll)                                         \
328              : "r" (m0), "r" (m1));                                     \
329     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
330   } while (0)
/* sdiv_qrnnd (q, r, n1, n0, d): packs N1:N0 into the union (N1 in __h, N0
   in __l), divides it in place by D with `dr' (the input is tied to the
   output by the "0" constraint), then reads the quotient from __l and the
   remainder from __h.  */
331 #define sdiv_qrnnd(q, r, n1, n0, d) \
332   do {                                                                  \
333     union {DItype __ll;                                                 \
334            struct {USItype __h, __l;} __i;                              \
335           } __x;                                                        \
336     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
337     __asm__ ("dr %0,%2"                                                 \
338              : "=r" (__x.__ll)                                          \
339              : "0" (__x.__ll), "r" (d));                                \
340     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
341   } while (0)
342 #endif
343
344 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* add_ssaaaa: add / adc on 32-bit operands.  AH and AL are tied to the
   outputs SH and SL by the "0" / "1" matching constraints, so the asm adds
   BL (%5) into SL (%1) and then BH (%3), with carry, into SH (%0).  The
   {att|intel} braces select the operand order for the assembler dialect in
   use.  */
345 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
346   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
347            : "=r" ((USItype) (sh)),                                     \
348              "=&r" ((USItype) (sl))                                     \
349            : "%0" ((USItype) (ah)),                                     \
350              "g" ((USItype) (bh)),                                      \
351              "%1" ((USItype) (al)),                                     \
352              "g" ((USItype) (bl)))
/* sub_ddmmss: same layout with sub / sbb; the tied inputs are not marked
   commutative.  */
353 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
354   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
355            : "=r" ((USItype) (sh)),                                     \
356              "=&r" ((USItype) (sl))                                     \
357            : "0" ((USItype) (ah)),                                      \
358              "g" ((USItype) (bh)),                                      \
359              "1" ((USItype) (al)),                                      \
360              "g" ((USItype) (bl)))
/* umul_ppmm: one `mul' instruction.  U is tied to the "a" output (W0, the
   low word), V is a register or memory operand, and W1 (the high word) is
   the "d" output.  */
361 #define umul_ppmm(w1, w0, u, v) \
362   __asm__ ("mul{l} %3"                                                  \
363            : "=a" ((USItype) (w0)),                                     \
364              "=d" ((USItype) (w1))                                      \
365            : "%0" ((USItype) (u)),                                      \
366              "rm" ((USItype) (v)))
/* udiv_qrnnd: one `div' instruction.  N0 is tied to the "a" operand and N1
   to the "d" operand; afterwards Q is read from "a" and R from "d".  */
367 #define udiv_qrnnd(q, r, n1, n0, dv) \
368   __asm__ ("div{l} %4"                                                  \
369            : "=a" ((USItype) (q)),                                      \
370              "=d" ((USItype) (r))                                       \
371            : "0" ((USItype) (n0)),                                      \
372              "1" ((USItype) (n1)),                                      \
373              "rm" ((USItype) (dv)))
/* Bit scans are left to the GCC builtins for unsigned int.  */
374 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
375 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
376 #define UMUL_TIME 40
377 #define UDIV_TIME 40
378 #endif /* 80x86 */
379
380 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
/* add_ssaaaa, 64-bit words: add / adc with the {q} suffix.  AH and AL are
   tied to the outputs SH and SL by the "0" / "1" matching constraints, so
   the asm adds BL (%5) into SL (%1) and then BH (%3), with carry, into SH
   (%0).  */
381 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
382   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
383            : "=r" ((UDItype) (sh)),                                     \
384              "=&r" ((UDItype) (sl))                                     \
385            : "%0" ((UDItype) (ah)),                                     \
386              "rme" ((UDItype) (bh)),                                    \
387              "%1" ((UDItype) (al)),                                     \
388              "rme" ((UDItype) (bl)))
/* sub_ddmmss, 64-bit words: same layout with sub / sbb; the tied inputs
   are not marked commutative.  */
389 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
390   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
391            : "=r" ((UDItype) (sh)),                                     \
392              "=&r" ((UDItype) (sl))                                     \
393            : "0" ((UDItype) (ah)),                                      \
394              "rme" ((UDItype) (bh)),                                    \
395              "1" ((UDItype) (al)),                                      \
396              "rme" ((UDItype) (bl)))
/* umul_ppmm, 64-bit words: one `mul' instruction.  U is tied to the "a"
   output (W0, the low word), V is a register or memory operand, and W1
   (the high word) is the "d" output.  */
397 #define umul_ppmm(w1, w0, u, v) \
398   __asm__ ("mul{q} %3"                                                  \
399            : "=a" ((UDItype) (w0)),                                     \
400              "=d" ((UDItype) (w1))                                      \
401            : "%0" ((UDItype) (u)),                                      \
402              "rm" ((UDItype) (v)))
/* udiv_qrnnd, 64-bit words: one `div' instruction.  N0 is tied to the "a"
   operand and N1 to the "d" operand; afterwards Q is read from "a" and R
   from "d".  */
403 #define udiv_qrnnd(q, r, n1, n0, dv) \
404   __asm__ ("div{q} %4"                                                  \
405            : "=a" ((UDItype) (q)),                                      \
406              "=d" ((UDItype) (r))                                       \
407            : "0" ((UDItype) (n0)),                                      \
408              "1" ((UDItype) (n1)),                                      \
409              "rm" ((UDItype) (dv)))
/* Bit scans for a 64-bit word.  The "ll" builtins are used because they
   take unsigned long long, which is at least 64 bits on every target,
   whereas the "l" builtins take unsigned long, which is only 32 bits wide
   on the __i386__ targets that the enclosing #if also admits and would
   silently truncate X there.  As with the builtins themselves, the result
   is undefined for X == 0.  */
#define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
414 #endif /* x86_64 */
415
416 #if defined (__i960__) && W_TYPE_SIZE == 32
/* umul_ppmm: a single `emul' writes a UDItype into a "d"-constrained
   operand; the two words are read back through a union whose first
   USItype member (__l) is used as the low word and whose second (__h) as
   the high word.  Written as a GNU statement expression.  */
417 #define umul_ppmm(w1, w0, u, v) \
418   ({union {UDItype __ll;                                                \
419            struct {USItype __l, __h;} __i;                              \
420           } __xx;                                                       \
421   __asm__ ("emul        %2,%1,%0"                                       \
422            : "=d" (__xx.__ll)                                           \
423            : "%dI" ((USItype) (u)),                                     \
424              "dI" ((USItype) (v)));                                     \
425   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* __umulsidi3 (u, v): the same `emul', with the UDItype product as the
   value of the statement expression.  */
426 #define __umulsidi3(u, v) \
427   ({UDItype __w;                                                        \
428     __asm__ ("emul      %2,%1,%0"                                       \
429              : "=d" (__w)                                               \
430              : "%dI" ((USItype) (u)),                                   \
431                "dI" ((USItype) (v)));                                   \
432     __w; })
433 #endif /* __i960__ */
434
435 #if defined (__ia64) && W_TYPE_SIZE == 64
/* Two-word subtraction in plain C: {SH,SL} = {AH,AL} - {BH,BL}.  The low
   difference is computed first and held in a temporary; the borrow is
   handled with an explicit comparison of AL against BL that selects
   between two high-word subtractions, and SL is stored last.

   The borrow is deliberately expressed as a branch and not folded into
   the arithmetic as "al<bl": per the original note, gcc (pre-release 3.4
   at least) turns this shape into predicated "sub r=r,r" and
   "sub r=r,r,1" with a 2 cycle latency, whereas materialising the 0 or 1
   in a register costs an extra cycle.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UWtype __x = (al) - (bl);                                           \
    if ((al) >= (bl))                                                   \
      (sh) = (ah) - (bh);                                               \
    else                                                                \
      (sh) = (ah) - (bh) - 1;                                           \
    (sl) = __x;                                                         \
  } while (0)
450
451 /* Do both product parts in assembly, since that gives better code with
452    all gcc versions.  Some callers will just use the upper part, and in
453    that situation we waste an instruction, but not any cycles.
   PH comes from xma.hu and PL from xma.l, both on "f"-constrained
   operands; PH is earlyclobber because it is written before the second
   instruction reads M0 and M1.  */
454 #define umul_ppmm(ph, pl, m0, m1)                                       \
455   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
456            : "=&f" (ph), "=f" (pl)                                      \
457            : "f" (m0), "f" (m1))
/* count_leading_zeros: X is copied to _x (evaluated once).  mux1 @rev and
   czx1.l produce _a, from which the shift _c = (_a - 1) * 8 is derived;
   after shifting _x right by _c, the remaining bits are resolved by two
   compare-and-shift steps (4 and 2 bits) plus `_x >> 1'.  The result is
   W_TYPE_SIZE - 1 - _c.
   NOTE(review): the exact meaning of the czx1.l result (_a) should be
   confirmed against the ia64 instruction reference.  */
458 #define count_leading_zeros(count, x)                                   \
459   do {                                                                  \
460     UWtype _x = (x), _y, _a, _c;                                        \
461     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
462     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
463     _c = (_a - 1) << 3;                                                 \
464     _x >>= _c;                                                          \
465     if (_x >= 1 << 4)                                                   \
466       _x >>= 4, _c += 4;                                                \
467     if (_x >= 1 << 2)                                                   \
468       _x >>= 2, _c += 2;                                                \
469     _c += _x >> 1;                                                      \
470     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
471   } while (0)
472 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
473    based, and we don't need a special case for x==0 here.
   (x - 1) & ~x has a 1 in exactly the positions below the lowest set bit
   of x, so its population count is the number of trailing zeros.  */
474 #define count_trailing_zeros(count, x)                                  \
475   do {                                                                  \
476     UWtype __ctz_x = (x);                                               \
477     __asm__ ("popcnt %0 = %1"                                           \
478              : "=r" (count)                                             \
479              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
480   } while (0)
481 #define UMUL_TIME 14
482 #endif
483
484 #if defined (__M32R__) && W_TYPE_SIZE == 32
/* add_ssaaaa: AH and AL are tied to the outputs SH and SL by the "0" / "1"
   matching constraints.  After `cmp %0,%0', the first addx adds BL (%5)
   into SL (%1) and the second adds BH (%3) into SH (%0).  "cbit" is
   listed as clobbered.  */
485 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
486   /* The cmp clears the condition bit.  */ \
487   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
488            : "=r" ((USItype) (sh)),                                     \
489              "=&r" ((USItype) (sl))                                     \
490            : "0" ((USItype) (ah)),                                      \
491              "r" ((USItype) (bh)),                                      \
492              "1" ((USItype) (al)),                                      \
493              "r" ((USItype) (bl))                                       \
494            : "cbit")
/* sub_ddmmss: same layout as add_ssaaaa, using subx in place of addx.  */
495 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
496   /* The cmp clears the condition bit.  */ \
497   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
498            : "=r" ((USItype) (sh)),                                     \
499              "=&r" ((USItype) (sl))                                     \
500            : "0" ((USItype) (ah)),                                      \
501              "r" ((USItype) (bh)),                                      \
502              "1" ((USItype) (al)),                                      \
503              "r" ((USItype) (bl))                                       \
504            : "cbit")
505 #endif /* __M32R__ */
506
507 #if defined (__mc68000__) && W_TYPE_SIZE == 32
/* add_ssaaaa for m68k: AH and AL are tied to the outputs SH and SL by the
   "0" / "1" matching constraints.  `add' adds BL (%5) into SL (%1), then
   `addx' adds BH (%3) into SH (%0).  The outputs and BH use the "d"
   constraint; BL may be any general operand ("g").  */
508 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
509   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
510            : "=d" ((USItype) (sh)),                                     \
511              "=&d" ((USItype) (sl))                                     \
512            : "%0" ((USItype) (ah)),                                     \
513              "d" ((USItype) (bh)),                                      \
514              "%1" ((USItype) (al)),                                     \
515              "g" ((USItype) (bl)))
/* sub_ddmmss for m68k: same layout with sub / subx; the tied inputs are
   not marked commutative.  */
516 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
517   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
518            : "=d" ((USItype) (sh)),                                     \
519              "=&d" ((USItype) (sl))                                     \
520            : "0" ((USItype) (ah)),                                      \
521              "d" ((USItype) (bh)),                                      \
522              "1" ((USItype) (al)),                                      \
523              "g" ((USItype) (bl)))
524
525 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
526 #if (defined (__mc68020__) && !defined (__mc68060__))
/* umul_ppmm: one `mulu' with a register-pair destination %1:%0.  U is
   tied to W0 (%0, the low word); W1 (%1) receives the high word; V may be
   a data register, memory or an immediate ("dmi").  */
527 #define umul_ppmm(w1, w0, u, v) \
528   __asm__ ("mulu%.l %3,%1:%0"                                           \
529            : "=d" ((USItype) (w0)),                                     \
530              "=d" ((USItype) (w1))                                      \
531            : "%0" ((USItype) (u)),                                      \
532              "dmi" ((USItype) (v)))
533 #define UMUL_TIME 45
/* udiv_qrnnd: one `divu' on the register pair %1:%0.  N0 is tied to Q
   (%0) and N1 to R (%1), so the pair holds the dividend on entry and the
   quotient and remainder on exit.  */
534 #define udiv_qrnnd(q, r, n1, n0, d) \
535   __asm__ ("divu%.l %4,%1:%0"                                           \
536            : "=d" ((USItype) (q)),                                      \
537              "=d" ((USItype) (r))                                       \
538            : "0" ((USItype) (n0)),                                      \
539              "1" ((USItype) (n1)),                                      \
540              "dmi" ((USItype) (d)))
541 #define UDIV_TIME 90
/* sdiv_qrnnd: identical operand layout to udiv_qrnnd, using `divs'.  */
542 #define sdiv_qrnnd(q, r, n1, n0, d) \
543   __asm__ ("divs%.l %4,%1:%0"                                           \
544            : "=d" ((USItype) (q)),                                      \
545              "=d" ((USItype) (r))                                       \
546            : "0" ((USItype) (n0)),                                      \
547              "1" ((USItype) (n1)),                                      \
548              "dmi" ((USItype) (d)))
549
550 #elif defined (__mcoldfire__) /* not mc68020 */
551
/* ColdFire: unsigned 32x32->64 multiply, (xh:xl) = a * b, built from four
   16x16->32 mulu instructions on the half-words of a and b.  Scratch
   registers d0-d4 are used explicitly and therefore listed as clobbers.
   This variant clears the low half of d0 with clr.w.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           "    move%.l %2,%/d0\n"      /* d0 = a */ \
           "    move%.l %3,%/d1\n"      /* d1 = b */ \
           "    move%.l %/d0,%/d2\n"    /* d2 = a */ \
           "    swap    %/d0\n"         /* low word of d0 = high half of a */ \
           "    move%.l %/d1,%/d3\n"    /* d3 = b */ \
           "    swap    %/d1\n"         /* low word of d1 = high half of b */ \
           "    move%.w %/d2,%/d4\n"    /* low word of d4 = low half of a */ \
           "    mulu    %/d3,%/d4\n"    /* d4 = a_lo * b_lo */ \
           "    mulu    %/d1,%/d2\n"    /* d2 = a_lo * b_hi */ \
           "    mulu    %/d0,%/d3\n"    /* d3 = a_hi * b_lo */ \
           "    mulu    %/d0,%/d1\n"    /* d1 = a_hi * b_hi */ \
           "    move%.l %/d4,%/d0\n" \
           "    clr%.w  %/d0\n" \
           "    swap    %/d0\n"         /* d0 = (a_lo * b_lo) >> 16 */ \
           "    add%.l  %/d0,%/d2\n" \
           "    add%.l  %/d3,%/d2\n"    /* d2 = sum of the middle terms */ \
           "    jcc     1f\n"           /* no carry out of the middle sum */ \
           "    add%.l  %#65536,%/d1\n" /* propagate carry into high product */ \
           "1:  swap    %/d2\n" \
           "    moveq   %#0,%/d0\n" \
           "    move%.w %/d2,%/d0\n"    /* d0 = middle sum >> 16 */ \
           "    move%.w %/d4,%/d2\n"    /* d2 = low result word */ \
           "    move%.l %/d2,%1\n"      /* store xl */ \
           "    add%.l  %/d1,%/d0\n" \
           "    move%.l %/d0,%0"        /* store xh */ \
           : /* %0 */ "=g" ((USItype) (xh)), \
             /* %1 */ "=g" ((USItype) (xl)) \
           : /* %2 */ "g" ((USItype) (a)), \
             /* %3 */ "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400
586 #else /* not ColdFire */
587 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
/* Plain 68000: unsigned 32x32->64 multiply, (xh:xl) = a * b, built from
   four 16x16->32 mulu instructions on the half-words of a and b.  Scratch
   registers d0-d4 are used explicitly and therefore listed as clobbers.
   This variant clears the low half of d0 with eor.w.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           "    move%.l %2,%/d0\n"      /* d0 = a */ \
           "    move%.l %3,%/d1\n"      /* d1 = b */ \
           "    move%.l %/d0,%/d2\n"    /* d2 = a */ \
           "    swap    %/d0\n"         /* low word of d0 = high half of a */ \
           "    move%.l %/d1,%/d3\n"    /* d3 = b */ \
           "    swap    %/d1\n"         /* low word of d1 = high half of b */ \
           "    move%.w %/d2,%/d4\n"    /* low word of d4 = low half of a */ \
           "    mulu    %/d3,%/d4\n"    /* d4 = a_lo * b_lo */ \
           "    mulu    %/d1,%/d2\n"    /* d2 = a_lo * b_hi */ \
           "    mulu    %/d0,%/d3\n"    /* d3 = a_hi * b_lo */ \
           "    mulu    %/d0,%/d1\n"    /* d1 = a_hi * b_hi */ \
           "    move%.l %/d4,%/d0\n" \
           "    eor%.w  %/d0,%/d0\n" \
           "    swap    %/d0\n"         /* d0 = (a_lo * b_lo) >> 16 */ \
           "    add%.l  %/d0,%/d2\n" \
           "    add%.l  %/d3,%/d2\n"    /* d2 = sum of the middle terms */ \
           "    jcc     1f\n"           /* no carry out of the middle sum */ \
           "    add%.l  %#65536,%/d1\n" /* propagate carry into high product */ \
           "1:  swap    %/d2\n" \
           "    moveq   %#0,%/d0\n" \
           "    move%.w %/d2,%/d0\n"    /* d0 = middle sum >> 16 */ \
           "    move%.w %/d4,%/d2\n"    /* d2 = low result word */ \
           "    move%.l %/d2,%1\n"      /* store xl */ \
           "    add%.l  %/d1,%/d0\n" \
           "    move%.l %/d0,%0"        /* store xh */ \
           : /* %0 */ "=g" ((USItype) (xh)), \
             /* %1 */ "=g" ((USItype) (xl)) \
           : /* %2 */ "g" ((USItype) (a)), \
             /* %3 */ "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400
622
623 #endif /* not mc68020 */
624
625 /* The '020, '030, '040 and '060 have bitfield insns.
626    cpu32 disguises as a 68020, but lacks them.  */
#if defined (__mc68020__) && !defined (__mcpu32__)
/* Bit-field variant: bfffo scans the field described by the constant
   operand %2 (offset 0, width 0) and writes the result to count.  */
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
           : /* %0 */ "=d" ((USItype) (count)) \
           : /* %1 */ "od" ((USItype) (x)), \
             /* %2 */ "n" (0))
#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
/* Some ColdFire architectures have a ff1 instruction; it is reached
   through __builtin_clz rather than hand-written asm.  */
#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
#define COUNT_LEADING_ZEROS_0 32
#endif
638 #endif /* mc68000 */
639
640 #if defined (__m88000__) && W_TYPE_SIZE == 32
/* Double-word add for m88k: (sh:sl) = (ah:al) + (bh:bl).
   addu.co adds the low words and produces the carry; addu.ci adds the
   high words and consumes it.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
           : /* %0 */ "=r" ((USItype) (sh)), \
             /* %1 */ "=&r" ((USItype) (sl)) \
           : /* %2 */ "%rJ" ((USItype) (ah)), \
             /* %3 */ "rJ" ((USItype) (bh)), \
             /* %4 */ "%rJ" ((USItype) (al)), \
             /* %5 */ "rJ" ((USItype) (bl)))
/* Double-word subtract for m88k: (sh:sl) = (ah:al) - (bh:bl).
   subu.co subtracts the low words and produces the carry; subu.ci
   subtracts the high words and consumes it.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
           : /* %0 */ "=r" ((USItype) (sh)), \
             /* %1 */ "=&r" ((USItype) (sl)) \
           : /* %2 */ "rJ" ((USItype) (ah)), \
             /* %3 */ "rJ" ((USItype) (bh)), \
             /* %4 */ "rJ" ((USItype) (al)), \
             /* %5 */ "rJ" ((USItype) (bl)))
/* Leading-zero count for m88k: the ff1 result is XORed with 31 to turn
   it into a count from the most significant end.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __ff1_result; \
    __asm__ ("ff1 %0,%1" \
             : /* %0 */ "=r" (__ff1_result) \
             : /* %1 */ "r" ((USItype) (x))); \
    (count) = __ff1_result ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
666 #if defined (__mc88110__)
/* mc88110: unsigned 32x32->64 multiply, (wh:wl) = u * v.
   mulu.d writes a double-word result, which is then split into its high
   and low halves through a union.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union { \
      UDItype __ll; \
      struct {USItype __h, __l;} __i; \
    } __prod; \
    __asm__ ("mulu.d    %0,%1,%2" \
             : /* %0 */ "=r" (__prod.__ll) \
             : /* %1 */ "r" ((USItype) (u)), \
               /* %2 */ "r" ((USItype) (v))); \
    (wh) = __prod.__i.__h; \
    (wl) = __prod.__i.__l; \
  } while (0)
/* mc88110: unsigned 64/32 divide of (n1:n0) by d.
   divu.d yields only the quotient; the remainder is recomputed as
   n0 - q * d in 32-bit arithmetic.
   n0 and d are captured in locals so that each macro argument is
   evaluated exactly once (they are needed both for the division and
   for the remainder).  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
  USItype __q; \
  USItype __n0 = (n0); \
  USItype __d = (d); \
  __xx.__i.__h = (n1); __xx.__i.__l = __n0; \
  __asm__ ("divu.d %0,%1,%2" \
           : /* %0 */ "=r" (__q) \
           : /* %1 */ "r" (__xx.__ll), \
             /* %2 */ "r" (__d)); \
  (r) = __n0 - __q * __d; (q) = __q; })
#define UMUL_TIME 5
#define UDIV_TIME 25
692 #else
693 #define UMUL_TIME 17
694 #define UDIV_TIME 150
695 #endif /* __mc88110__ */
696 #endif /* __m88000__ */
697
698 #if defined (__mips__) && W_TYPE_SIZE == 32
/* Unsigned 32x32->64 multiply in plain C: (w1:w0) = u * v.
   Both operands are truncated to USItype; the first is then widened to
   UDItype so the product is computed in 64 bits.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __product = (UDItype) (USItype) (u) * (USItype) (v); \
    (w1) = (USItype) (__product >> 32);   /* high word */ \
    (w0) = (USItype) (__product);         /* low word */ \
  } while (0)
#define UMUL_TIME 10
#define UDIV_TIME 100
707
/* MIPS32/MIPS64 (but not MIPS16 mode): take the leading-zero count from
   the compiler builtin.  */
#if ! __mips16 && (__mips == 32 || __mips == 64)
#define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
712 #endif /* __mips__ */
713
714 #if defined (__ns32000__) && W_TYPE_SIZE == 32
/* ns32000: unsigned 32x32->64 multiply, (w1:w0) = u * v.
   meid leaves a double-word result in %0, which is split through a
   union whose low half comes first.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union { \
      UDItype __ll; \
      struct {USItype __l, __h;} __i; \
    } __prod; \
  __asm__ ("meid %2,%0" \
           : /* %0 */ "=g" (__prod.__ll) \
           : /* %1 */ "%0" ((USItype) (u)), \
             /* %2 */ "g" ((USItype) (v))); \
  (w1) = __prod.__i.__h; (w0) = __prod.__i.__l;})
/* ns32000: unsigned 32x32->64 multiply returning the whole double-word
   product as the value of the statement expression.  */
#define __umulsidi3(u, v) \
  ({UDItype __product; \
    __asm__ ("meid %2,%0" \
             : /* %0 */ "=g" (__product) \
             : /* %1 */ "%0" ((USItype) (u)), \
               /* %2 */ "g" ((USItype) (v))); \
    __product; })
/* ns32000: unsigned 64/32 divide of (n1:n0) by d using deid.
   The dividend is assembled in a union and passed in/out through the
   same operand; afterwards the low half holds the remainder and the
   high half the quotient.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union { \
      UDItype __ll; \
      struct {USItype __l, __h;} __i; \
    } __nd; \
  __nd.__i.__h = (n1); __nd.__i.__l = (n0); \
  __asm__ ("deid %2,%0" \
           : /* %0 */ "=g" (__nd.__ll) \
           : /* %1 */ "0" (__nd.__ll), \
             /* %2 */ "g" ((USItype) (d))); \
  (r) = __nd.__i.__l; (q) = __nd.__i.__h; })
/* ns32000: trailing-zero count via ffsd.  The output register is
   pre-loaded with 0 through the matching input operand %1.  */
#define count_trailing_zeros(count,x) \
  do { \
    __asm__ ("ffsd     %2,%0" \
            : /* %0 */ "=r" ((USItype) (count)) \
            : /* %1 */ "0" ((USItype) 0), \
              /* %2 */ "r" ((USItype) (x))); \
  } while (0)
748 #endif /* __ns32000__ */
749
750 /* FIXME: We should test _IBMR2 here when we add assembly support for the
751    system vendor compilers.
752    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
753    enough, since that hits ARM and m68k too.  */
754 #if (defined (_ARCH_PPC)        /* AIX */                               \
755      || defined (_ARCH_PWR)     /* AIX */                               \
756      || defined (_ARCH_COM)     /* AIX */                               \
757      || defined (__powerpc__)   /* gcc */                               \
758      || defined (__POWERPC__)   /* BEOS */                              \
759      || defined (__ppc__)       /* Darwin */                            \
760      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
761      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
762          && CPU_FAMILY == PPC)                                                \
763      ) && W_TYPE_SIZE == 32
/* 32-bit POWER/PowerPC double-word add: (sh:sl) = (ah:al) + (bh:bl).
   When bh is a compile-time constant 0 or all-ones, the high-word add is
   replaced by add-to-zero-extended / add-to-minus-one-extended so that
   bh needs no register.  Each template carries both the POWER and the
   PowerPC mnemonic ({power|powerpc}).  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      /* bh == 0: sh = ah + carry.  */ \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      /* bh == all ones: sh = ah - 1 + carry.  */ \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "%r" (al), "rI" (bl)); \
    else \
      /* General case.  */ \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
/* 32-bit POWER/PowerPC double-word subtract:
   (sh:sl) = (ah:al) - (bh:bl).
   Four special cases avoid loading a constant high word (ah or bh equal
   to 0 or to all-ones); the last branch is the general form.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      /* ah == 0.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
      /* ah == all ones.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      /* bh == 0.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      /* bh == all ones.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "rI" (al), "r" (bl)); \
    else \
      /* General case.  */ \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
/* Leading-zero count of a 32-bit word via cntlz (POWER) / cntlzw
   (PowerPC).  */
#define count_leading_zeros(count, x) \
  __asm__ ("{cntlz|cntlzw} %0,%1" \
           : /* %0 */ "=r" (count) \
           : /* %1 */ "r" (x))
#define COUNT_LEADING_ZEROS_0 32
799 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
800   || defined (__ppc__)                                                    \
801   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
802   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
803          && CPU_FAMILY == PPC)
/* PowerPC unsigned 32x32->64 multiply: (ph:pl) = m0 * m1.
   mulhwu produces the high word; the low word is an ordinary C multiply.
   The asm operands are the USItype temporaries rather than the raw macro
   arguments, so each argument is evaluated exactly once and both halves
   of the product are computed from the same 32-bit values.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" \
             : /* %0 */ "=r" (ph) \
             : /* %1 */ "%r" (__m0), \
               /* %2 */ "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
/* PowerPC signed 32x32->64 multiply: (ph:pl) = m0 * m1.
   mulhw produces the high word.  The asm operands are the SItype
   temporaries rather than the raw macro arguments, so each argument is
   evaluated exactly once.  The low word is computed in unsigned
   arithmetic: it has the same bit pattern as the signed product but
   cannot trigger signed-overflow undefined behaviour.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" \
             : /* %0 */ "=r" (ph) \
             : /* %1 */ "%r" (__m0), \
               /* %2 */ "r" (__m1)); \
    (pl) = (USItype) __m0 * (USItype) __m1; \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
819 #elif defined (_ARCH_PWR)
#define UMUL_TIME 8
/* POWER signed multiply: a single mul instruction; xh is returned in a
   general register and xl through the "q" constraint.  */
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3" \
           : /* %0 */ "=r" (xh), \
             /* %1 */ "=q" (xl) \
           : /* %2 */ "r" (m0), \
             /* %3 */ "r" (m1))
#define SMUL_TIME 4
/* POWER signed divide of (nh:nl) by d: a single div instruction; nl
   enters and r leaves through the same "q"-constrained operand.  */
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4" \
           : /* %0 */ "=r" (q), \
             /* %1 */ "=q" (r) \
           : /* %2 */ "r" (nh), \
             /* %3 */ "1" (nl), \
             /* %4 */ "r" (d))
#define UDIV_TIME 100
827 #endif
828 #endif /* 32-bit POWER architecture variants.  */
829
830 /* We should test _IBMR2 here when we add assembly support for the system
831    vendor compilers.  */
832 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
/* 64-bit PowerPC double-word add: (sh:sl) = (ah:al) + (bh:bl).
   When bh is a compile-time constant 0 or all-ones, the high-word add is
   replaced by addze / addme so that bh needs no register.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      /* bh == 0: sh = ah + carry.  */ \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      /* bh == all ones: sh = ah - 1 + carry.  */ \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "%r" (al), "rI" (bl)); \
    else \
      /* General case.  */ \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
/* 64-bit PowerPC double-word subtract:
   (sh:sl) = (ah:al) - (bh:bl).
   Four special cases avoid loading a constant high word (ah or bh equal
   to 0 or to all-ones); the last branch is the general form.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      /* ah == 0.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      /* ah == all ones.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      /* bh == 0.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      /* bh == all ones.  */ \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "rI" (al), "r" (bl)); \
    else \
      /* General case.  */ \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
/* Leading-zero count of a 64-bit word via cntlzd.  */
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1" \
           : /* %0 */ "=r" (count) \
           : /* %1 */ "r" (x))
#define COUNT_LEADING_ZEROS_0 64
/* 64-bit PowerPC unsigned 64x64->128 multiply: (ph:pl) = m0 * m1.
   mulhdu produces the high word; the low word is an ordinary C multiply.
   The asm operands are the UDItype temporaries rather than the raw macro
   arguments, so each argument is evaluated exactly once and both halves
   of the product are computed from the same 64-bit values.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" \
             : /* %0 */ "=r" (ph) \
             : /* %1 */ "%r" (__m0), \
               /* %2 */ "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
/* 64-bit PowerPC signed 64x64->128 multiply: (ph:pl) = m0 * m1.
   mulhd produces the high word.  The asm operands are the DItype
   temporaries rather than the raw macro arguments, so each argument is
   evaluated exactly once.  The low word is computed in unsigned
   arithmetic: it has the same bit pattern as the signed product but
   cannot trigger signed-overflow undefined behaviour.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" \
             : /* %0 */ "=r" (ph) \
             : /* %1 */ "%r" (__m0), \
               /* %2 */ "r" (__m1)); \
    (pl) = (UDItype) __m0 * (UDItype) __m1; \
  } while (0)
#define SMUL_TIME 14  /* ??? */
#define UDIV_TIME 120 /* ??? */
883 #endif /* 64-bit PowerPC.  */
884
885 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
/* RT/ROMP double-word add: (sh:sl) = (ah:al) + (bh:bl).
   Two-operand form: ah/al are tied to the output registers, the low
   words are added first ("a"), then the high words ("ae").  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
           : /* %0 */ "=r" ((USItype) (sh)), \
             /* %1 */ "=&r" ((USItype) (sl)) \
           : /* %2 */ "%0" ((USItype) (ah)), \
             /* %3 */ "r" ((USItype) (bh)), \
             /* %4 */ "%1" ((USItype) (al)), \
             /* %5 */ "r" ((USItype) (bl)))
/* RT/ROMP double-word subtract: (sh:sl) = (ah:al) - (bh:bl).
   Two-operand form: ah/al are tied to the output registers, the low
   words are subtracted first ("s"), then the high words ("se").  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
           : /* %0 */ "=r" ((USItype) (sh)), \
             /* %1 */ "=&r" ((USItype) (sl)) \
           : /* %2 */ "0" ((USItype) (ah)), \
             /* %3 */ "r" ((USItype) (bh)), \
             /* %4 */ "1" ((USItype) (al)), \
             /* %5 */ "r" ((USItype) (bl)))
/* RT/ROMP unsigned 32x32->64 multiply: (ph:pl) = m0 * m1.
   The asm clears r2, loads m0 into r10 with mts, issues sixteen "m"
   steps against m1, then copies r2 to ph (cas) and reads pl back from
   r10 (mfs).  r2 is used as scratch and listed as a clobber.
   The C statement after the asm adds m1 to ph when m0 has its top bit
   set, and m0 when m1 has its top bit set -- the usual correction that
   turns a signed high word into the unsigned one.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ( \
       "s       r2,r2\n" \
"       mts     r10,%2\n" \
"       m       r2,%3\n"   /* step  1 */ \
"       m       r2,%3\n"   /* step  2 */ \
"       m       r2,%3\n"   /* step  3 */ \
"       m       r2,%3\n"   /* step  4 */ \
"       m       r2,%3\n"   /* step  5 */ \
"       m       r2,%3\n"   /* step  6 */ \
"       m       r2,%3\n"   /* step  7 */ \
"       m       r2,%3\n"   /* step  8 */ \
"       m       r2,%3\n"   /* step  9 */ \
"       m       r2,%3\n"   /* step 10 */ \
"       m       r2,%3\n"   /* step 11 */ \
"       m       r2,%3\n"   /* step 12 */ \
"       m       r2,%3\n"   /* step 13 */ \
"       m       r2,%3\n"   /* step 14 */ \
"       m       r2,%3\n"   /* step 15 */ \
"       m       r2,%3\n"   /* step 16 */ \
"       cas     %0,r2,r0\n" \
"       mfs     r10,%1" \
             : /* %0 */ "=r" ((USItype) (ph)), \
               /* %1 */ "=r" ((USItype) (pl)) \
             : /* %2 */ "%r" (__m0), \
               /* %3 */ "r" (__m1) \
             : "r2"); \
    (ph) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
/* RT/ROMP leading-zero count of a 32-bit word.
   clz is applied either to the upper 16 bits (when x >= 0x10000) or to
   x itself; in the latter case 16 is added to the result.  */
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      /* A bit is set in the upper half: count within x >> 16.  */ \
      __asm__ ("clz     %0,%1" \
               : /* %0 */ "=r" ((USItype) (count)) \
               : /* %1 */ "r" ((USItype) (x) >> 16)); \
    else \
      { \
        /* Upper half is empty: count within x, then add 16.  */ \
        __asm__ ("clz   %0,%1" \
                 : /* %0 */ "=r" ((USItype) (count)) \
                 : /* %1 */ "r" ((USItype) (x))); \
        (count) += 16; \
      } \
  } while (0)
950 #endif
951
952 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
953 #ifndef __sh1__
/* SH (not SH1) unsigned 32x32->64 multiply: (w1:w0) = u * v.
   dmulu.l leaves the product in mach:macl; the two sts instructions copy
   macl to w0 and mach to w1.  Both MAC registers are listed as
   clobbers.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ( \
       "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0" \
           : /* %0 */ "=r<" ((USItype)(w1)), \
             /* %1 */ "=r<" ((USItype)(w0)) \
           : /* %2 */ "r" ((USItype)(u)), \
             /* %3 */ "r" ((USItype)(v)) \
           : "macl", "mach")
#define UMUL_TIME 5
963 #endif
964
965 /* This is the same algorithm as __udiv_qrnnd_c.  */
966 #define UDIV_NEEDS_NORMALIZATION 1
967
/* SH unsigned 64/32 divide of (n1:n0) by d, done as two calls to the
   out-of-line helper __udiv_qrnnd_16 (declared here with hidden
   visibility; its body is not part of this file).
   Fixed registers are used to talk to the helper, as noted in the
   comment inside the macro, so r1, r2, r4, r5, r6 and pr are clobbered.
   The T bit is listed as clobbered as well: the asm calls a subroutine
   twice via jsr and T is not preserved across calls.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
                        __attribute__ ((visibility ("hidden"))); \
    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
    __asm__ ( \
        "mov%M4 %4,r5\n" \
"       swap.w %3,r4\n" \
"       swap.w r5,r6\n" \
"       jsr @%5\n" \
"       shll16 r6\n" \
"       swap.w r4,r4\n" \
"       jsr @%5\n" \
"       swap.w r1,%0\n" \
"       or r1,%0" \
        : /* %0 */ "=r" (q), /* %1 */ "=&z" (r) \
        : /* %2 */ "1" (n1), /* %3 */ "r" (n0), /* %4 */ "rm" (d), \
          /* %5 */ "r" (&__udiv_qrnnd_16) \
        : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
  } while (0)
987
988 #define UDIV_TIME 80
989
/* SH double-word subtract: (sh:sl) = (ah:al) - (bh:bl).
   clrt clears the T bit, the first subc subtracts the low words and the
   second subc subtracts the high words using T as borrow.
   - T is modified by clrt/subc, so it is declared as clobbered.
   - sl is written by the first instruction while bh (%4) is still
     needed by the second one, so sl is earlyclobber; this keeps the
     compiler from placing bh in sl's register (same convention as the
     other ports' sub_ddmmss in this file).  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("clrt;subc %5,%1; subc %4,%0" \
           : /* %0 */ "=r" (sh), /* %1 */ "=&r" (sl) \
           : /* %2 */ "0" (ah), /* %3 */ "1" (al), \
             /* %4 */ "r" (bh), /* %5 */ "r" (bl) \
           : "t")
994
995 #endif /* __sh__ */
996
997 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Unsigned 32x32->64 multiply in plain C.  Both arguments are
   parenthesized so that expression arguments such as `a + b' are
   truncated and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* SHmedia leading-zero count of a 32-bit word.
   x is zero-extended into a 64-bit temporary, nsb is applied to it, and
   31 is subtracted from the nsb result to obtain the 32-bit count.  */
#define count_leading_zeros(count, x) \
  do \
    { \
      UDItype __wide_x = (USItype)(x); \
      SItype __nsb_result; \
 \
      __asm__ ("nsb %1, %0" \
               : /* %0 */ "=r" (__nsb_result) \
               : /* %1 */ "r" (__wide_x)); \
      (count) = __nsb_result - 31; \
    } \
  while (0)
#define COUNT_LEADING_ZEROS_0 32
1010 #endif
1011
1012 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1013     && W_TYPE_SIZE == 32
/* 32-bit SPARC double-word add: (sh:sl) = (ah:al) + (bh:bl).
   addcc adds the low words and sets the carry; addx adds the high words
   with that carry.  __CLOBBER_CC (defined elsewhere in this file)
   supplies the clobber list.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
           : /* %0 */ "=r" ((USItype) (sh)), \
             /* %1 */ "=&r" ((USItype) (sl)) \
           : /* %2 */ "%rJ" ((USItype) (ah)), \
             /* %3 */ "rI" ((USItype) (bh)), \
             /* %4 */ "%rJ" ((USItype) (al)), \
             /* %5 */ "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
/* 32-bit SPARC double-word subtract: (sh:sl) = (ah:al) - (bh:bl).
   subcc subtracts the low words and sets the carry; subx subtracts the
   high words with that carry.  __CLOBBER_CC (defined elsewhere in this
   file) supplies the clobber list.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
           : /* %0 */ "=r" ((USItype) (sh)), \
             /* %1 */ "=&r" ((USItype) (sl)) \
           : /* %2 */ "rJ" ((USItype) (ah)), \
             /* %3 */ "rI" ((USItype) (bh)), \
             /* %4 */ "rJ" ((USItype) (al)), \
             /* %5 */ "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
1032 #if defined (__sparc_v8__)
/* SPARC v8 unsigned 32x32->64 multiply: (w1:w0) = u * v.
   umul writes the low word to w0; the high word is then read from the
   %y register into w1.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : /* %0 */ "=r" ((USItype) (w1)), \
             /* %1 */ "=r" ((USItype) (w0)) \
           : /* %2 */ "r" ((USItype) (u)), \
             /* %3 */ "r" ((USItype) (v)))
/* SPARC v8 unsigned 64/32 divide of (__n1:__n0) by __d.
   __n1 is moved into %y (followed by three nops), udiv produces the
   quotient, and the remainder is recomputed as __n0 - quotient * __d
   with umul/sub.  Both outputs are earlyclobber because they are
   written before all inputs have been consumed.  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
           : /* %0 */ "=&r" ((USItype) (__q)), \
             /* %1 */ "=&r" ((USItype) (__r)) \
           : /* %2 */ "r" ((USItype) (__n1)), \
             /* %3 */ "r" ((USItype) (__n0)), \
             /* %4 */ "r" ((USItype) (__d)))
1046 #else
1047 #if defined (__sparclite__)
1048 /* This has hardware multiply but not divide.  It also has two additional
1049    instructions scan (ffs from high bit) and divscc.  */
/* SPARClite unsigned 32x32->64 multiply: (w1:w0) = u * v.
   Same sequence as the v8 variant: umul for the low word, then the high
   word is read from %y.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : /* %0 */ "=r" ((USItype) (w1)), \
             /* %1 */ "=r" ((USItype) (w0)) \
           : /* %2 */ "r" ((USItype) (u)), \
             /* %3 */ "r" ((USItype) (v)))
/* Divide the two-word value (n1, n0) by d on SPARClite using the `divscc'
   divide-step instruction.  N1 is written to %y, `tst %g0' sets up the
   condition codes, and 32 `divscc' steps follow: the first takes N0 as its
   source, the next 30 iterate on the scratch register %g1, and the last
   one deposits the quotient in Q.  The remainder is then read from %y into
   R; the annulling `bl,a' executes the `add' in its delay slot only when
   the branch is taken, adding D to R as a final correction.  %g1 is
   declared clobbered.  UDIV_TIME matches the 37 instructions in the
   sequence (wr, tst, 32 divscc, rd, bl, add).  */
1056 #define udiv_qrnnd(q, r, n1, n0, d) \
1057   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1058 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1059 "       tst     %%g0\n"                                                 \
1060 "       divscc  %3,%4,%%g1\n"                                           \
1061 "       divscc  %%g1,%4,%%g1\n"                                         \
1062 "       divscc  %%g1,%4,%%g1\n"                                         \
1063 "       divscc  %%g1,%4,%%g1\n"                                         \
1064 "       divscc  %%g1,%4,%%g1\n"                                         \
1065 "       divscc  %%g1,%4,%%g1\n"                                         \
1066 "       divscc  %%g1,%4,%%g1\n"                                         \
1067 "       divscc  %%g1,%4,%%g1\n"                                         \
1068 "       divscc  %%g1,%4,%%g1\n"                                         \
1069 "       divscc  %%g1,%4,%%g1\n"                                         \
1070 "       divscc  %%g1,%4,%%g1\n"                                         \
1071 "       divscc  %%g1,%4,%%g1\n"                                         \
1072 "       divscc  %%g1,%4,%%g1\n"                                         \
1073 "       divscc  %%g1,%4,%%g1\n"                                         \
1074 "       divscc  %%g1,%4,%%g1\n"                                         \
1075 "       divscc  %%g1,%4,%%g1\n"                                         \
1076 "       divscc  %%g1,%4,%%g1\n"                                         \
1077 "       divscc  %%g1,%4,%%g1\n"                                         \
1078 "       divscc  %%g1,%4,%%g1\n"                                         \
1079 "       divscc  %%g1,%4,%%g1\n"                                         \
1080 "       divscc  %%g1,%4,%%g1\n"                                         \
1081 "       divscc  %%g1,%4,%%g1\n"                                         \
1082 "       divscc  %%g1,%4,%%g1\n"                                         \
1083 "       divscc  %%g1,%4,%%g1\n"                                         \
1084 "       divscc  %%g1,%4,%%g1\n"                                         \
1085 "       divscc  %%g1,%4,%%g1\n"                                         \
1086 "       divscc  %%g1,%4,%%g1\n"                                         \
1087 "       divscc  %%g1,%4,%%g1\n"                                         \
1088 "       divscc  %%g1,%4,%%g1\n"                                         \
1089 "       divscc  %%g1,%4,%%g1\n"                                         \
1090 "       divscc  %%g1,%4,%%g1\n"                                         \
1091 "       divscc  %%g1,%4,%0\n"                                           \
1092 "       rd      %%y,%1\n"                                               \
1093 "       bl,a 1f\n"                                                      \
1094 "       add     %1,%4,%1\n"                                             \
1095 "1:     ! End of inline udiv_qrnnd"                                     \
1096            : "=r" ((USItype) (q)),                                      \
1097              "=r" ((USItype) (r))                                       \
1098            : "r" ((USItype) (n1)),                                      \
1099              "r" ((USItype) (n0)),                                      \
1100              "rI" ((USItype) (d))                                       \
1101            : "g1" __AND_CLOBBER_CC)
1102 #define UDIV_TIME 37
/* Count leading zero bits of X into COUNT with the SPARClite `scan'
   instruction (described above as "ffs from high bit").  The result for
   X == 0 is implementation dependent; see the note that follows.  */
1103 #define count_leading_zeros(count, x) \
1104   do {                                                                  \
1105   __asm__ ("scan %1,1,%0"                                               \
1106            : "=r" ((USItype) (count))                                   \
1107            : "r" ((USItype) (x)));                                      \
1108   } while (0)
1109 /* Early sparclites return 63 for an argument of 0, but they warn that future
1110    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1111    undefined.  */
1112 #else
1113 /* SPARC without integer multiplication and divide instructions.
1114    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* 32x32 -> 64 bit unsigned multiply for SPARC implementations without a
   hardware multiplier, built from the `mulscc' multiply-step instruction.
   U is written to %y; %o5 receives (V >> 31, arithmetic) & U, i.e. U when
   V's top bit is set and 0 otherwise, which is added to the high word at
   the end as a correction.  `andcc' clears the accumulator %g1 and the
   condition codes, then 32 `mulscc' steps with V are followed by one
   final step with 0.  W1 = %g1 + %o5 is the high word and W0, read back
   from %y, is the low word.  %g1 and %o5 are declared clobbered.
   UMUL_TIME matches the 39 instructions in the sequence.  */
1115 #define umul_ppmm(w1, w0, u, v) \
1116   __asm__ ("! Inlined umul_ppmm\n"                                      \
1117 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1118 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1119 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1120 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1121 "       mulscc  %%g1,%3,%%g1\n"                                         \
1122 "       mulscc  %%g1,%3,%%g1\n"                                         \
1123 "       mulscc  %%g1,%3,%%g1\n"                                         \
1124 "       mulscc  %%g1,%3,%%g1\n"                                         \
1125 "       mulscc  %%g1,%3,%%g1\n"                                         \
1126 "       mulscc  %%g1,%3,%%g1\n"                                         \
1127 "       mulscc  %%g1,%3,%%g1\n"                                         \
1128 "       mulscc  %%g1,%3,%%g1\n"                                         \
1129 "       mulscc  %%g1,%3,%%g1\n"                                         \
1130 "       mulscc  %%g1,%3,%%g1\n"                                         \
1131 "       mulscc  %%g1,%3,%%g1\n"                                         \
1132 "       mulscc  %%g1,%3,%%g1\n"                                         \
1133 "       mulscc  %%g1,%3,%%g1\n"                                         \
1134 "       mulscc  %%g1,%3,%%g1\n"                                         \
1135 "       mulscc  %%g1,%3,%%g1\n"                                         \
1136 "       mulscc  %%g1,%3,%%g1\n"                                         \
1137 "       mulscc  %%g1,%3,%%g1\n"                                         \
1138 "       mulscc  %%g1,%3,%%g1\n"                                         \
1139 "       mulscc  %%g1,%3,%%g1\n"                                         \
1140 "       mulscc  %%g1,%3,%%g1\n"                                         \
1141 "       mulscc  %%g1,%3,%%g1\n"                                         \
1142 "       mulscc  %%g1,%3,%%g1\n"                                         \
1143 "       mulscc  %%g1,%3,%%g1\n"                                         \
1144 "       mulscc  %%g1,%3,%%g1\n"                                         \
1145 "       mulscc  %%g1,%3,%%g1\n"                                         \
1146 "       mulscc  %%g1,%3,%%g1\n"                                         \
1147 "       mulscc  %%g1,%3,%%g1\n"                                         \
1148 "       mulscc  %%g1,%3,%%g1\n"                                         \
1149 "       mulscc  %%g1,%3,%%g1\n"                                         \
1150 "       mulscc  %%g1,%3,%%g1\n"                                         \
1151 "       mulscc  %%g1,%3,%%g1\n"                                         \
1152 "       mulscc  %%g1,%3,%%g1\n"                                         \
1153 "       mulscc  %%g1,0,%%g1\n"                                          \
1154 "       add     %%g1,%%o5,%0\n"                                         \
1155 "       rd      %%y,%1"                                                 \
1156            : "=r" ((USItype) (w1)),                                     \
1157              "=r" ((USItype) (w0))                                      \
1158            : "%rI" ((USItype) (u)),                                     \
1159              "r" ((USItype) (v))                                                \
1160            : "g1", "o5" __AND_CLOBBER_CC)
1161 #define UMUL_TIME 39            /* 39 instructions */
1162 /* It's quite necessary to add this much assembler for the sparc.
1163    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Shift-and-subtract division of the two-word value (__n1, __n0) by __d
   for SPARC without divide hardware.  Operand 0 (__q) is tied to __n0 and
   operand 1 (__r) is tied to __n1, so the registers that start out holding
   the dividend end up holding the quotient and remainder.  %g1 counts down
   from 32, one iteration per quotient bit; each step shifts the dividend
   left while shifting a quotient bit into the low end of operand 0.  The
   final `xnor %0,0,%0' inverts every bit of operand 0 to yield the
   quotient.  %g1 is declared clobbered.  */
1164 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1165   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1166 "       mov     32,%%g1\n"                                              \
1167 "       subcc   %1,%2,%%g0\n"                                           \
1168 "1:     bcs     5f\n"                                                   \
1169 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1170 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1171 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1172 "       subcc   %%g1,1,%%g1\n"                                          \
1173 "2:     bne     1b\n"                                                   \
1174 "        subcc  %1,%2,%%g0\n"                                           \
1175 "       bcs     3f\n"                                                   \
1176 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1177 "       b       3f\n"                                                   \
1178 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1179 "4:     sub     %1,%2,%1\n"                                             \
1180 "5:     addxcc  %1,%1,%1\n"                                             \
1181 "       bcc     2b\n"                                                   \
1182 "        subcc  %%g1,1,%%g1\n"                                          \
1183 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1184 "       bne     4b\n"                                                   \
1185 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1186 "       sub     %1,%2,%1\n"                                             \
1187 "3:     xnor    %0,0,%0\n"                                              \
1188 "       ! End of inline udiv_qrnnd"                                     \
1189            : "=&r" ((USItype) (__q)),                                   \
1190              "=&r" ((USItype) (__r))                                    \
1191            : "r" ((USItype) (__d)),                                     \
1192              "1" ((USItype) (__n1)),                                    \
1193              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1194 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1195 #endif /* __sparclite__ */
1196 #endif /* __sparc_v8__ */
1197 #endif /* sparc32 */
1198
1199 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1200     && W_TYPE_SIZE == 64
/* Two-word (2 x 64 bit) addition for 64-bit SPARC:
   (sh, sl) = (ah, al) + (bh, bl).  `addcc' forms the low word and sets the
   condition codes, a plain `add' forms the high word, and the annulling
   branch `bcs,a,pn %xcc' executes the `add %0, 1, %0' in its delay slot
   only when the 64-bit carry is set, propagating the carry into SH.  SL is
   earlyclobber because it is written before the high-word inputs are
   read.  */
1201 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1202   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1203            "add %r2,%3,%0\n\t"                                          \
1204            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1205            "add %0, 1, %0\n"                                            \
1206            "1:"                                                         \
1207            : "=r" ((UDItype)(sh)),                                      \
1208              "=&r" ((UDItype)(sl))                                      \
1209            : "%rJ" ((UDItype)(ah)),                                     \
1210              "rI" ((UDItype)(bh)),                                      \
1211              "%rJ" ((UDItype)(al)),                                     \
1212              "rI" ((UDItype)(bl))                                       \
1213            __CLOBBER_CC)
1214
/* Two-word (2 x 64 bit) subtraction for 64-bit SPARC:
   (sh, sl) = (ah, al) - (bh, bl).  `subcc' forms the low word and sets the
   condition codes, a plain `sub' forms the high word, and the annulling
   branch `bcs,a,pn %xcc' executes the `sub %0, 1, %0' in its delay slot
   only when the 64-bit borrow is set, propagating the borrow into SH.  SL
   is earlyclobber because it is written before the high-word inputs are
   read.  */
1215 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1216   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1217            "sub %r2,%3,%0\n\t"                                          \
1218            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1219            "sub %0, 1, %0\n\t"                                          \
1220            "1:"                                                         \
1221            : "=r" ((UDItype)(sh)),                                      \
1222              "=&r" ((UDItype)(sl))                                      \
1223            : "rJ" ((UDItype)(ah)),                                      \
1224              "rI" ((UDItype)(bh)),                                      \
1225              "rJ" ((UDItype)(al)),                                      \
1226              "rI" ((UDItype)(bl))                                       \
1227            __CLOBBER_CC)
1228
/* 64x64 -> 128 bit unsigned multiply for 64-bit SPARC.  The operands
   U (operand 6) and V (operand 7) are split into 32-bit halves with
   `srl ...,0' (low half) and `srlx ...,32' (high half); the partial
   products are formed with `mulx' and combined, with carries tracked
   through `addcc'/`movcc', in the four earlyclobber scratch registers
   tmp1..tmp4 (operands 2-5).  The high word ends up in WH (operand 0) and
   the low word in WL (operand 1).
   NOTE(review): the units of UMUL_TIME/UDIV_TIME below are not visible in
   this part of the file -- presumably relative cost estimates.  */
1229 #define umul_ppmm(wh, wl, u, v)                                         \
1230   do {                                                                  \
1231           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1232           __asm__ __volatile__ (                                        \
1233                    "srl %7,0,%3\n\t"                                    \
1234                    "mulx %3,%6,%1\n\t"                                  \
1235                    "srlx %6,32,%2\n\t"                                  \
1236                    "mulx %2,%3,%4\n\t"                                  \
1237                    "sllx %4,32,%5\n\t"                                  \
1238                    "srl %6,0,%3\n\t"                                    \
1239                    "sub %1,%5,%5\n\t"                                   \
1240                    "srlx %5,32,%5\n\t"                                  \
1241                    "addcc %4,%5,%4\n\t"                                 \
1242                    "srlx %7,32,%5\n\t"                                  \
1243                    "mulx %3,%5,%3\n\t"                                  \
1244                    "mulx %2,%5,%5\n\t"                                  \
1245                    "sethi %%hi(0x80000000),%2\n\t"                      \
1246                    "addcc %4,%3,%4\n\t"                                 \
1247                    "srlx %4,32,%4\n\t"                                  \
1248                    "add %2,%2,%2\n\t"                                   \
1249                    "movcc %%xcc,%%g0,%2\n\t"                            \
1250                    "addcc %5,%4,%5\n\t"                                 \
1251                    "sllx %3,32,%3\n\t"                                  \
1252                    "add %1,%3,%1\n\t"                                   \
1253                    "add %5,%2,%0"                                       \
1254            : "=r" ((UDItype)(wh)),                                      \
1255              "=&r" ((UDItype)(wl)),                                     \
1256              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1257            : "r" ((UDItype)(u)),                                        \
1258              "r" ((UDItype)(v))                                         \
1259            __CLOBBER_CC);                                               \
1260   } while (0)
1261 #define UMUL_TIME 96
1262 #define UDIV_TIME 230
1263 #endif /* sparc64 */
1264
1265 #if defined (__vax__) && W_TYPE_SIZE == 32
/* Two-word addition for VAX: (sh, sl) = (ah, al) + (bh, bl).  AL is tied
   to the SL operand and AH to the SH operand, so both instructions update
   in place: `addl2' adds BL into the low word, then `adwc' adds BH plus
   the resulting carry into the high word.  */
1266 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1267   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1268            : "=g" ((USItype) (sh)),                                     \
1269              "=&g" ((USItype) (sl))                                     \
1270            : "%0" ((USItype) (ah)),                                     \
1271              "g" ((USItype) (bh)),                                      \
1272              "%1" ((USItype) (al)),                                     \
1273              "g" ((USItype) (bl)))
/* Two-word subtraction for VAX: (sh, sl) = (ah, al) - (bh, bl).  AL is
   tied to the SL operand and AH to the SH operand, so both instructions
   update in place: `subl2' subtracts BL from the low word, then `sbwc'
   subtracts BH plus the resulting borrow from the high word.  */
1274 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1275   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1276            : "=g" ((USItype) (sh)),                                     \
1277              "=&g" ((USItype) (sl))                                     \
1278            : "0" ((USItype) (ah)),                                      \
1279              "g" ((USItype) (bh)),                                      \
1280              "1" ((USItype) (al)),                                      \
1281              "g" ((USItype) (bl)))
/* 32x32 -> 64 bit unsigned multiply for VAX.  `emul' multiplies the two
   operands (with a zero addend) into the 64-bit member of the union; the
   halves are then read back through the { __l, __h } view.  The final
   statement adds M1 to the high word when M0's top bit is set, and M0 when
   M1's top bit is set, which converts a signed product into the unsigned
   one.  M0 and M1 are each evaluated exactly once (copied to locals).  */
1282 #define umul_ppmm(xh, xl, m0, m1) \
1283   do {                                                                  \
1284     union {                                                             \
1285         UDItype __ll;                                                   \
1286         struct {USItype __l, __h;} __i;                                 \
1287       } __xx;                                                           \
1288     USItype __m0 = (m0), __m1 = (m1);                                   \
1289     __asm__ ("emul %1,%2,$0,%0"                                         \
1290              : "=r" (__xx.__ll)                                         \
1291              : "g" (__m0),                                              \
1292                "g" (__m1));                                             \
1293     (xh) = __xx.__i.__h;                                                \
1294     (xl) = __xx.__i.__l;                                                \
1295     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1296              + (((SItype) __m1 >> 31) & __m0));                         \
1297   } while (0)
/* Signed division of the two-word value (n1, n0) by d for VAX.  The two
   halves are assembled into a 64-bit value through the union and handed
   to `ediv', which produces the quotient in Q and the remainder in R.
   NOTE(review): N1, N0, Q, R and D are used without parentheses, so
   callers should pass simple expressions.  */
1298 #define sdiv_qrnnd(q, r, n1, n0, d) \
1299   do {                                                                  \
1300     union {DItype __ll;                                                 \
1301            struct {SItype __l, __h;} __i;                               \
1302           } __xx;                                                       \
1303     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1304     __asm__ ("ediv %3,%2,%0,%1"                                         \
1305              : "=g" (q), "=g" (r)                                       \
1306              : "g" (__xx.__ll), "g" (d));                               \
1307   } while (0)
1308 #endif /* __vax__ */
1309
1310 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1311 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1312    to expand builtin functions depending on what configuration features
1313    are available.  This avoids library calls when the operation can be
1314    performed in-line.  */
/* Full unsigned product of U and V for Xtensa: the compiler builtin
   computes the double-word result, which is then split into its high
   word W1 and low word W0 through a DWunion.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __prod;                                                     \
    __prod.ll = __builtin_umulsidi3 (u, v);                             \
    (w1) = __prod.s.high;                                               \
    (w0) = __prod.s.low;                                                \
  } while (0)
/* Xtensa: map the remaining primitives directly onto compiler builtins.
   __umulsidi3 yields the double-word product of U and V;
   count_leading_zeros / count_trailing_zeros store the result of
   __builtin_clz / __builtin_ctz of X into COUNT.  */
1322 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1323 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1324 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1325 #endif /* __xtensa__ */
1326
1327 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* Two-word (2 x 16 bit) addition for Z8000:
   (sh, sl) = (ah, al) + (bh, bl).  AL is tied to the SL operand and AH to
   the SH operand, so `add' accumulates BL into the low word in place and
   `adc' then adds BH plus the carry into the high word.  */
1328 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1329   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1330            : "=r" ((unsigned int)(sh)),                                 \
1331              "=&r" ((unsigned int)(sl))                                 \
1332            : "%0" ((unsigned int)(ah)),                                 \
1333              "r" ((unsigned int)(bh)),                                  \
1334              "%1" ((unsigned int)(al)),                                 \
1335              "rQR" ((unsigned int)(bl)))
/* Two-word (2 x 16 bit) subtraction for Z8000:
   (sh, sl) = (ah, al) - (bh, bl).  AL is tied to the SL operand and AH to
   the SH operand, so `sub' subtracts BL from the low word in place and
   `sbc' then subtracts BH plus the borrow from the high word.  */
1336 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1337   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1338            : "=r" ((unsigned int)(sh)),                                 \
1339              "=&r" ((unsigned int)(sl))                                 \
1340            : "0" ((unsigned int)(ah)),                                  \
1341              "r" ((unsigned int)(bh)),                                  \
1342              "1" ((unsigned int)(al)),                                  \
1343              "rQR" ((unsigned int)(bl)))
/* 16x16 -> 32 bit unsigned multiply for Z8000.  `mult' writes its result
   into the { __h, __l } pair of the union (M0 is tied to the __l operand
   on input).  The final statement adds M1 to the high word when M0's top
   bit (bit 15) is set, and M0 when M1's top bit is set, which converts a
   signed product into the unsigned one.  M0 and M1 are each evaluated
   exactly once (copied to locals).  */
1344 #define umul_ppmm(xh, xl, m0, m1) \
1345   do {                                                                  \
1346     union {long int __ll;                                               \
1347            struct {unsigned int __h, __l;} __i;                         \
1348           } __xx;                                                       \
1349     unsigned int __m0 = (m0), __m1 = (m1);                              \
1350     __asm__ ("mult      %S0,%H3"                                        \
1351              : "=r" (__xx.__i.__h),                                     \
1352                "=r" (__xx.__i.__l)                                      \
1353              : "%1" (__m0),                                             \
1354                "rQR" (__m1));                                           \
1355     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1356     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1357              + (((signed int) __m1 >> 15) & __m0));                     \
1358   } while (0)
1359 #endif /* __z8000__ */
1360
1361 #endif /* __GNUC__ */
1362
1363 /* If this machine has no inline assembler, use C macros.  */
1364
1365 #if !defined (add_ssaaaa)
/* Portable two-word addition: (sh, sl) = (ah, al) + (bh, bl).
   The low words are summed first; if that sum wrapped around (it is then
   smaller than AL) a carry of 1 is added into the high word.  AL is
   evaluated twice.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __lo_sum = (al) + (bl);                                      \
    const int __carry = __lo_sum < (al);                                \
    (sh) = (ah) + (bh) + __carry;                                       \
    (sl) = __lo_sum;                                                    \
  } while (0)
1373 #endif
1374
1375 #if !defined (sub_ddmmss)
/* Portable two-word subtraction: (sh, sl) = (ah, al) - (bh, bl).
   The low words are subtracted first; if that difference wrapped around
   (it is then larger than AL) a borrow of 1 is taken from the high word.
   AL is evaluated twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __lo_diff = (al) - (bl);                                     \
    const int __borrow = __lo_diff > (al);                              \
    (sh) = (ah) - (bh) - __borrow;                                      \
    (sl) = __lo_diff;                                                   \
  } while (0)
1383 #endif
1384
1385 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1386    smul_ppmm.  */
1387 #if !defined (umul_ppmm) && defined (smul_ppmm)
/* Unsigned double-word product built on the signed primitive smul_ppmm.
   The signed high word is corrected by adding V when U has its top bit
   set and U when V has its top bit set; the low word W0 is the one
   smul_ppmm produces.  U and V are each evaluated once.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __su = (u);                                                  \
    UWtype __sv = (v);                                                  \
    UWtype __shi, __fix_u, __fix_v;                                     \
    smul_ppmm (__shi, w0, __su, __sv);                                  \
    /* All-ones mask when the operand's top bit is set, else zero.  */  \
    __fix_u = -(__su >> (W_TYPE_SIZE - 1)) & __sv;                      \
    __fix_v = -(__sv >> (W_TYPE_SIZE - 1)) & __su;                      \
    (w1) = __shi + __fix_u + __fix_v;                                   \
  } while (0)
1396 #endif
1397
1398 /* If we still don't have umul_ppmm, define it using plain C.  */
1399 #if !defined (umul_ppmm)
/* Portable double-word product: (w1, w0) = u * v.
   Each operand is split into half-words and the four half-word products
   are summed schoolbook fashion.  U and V are each evaluated twice.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UHWtype __u_lo = __ll_lowpart (u);                                  \
    UHWtype __u_hi = __ll_highpart (u);                                 \
    UHWtype __v_lo = __ll_lowpart (v);                                  \
    UHWtype __v_hi = __ll_highpart (v);                                 \
    UWtype __p_ll = (UWtype) __u_lo * __v_lo;                           \
    UWtype __p_lh = (UWtype) __u_lo * __v_hi;                           \
    UWtype __p_hl = (UWtype) __u_hi * __v_lo;                           \
    UWtype __p_hh = (UWtype) __u_hi * __v_hi;                           \
    UWtype __mid;                                                       \
                                                                        \
    /* Adding the top half of the low product cannot overflow...  */    \
    __mid = __p_lh + __ll_highpart (__p_ll);                            \
    /* ...but adding the second cross product can; a wrapped sum is    \
       smaller than the addend, and the lost carry is worth __ll_B in  \
       the high product.  */                                            \
    __mid += __p_hl;                                                    \
    if (__mid < __p_hl)                                                 \
      __p_hh += __ll_B;                                                 \
                                                                        \
    (w1) = __p_hh + __ll_highpart (__mid);                              \
    (w0) = __ll_lowpart (__mid) * __ll_B + __ll_lowpart (__p_ll);       \
  } while (0)
1423 #endif
1424
1425 #if !defined (__umulsidi3)
/* Double-word product of U and V as a single value: umul_ppmm fills the
   high and low halves of a DWunion, whose combined `ll' member is the
   value of the statement expression.  */
#define __umulsidi3(u, v) \
  ({ DWunion __prod;                                                    \
     umul_ppmm (__prod.s.high, __prod.s.low, u, v);                     \
     __prod.ll; })
1430 #endif
1431
1432 /* Define this unconditionally, so it can be used for debugging.  */
/* Portable division of the two-word value (n1, n0) by d, storing the
   quotient in Q and the remainder in R.  The quotient is produced one
   half-word at a time: each half is first estimated by dividing by the
   high half of D, then lowered by at most two while the partial remainder
   is too small to cover the estimate times the low half of D.  D is
   evaluated several times.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __dh = __ll_highpart (d);                                    \
    UWtype __dl = __ll_lowpart (d);                                     \
    UWtype __qh, __ql, __rh, __rl, __t;                                 \
                                                                        \
    /* High half of the quotient.  */                                   \
    __rh = (n1) % __dh;                                                 \
    __qh = (n1) / __dh;                                                 \
    __t = (UWtype) __qh * __dl;                                         \
    __rh = __rh * __ll_B | __ll_highpart (n0);                          \
    if (__rh < __t)                                                     \
      {                                                                 \
        __qh--;                                                         \
        __rh += (d);                                                    \
        /* Adjust again only if the addition did not wrap.  */          \
        if (__rh >= (d) && __rh < __t)                                  \
          {                                                             \
            __qh--;                                                     \
            __rh += (d);                                                \
          }                                                             \
      }                                                                 \
    __rh -= __t;                                                        \
                                                                        \
    /* Low half of the quotient, same procedure.  */                    \
    __rl = __rh % __dh;                                                 \
    __ql = __rh / __dh;                                                 \
    __t = (UWtype) __ql * __dl;                                         \
    __rl = __rl * __ll_B | __ll_lowpart (n0);                           \
    if (__rl < __t)                                                     \
      {                                                                 \
        __ql--;                                                         \
        __rl += (d);                                                    \
        if (__rl >= (d) && __rl < __t)                                  \
          {                                                             \
            __ql--;                                                     \
            __rl += (d);                                                \
          }                                                             \
      }                                                                 \
    __rl -= __t;                                                        \
                                                                        \
    (q) = (UWtype) __qh * __ll_B | __ql;                                \
    (r) = __rl;                                                         \
  } while (0)
1469
1470 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1471    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
1472 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
/* Unsigned two-word division routed through __udiv_w_sdiv: the helper
   returns the quotient, stored in Q, and writes the remainder through its
   first argument, which is then copied to R.  The remainder temporary is
   a full UWtype word so that it matches the word size in use rather than
   being fixed at 32 bits.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
1479 #endif
1480
1481 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
1482 #if !defined (udiv_qrnnd)
/* No processor-specific udiv_qrnnd exists at this point, so the name is
   aliased to the portable __udiv_qrnnd_c.  UDIV_NEEDS_NORMALIZATION is
   set to 1 alongside it; presumably this tells callers that the divisor
   must have its most significant bit set -- confirm against the macro
   documentation at the top of the file.  */
1483 #define UDIV_NEEDS_NORMALIZATION 1
1484 #define udiv_qrnnd __udiv_qrnnd_c
1485 #endif
1486
1487 #if !defined (count_leading_zeros)
/* Portable count of leading zero bits of X, stored in COUNT.
   A shift amount is chosen so that the topmost non-zero chunk of X lands
   in the low bits, and __clz_tab supplies the bit length of that chunk.
   For words of at most 32 bits the shift is one of four multiples of
   __BITS4; wider words are scanned downwards a byte at a time.  When X is
   zero the shift is zero and the result depends on __clz_tab[0];
   COUNT_LEADING_ZEROS_0 records the result for zero as W_TYPE_SIZE.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __clz_v = (x);                                               \
    UWtype __clz_sh;                                                    \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        __clz_sh = W_TYPE_SIZE - 8;                                     \
        while (__clz_sh > 0 && ((__clz_v >> __clz_sh) & 0xff) == 0)     \
          __clz_sh -= 8;                                                \
      }                                                                 \
    else if (__clz_v < ((UWtype) 1 << __BITS4))                         \
      __clz_sh = 0;                                                     \
    else if (__clz_v < ((UWtype) 1 << 2 * __BITS4))                     \
      __clz_sh = __BITS4;                                               \
    else if (__clz_v < ((UWtype) 1 << 3 * __BITS4))                     \
      __clz_sh = 2 * __BITS4;                                           \
    else                                                                \
      __clz_sh = 3 * __BITS4;                                           \
                                                                        \
    (count) = W_TYPE_SIZE                                               \
              - (__clz_tab[__clz_v >> __clz_sh] + __clz_sh);            \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1509 #endif
1510
1511 #if !defined (count_trailing_zeros)
1512 /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
1513    defined in asm, but if it is not, the C version above is good enough.  */
/* Count of trailing zero bits of X, stored in COUNT.  X & -X isolates the
   lowest set bit; its leading-zero count, taken from W_TYPE_SIZE - 1, is
   the position of that bit.  X is evaluated once.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_val = (x);                                             \
    UWtype __ctz_low = __ctz_val & -__ctz_val;                          \
    UWtype __ctz_lz;                                                    \
    count_leading_zeros (__ctz_lz, __ctz_low);                          \
    (count) = W_TYPE_SIZE - 1 - __ctz_lz;                               \
  } while (0)
1521 #endif
1522
/* Make sure UDIV_NEEDS_NORMALIZATION is always defined: it defaults to 0
   unless an earlier definition (such as the one made when the portable
   __udiv_qrnnd_c is selected as udiv_qrnnd) already set it.  */
1523 #ifndef UDIV_NEEDS_NORMALIZATION
1524 #define UDIV_NEEDS_NORMALIZATION 0
1525 #endif