1 @ libgcc routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
4 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004
5 Free Software Foundation, Inc.
7 This file is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combine
21 This file is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; see the file COPYING. If not, write to
28 the Free Software Foundation, 59 Temple Place - Suite 330,
29 Boston, MA 02111-1307, USA. */
30 /* ------------------------------------------------------------------------ */
32 /* We need to know what prefix to add to function names. */
34 #ifndef __USER_LABEL_PREFIX__
35 #error __USER_LABEL_PREFIX__ not defined
@ NOTE(review): the #endif matching the #ifndef above is not visible in this
@ listing -- presumably elided; confirm against the complete file.
38 /* ANSI concatenation macros. */
@ Two-level expansion so that macro arguments are expanded before pasting.
40 #define CONCAT1(a, b) CONCAT2(a, b)
41 #define CONCAT2(a, b) a ## b
43 /* Use the right prefix for global labels. */
45 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
@ __PLT__ expands to nothing here, so calls are direct rather than PLT-indirect.
49 #define __PLT__ /* Not supported in Thumb assembler (for now). */
@ ELF symbol bookkeeping: mark a symbol as a function and record its size
@ (from its start to the current location counter).
53 #define TYPE(x) .type SYM(x),function
54 #define SIZE(x) .size SYM(x), . - SYM(x)
63 /* Function end macros. Variants for 26 bit APCS and interworking. */
65 @ This selects the minimum architecture level required.
66 #define __ARM_ARCH__ 3
@ Promote the architecture level when the compiler advertises a newer target.
68 #if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
69 || defined(__ARM_ARCH_4T__)
70 /* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
71 long multiply instructions. That includes v3M. */
73 # define __ARM_ARCH__ 4
76 #if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
77 || defined(__ARM_ARCH_5TE__)
79 # define __ARM_ARCH__ 5
82 /* How to return from a function call depends on the architecture variant. */
@ NOTE(review): these movs-pc forms presumably sit under a 26-bit-APCS #if
@ that is not visible in this listing; movs pc, lr also restores the flag
@ bits kept in the top of lr under that ABI. Confirm against the full file.
86 # define RET movs pc, lr
87 # define RETc(x) mov##x##s pc, lr
@ v4T and later: return with bx so Thumb/ARM interworking works.
89 #elif (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
92 # define RETc(x) bx##x lr
94 # if (__ARM_ARCH__ == 4) \
95 && (defined(__thumb__) || defined(__THUMB_INTERWORK__))
96 # define __INTERWORKING__
@ Plain 32-bit return for older, non-interworking configurations.
101 # define RET mov pc, lr
102 # define RETc(x) mov##x pc, lr
106 /* Don't pass dirn, it's there just to get token pasting right. */
@ RETLDM: pop the listed registers and return, in one ldm where the
@ architecture allows it. NOTE(review): the conditional structure and the
@ .endm are not visible in this listing; the '^' forms below presumably
@ belong to the 26-bit APCS branch (they reload the flags) -- confirm.
108 .macro RETLDM regs=, cond=, dirn=ia
111 ldm\cond\dirn sp!, {pc}^
113 ldm\cond\dirn sp!, {\regs, pc}^
115 #elif defined (__INTERWORKING__)
@ Interworking: pull lr (or the register list plus lr) off the stack; the
@ bx that actually returns is not visible in this listing.
117 ldr\cond lr, [sp], #4
119 ldm\cond\dirn sp!, {\regs, lr}
@ Non-interworking: load pc straight from the stack.
124 ldr\cond pc, [sp], #4
126 ldm\cond\dirn sp!, {\regs, pc}
@ Division-by-zero tail: report via __div0, then return 0 as the "result".
135 bl SYM (__div0) __PLT__
136 mov r0, #0 @ About as wrong as it could be.
145 mov r0, #0 @ About as wrong as it could be.
146 #if defined (__INTERWORKING__)
@ Function prologue/epilogue helper macros. NOTE(review): the macro bodies
@ and .endm lines are largely elided from this listing; only the headers
@ and a few interior lines survive. Confirm against the complete file.
158 .macro DIV_FUNC_END name
168 .macro THUMB_FUNC_START name
175 /* Function start macros. Variants for ARM and Thumb. */
178 #define THUMB_FUNC .thumb_func
179 #define THUMB_CODE .force_thumb
185 .macro FUNC_START name
195 /* Special function that will always be coded in ARM assembly, even if
196 in Thumb-only compilation. */
@ Thumb-only build without interworking: enter via a Thumb shim, then
@ switch to ARM state; two ARM_FUNC_START variants are selected below.
198 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
199 .macro ARM_FUNC_START name
204 _L__\name: /* A hook to tell gdb that we've switched to ARM */
206 #define EQUIV .thumb_set
208 .macro ARM_FUNC_START name
@ Make __\new an alias for __\old using whichever EQUIV was selected above.
219 .macro ARM_FUNC_ALIAS new old
221 EQUIV SYM (__\new), SYM (__\old)
225 /* Register aliases. */
227 work .req r4 @ XXXX is this safe ?
241 /* ------------------------------------------------------------------------ */
242 /* Bodies of the division and modulo routines. */
243 /* ------------------------------------------------------------------------ */
@ Unsigned 32-bit division core (ARM state): \result = \dividend / \divisor,
@ with \dividend left holding the remainder. \curbit is scratch.
@ NOTE(review): several interior lines (result initialisation, .rept/.endr
@ around the shift ladder, loop-back branches, .endm) are not visible in
@ this listing -- confirm against the complete file.
244 .macro ARM_DIV_BODY dividend, divisor, result, curbit
246 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
@ clz path: compute how many quotient bits are needed, then jump into the
@ middle of an unrolled compare/subtract ladder (3 instructions per step,
@ hence the *3 scaling before the computed add-to-pc branch).
248 clz \curbit, \dividend
249 clz \result, \divisor
250 sub \curbit, \result, \curbit
251 rsbs \curbit, \curbit, #31
252 addne \curbit, \curbit, \curbit, lsl #1
254 addne pc, pc, \curbit, lsl #2
@ One ladder step: trial-subtract divisor<<shift, shifting a quotient bit
@ in via adc (carry set iff the subtraction succeeded).
258 .set shift, shift - 1
259 cmp \dividend, \divisor, lsl #shift
260 adc \result, \result, \result
261 subcs \dividend, \dividend, \divisor, lsl #shift
264 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
265 #if __ARM_ARCH__ >= 5
@ Size-optimised clz path: align divisor/curbit with the dividend's top bit.
267 clz \curbit, \divisor
268 clz \result, \dividend
269 sub \result, \curbit, \result
271 mov \divisor, \divisor, lsl \result
272 mov \curbit, \curbit, lsl \result
275 #else /* __ARM_ARCH__ < 5 */
277 @ Initially shift the divisor left 3 bits if possible,
278 @ set curbit accordingly. This allows for curbit to be located
279 @ at the left end of each 4 bit nibbles in the division loop
280 @ to save one loop in most cases.
281 tst \divisor, #0xe0000000
282 moveq \divisor, \divisor, lsl #3
286 @ Unless the divisor is very big, shift it up in multiples of
287 @ four bits, since this is the amount of unwinding in the main
288 @ division loop. Continue shifting until the divisor is
289 @ larger than the dividend.
290 1: cmp \divisor, #0x10000000
291 cmplo \divisor, \dividend
292 movlo \divisor, \divisor, lsl #4
293 movlo \curbit, \curbit, lsl #4
296 @ For very big divisors, we must shift it a bit at a time, or
297 @ we will be in danger of overflowing.
298 1: cmp \divisor, #0x80000000
299 cmplo \divisor, \dividend
300 movlo \divisor, \divisor, lsl #1
301 movlo \curbit, \curbit, lsl #1
306 #endif /* __ARM_ARCH__ < 5 */
@ Main loop, unrolled 4x: each trial subtraction that succeeds sets the
@ corresponding quotient bit via orrhs.
309 1: cmp \dividend, \divisor
310 subhs \dividend, \dividend, \divisor
311 orrhs \result, \result, \curbit
312 cmp \dividend, \divisor, lsr #1
313 subhs \dividend, \dividend, \divisor, lsr #1
314 orrhs \result, \result, \curbit, lsr #1
315 cmp \dividend, \divisor, lsr #2
316 subhs \dividend, \dividend, \divisor, lsr #2
317 orrhs \result, \result, \curbit, lsr #2
318 cmp \dividend, \divisor, lsr #3
319 subhs \dividend, \dividend, \divisor, lsr #3
320 orrhs \result, \result, \curbit, lsr #3
321 cmp \dividend, #0 @ Early termination?
@ Pre-UAL spelling: movnes == conditional movs (flags decide the loop-back).
322 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
323 movne \divisor, \divisor, lsr #4
326 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
329 /* ------------------------------------------------------------------------ */
@ Compute \order = log2(\divisor) for a power-of-two divisor (used for the
@ divide-by-power-of-2 fast path). NOTE(review): the clz line of the v5
@ branch and the #else/#endif/.endm structure are not visible here.
330 .macro ARM_DIV2_ORDER divisor, order
332 #if __ARM_ARCH__ >= 5
@ order = 31 - clz(divisor); the preceding clz is elided from this listing.
335 rsb \order, \order, #31
@ Pre-v5 branch: binary search, folding the divisor down by 16/8/4 while
@ accumulating the shift count.
339 cmp \divisor, #(1 << 16)
340 movhs \divisor, \divisor, lsr #16
344 cmp \divisor, #(1 << 8)
345 movhs \divisor, \divisor, lsr #8
346 addhs \order, \order, #8
348 cmp \divisor, #(1 << 4)
349 movhs \divisor, \divisor, lsr #4
350 addhs \order, \order, #4
@ Final 2 bits: divisor is now 1, 2, 4 or 8.
352 cmp \divisor, #(1 << 2)
353 addhi \order, \order, #3
354 addls \order, \order, \divisor, lsr #1
359 /* ------------------------------------------------------------------------ */
@ Unsigned 32-bit modulo core (ARM state): leaves the remainder in
@ \dividend. Same structure as ARM_DIV_BODY but without building a
@ quotient. NOTE(review): interior lines (the divisor clz, .rept/.endr,
@ loop branches, labels 2:, .endm) are elided from this listing.
360 .macro ARM_MOD_BODY dividend, divisor, order, spare
362 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
@ clz path: \order = number of trial subtractions; computed branch into an
@ unrolled 2-instruction-per-step ladder (hence lsl #3 on the pc add).
365 clz \spare, \dividend
366 sub \order, \order, \spare
367 rsbs \order, \order, #31
368 addne pc, pc, \order, lsl #3
@ One ladder step: conditional subtraction of divisor<<shift.
372 .set shift, shift - 1
373 cmp \dividend, \divisor, lsl #shift
374 subcs \dividend, \dividend, \divisor, lsl #shift
377 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
378 #if __ARM_ARCH__ >= 5
@ Size-optimised clz path: align the divisor under the dividend's top bit,
@ remembering the shift amount in \order.
381 clz \spare, \dividend
382 sub \order, \order, \spare
383 mov \divisor, \divisor, lsl \order
385 #else /* __ARM_ARCH__ < 5 */
389 @ Unless the divisor is very big, shift it up in multiples of
390 @ four bits, since this is the amount of unwinding in the main
391 @ division loop. Continue shifting until the divisor is
392 @ larger than the dividend.
393 1: cmp \divisor, #0x10000000
394 cmplo \divisor, \dividend
395 movlo \divisor, \divisor, lsl #4
396 addlo \order, \order, #4
399 @ For very big divisors, we must shift it a bit at a time, or
400 @ we will be in danger of overflowing.
401 1: cmp \divisor, #0x80000000
402 cmplo \divisor, \dividend
403 movlo \divisor, \divisor, lsl #1
404 addlo \order, \order, #1
407 #endif /* __ARM_ARCH__ < 5 */
409 @ Perform all needed subtractions to keep only the remainder.
410 @ Do comparisons in batch of 4 first.
411 subs \order, \order, #3 @ yes, 3 is intended here
@ Main loop, unrolled 4x: conditional subtraction at 4 successive shifts.
414 1: cmp \dividend, \divisor
415 subhs \dividend, \dividend, \divisor
416 cmp \dividend, \divisor, lsr #1
417 subhs \dividend, \dividend, \divisor, lsr #1
418 cmp \dividend, \divisor, lsr #2
419 subhs \dividend, \dividend, \divisor, lsr #2
420 cmp \dividend, \divisor, lsr #3
421 subhs \dividend, \dividend, \divisor, lsr #3
423 mov \divisor, \divisor, lsr #4
424 subges \order, \order, #4
@ Tail: dispatch on how many (1-3) subtractions remain; the computed
@ branch into labels 2:/3:/4: is elided from this listing.
431 @ Either 1, 2 or 3 comparison/subtractions are left.
435 cmp \dividend, \divisor
436 subhs \dividend, \dividend, \divisor
437 mov \divisor, \divisor, lsr #1
438 3: cmp \dividend, \divisor
439 subhs \dividend, \dividend, \divisor
440 mov \divisor, \divisor, lsr #1
441 4: cmp \dividend, \divisor
442 subhs \dividend, \dividend, \divisor
445 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
448 /* ------------------------------------------------------------------------ */
@ Thumb-state division/modulo core, shared by the div and mod entry points;
@ the \modulo flag selects remainder-only vs quotient-building behaviour.
@ NOTE(review): this listing omits many interior lines -- the conditional
@ branches between the compare/subtract pairs, the .if \modulo structure,
@ the overdone bookkeeping, labels, and .endm. The surviving lines show
@ the same shift-up / 4x-unrolled trial-subtraction scheme as the ARM
@ versions; confirm details against the complete file.
449 .macro THUMB_DIV_MOD_BODY modulo
450 @ Load the constant 0x10000000 into our work register.
454 @ Unless the divisor is very big, shift it up in multiples of
455 @ four bits, since this is the amount of unwinding in the main
456 @ division loop. Continue shifting until the divisor is
457 @ larger than the dividend.
460 cmp divisor, dividend
466 @ Set work to 0x80000000
469 @ For very big divisors, we must shift it a bit at a time, or
470 @ we will be in danger of overflowing.
473 cmp divisor, dividend
479 @ Test for possible subtractions ...
481 @ ... On the final pass, this may subtract too much from the dividend,
482 @ so keep track of which subtractions are done, we can fix them up
485 cmp dividend, divisor
487 sub dividend, dividend, divisor
489 lsr work, divisor, #1
492 sub dividend, dividend, work
499 lsr work, divisor, #2
502 sub dividend, dividend, work
509 lsr work, divisor, #3
512 sub dividend, dividend, work
521 @ ... and note which bits are done in the result. On the final pass,
522 @ this may subtract too much from the dividend, but the result will be ok,
523 @ since the "bit" will have been shifted out at the bottom.
524 cmp dividend, divisor
526 sub dividend, dividend, divisor
527 orr result, result, curbit
529 lsr work, divisor, #1
532 sub dividend, dividend, work
536 lsr work, divisor, #2
539 sub dividend, dividend, work
543 lsr work, divisor, #3
546 sub dividend, dividend, work
552 cmp dividend, #0 @ Early termination?
554 lsr curbit, #4 @ No, any more bits to do?
560 @ Any subtractions that we should not have done will be recorded in
561 @ the top three bits of "overdone". Exactly which were not needed
562 @ are governed by the position of the bit, stored in ip.
566 beq LSYM(Lgot_result)
568 @ If we terminated early, because dividend became zero, then the
569 @ bit in ip will not be in the bottom nibble, and we should not
570 @ perform the additions below. We must test for this though
571 @ (rather relying upon the TSTs to prevent the additions) since
572 @ the bit in ip could be in the top two bits which might then match
573 @ with one of the smaller RORs.
577 beq LSYM(Lgot_result)
@ Fix-up: add back each over-subtracted divisor fraction in turn.
584 lsr work, divisor, #3
592 lsr work, divisor, #2
599 beq LSYM(Lgot_result)
600 lsr work, divisor, #1
605 /* ------------------------------------------------------------------------ */
606 /* Start of the Real Functions */
607 /* ------------------------------------------------------------------------ */
@ __udivsi3: unsigned 32-bit divide, r0 = r0 / r1. NOTE(review): the
@ FUNC_START/entry lines, the Thumb body, the divide-by-zero check and the
@ power-of-two fast path are largely elided from this listing.
620 cmp dividend, divisor
621 blo LSYM(Lgot_result)
629 #else /* ARM version. */
@ ARM path: main body, then the power-of-2 order computation at label 12.
639 ARM_DIV_BODY r0, r1, r2, r3
648 12: ARM_DIV2_ORDER r1, r2
653 #endif /* ARM version */
657 #endif /* L_udivsi3 */
658 /* ------------------------------------------------------------------------ */
@ __umodsi3: unsigned 32-bit modulo, r0 = r0 % r1. NOTE(review): entry
@ lines and much of the Thumb body are elided from this listing.
668 cmp dividend, divisor
680 #else /* ARM version. */
@ ARM fast paths: divisor of 1 (remainder 0), dividend < divisor
@ (remainder is the dividend), and power-of-2 divisor (mask with r2,
@ which holds divisor-1 when the tst succeeds).
682 subs r2, r1, #1 @ compare divisor with 1
684 cmpne r0, r1 @ compare dividend with divisor
686 tsthi r1, r2 @ see if divisor is power of 2
690 ARM_MOD_BODY r0, r1, r2, r3
694 #endif /* ARM version. */
698 #endif /* L_umodsi3 */
699 /* ------------------------------------------------------------------------ */
@ __divsi3: signed 32-bit divide, r0 = r0 / r1, implemented as unsigned
@ division on absolute values with the result sign fixed up afterwards.
@ NOTE(review): entry lines, sign-fixup branches and several Thumb-body
@ lines are elided from this listing.
710 eor work, divisor @ Save the sign of the result.
716 neg divisor, divisor @ Loops below use unsigned.
720 neg dividend, dividend
722 cmp dividend, divisor
723 blo LSYM(Lgot_result)
736 #else /* ARM version. */
@ ARM path: ip = sign of result (xor of operand signs); both operands are
@ made positive before the unsigned division body.
739 eor ip, r0, r1 @ save the sign of the result.
741 rsbmi r1, r1, #0 @ loops below use unsigned.
742 subs r2, r1, #1 @ division by 1 or -1 ?
745 rsbmi r3, r0, #0 @ positive dividend value
748 tst r1, r2 @ divisor is power of 2 ?
751 ARM_DIV_BODY r3, r1, r0, r2
@ Sign fix-up: negate the quotient when operand signs differed.
757 10: teq ip, r0 @ same sign ?
@ ip asr #31 yields 0 or -1, presumably the division-by-1/-1 result path.
762 moveq r0, ip, asr #31
766 12: ARM_DIV2_ORDER r1, r2
773 #endif /* ARM version */
777 #endif /* L_divsi3 */
778 /* ------------------------------------------------------------------------ */
@ __modsi3: signed 32-bit modulo, r0 = r0 % r1. The remainder takes the
@ sign of the dividend, so only the dividend's sign needs restoring.
@ NOTE(review): entry lines, register saves and branches are elided from
@ this listing.
789 neg divisor, divisor @ Loops below use unsigned.
792 @ Need to save the sign of the dividend, unfortunately, we need
793 @ work later on. Must do this after saving the original value of
794 @ the work register, because we will pop this value off first.
798 neg dividend, dividend
800 cmp dividend, divisor
801 blo LSYM(Lgot_result)
@ Restore the dividend's sign on the remainder.
808 neg dividend, dividend
813 #else /* ARM version. */
@ ARM path: make both operands positive (ip remembers the dividend sign),
@ then take the same fast paths as __umodsi3.
817 rsbmi r1, r1, #0 @ loops below use unsigned.
818 movs ip, r0 @ preserve sign of dividend
819 rsbmi r0, r0, #0 @ if negative make positive
820 subs r2, r1, #1 @ compare divisor with 1
821 cmpne r0, r1 @ compare dividend with divisor
823 tsthi r1, r2 @ see if divisor is power of 2
827 ARM_MOD_BODY r0, r1, r2, r3
833 #endif /* ARM version */
837 #endif /* L_modsi3 */
838 /* ------------------------------------------------------------------------ */
847 #endif /* L_divmodsi_tools */
848 /* ------------------------------------------------------------------------ */
850 @ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls
852 /* Constants taken from <asm/unistd.h> and <asm/signal.h> */
@ Old OABI syscall numbering (swi base 0x900000); the handler presumably
@ raises SIGFPE via kill(getpid(), ...) -- its body is elided here.
854 #define __NR_SYSCALL_BASE 0x900000
855 #define __NR_getpid (__NR_SYSCALL_BASE+ 20)
856 #define __NR_kill (__NR_SYSCALL_BASE+ 37)
871 #endif /* L_dvmd_lnx */
872 /* ------------------------------------------------------------------------ */
873 /* These next two sections are here despite the fact that they contain Thumb
874 assembler because their presence allows interworked code to be linked even
875 when the GCC library is this one. */
877 /* Do not build the interworking functions when the target architecture does
878 not support Thumb instructions. (This can be a multilib option). */
879 #if defined L_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)
881 /* These labels & instructions are used by the Arm/Thumb interworking code.
882 The address of function to be called is loaded into a register and then
883 one of these labels is called via a BL instruction. This puts the
884 return address into the link register with the bottom bit set, and the
885 code here switches to the correct mode before executing the function. */
@ One _call_via_<reg> stub per register; the bx inside the macro body is
@ elided from this listing, as are the macro invocations and .endm.
891 .macro call_via register
892 THUMB_FUNC_START _call_via_\register
897 SIZE (_call_via_\register)
916 #endif /* L_call_via_rX */
917 /* ------------------------------------------------------------------------ */
918 /* Do not build the interworking functions when the target architecture does
919 not support Thumb instructions. (This can be a multilib option). */
920 #if defined L_interwork_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)
922 /* These labels & instructions are used by the Arm/Thumb interworking code,
923 when the target address is in an unknown instruction set. The address
924 of function to be called is loaded into a register and then one of these
925 labels is called via a BL instruction. This puts the return address
926 into the link register with the bottom bit set, and the code here
927 switches to the correct mode before executing the function. Unfortunately
928 the target code cannot be relied upon to return via a BX instruction, so
929 instead we have to store the return address on the stack and allow the
930 called function to return here instead. Upon return we recover the real
931 return address and use a BX to get back to Thumb mode. */
@ One _interwork_call_via_<reg> stub per register; the ARM-state body that
@ tests the target's bottom bit and the _arm_return trampoline are elided
@ from this listing.
942 .macro interwork register
945 THUMB_FUNC_START _interwork_call_via_\register
951 .globl LSYM(Lchange_\register)
952 LSYM(Lchange_\register):
@ If the callee is ARM code (bit 0 clear), return through _arm_return so
@ we can bx back into Thumb state afterwards.
955 adreq lr, _arm_return
958 SIZE (_interwork_call_via_\register)
976 /* The LR case has to be handled a little differently... */
979 THUMB_FUNC_START _interwork_call_via_lr
990 adreq lr, _arm_return
993 SIZE (_interwork_call_via_lr)
995 #endif /* L_interwork_call_via_rX */
997 #include "ieee754-df.S"
998 #include "ieee754-sf.S"