@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
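
/* For example, if __USER_LABEL_PREFIX__ expands to `_', then SYM (udivsi3)
   produces the label _udivsi3.  */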
#define __PLT__  /* Not supported in Thumb assembler (for now).  */

#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__)
# define __ARM_ARCH__ 6
/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
# define RETc(x)	bx##x	lr

# if (__ARM_ARCH__ == 4) \
	&& (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#  define __INTERWORKING__

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr
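
/* For instance, RETc(eq) expands to "bxeq lr" on architectures that have BX,
   and to "moveq pc, lr" otherwise.  */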
/* Don't pass dirn; it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, dirn=ia
#if defined (__INTERWORKING__)
	ldr\cond	lr, [sp], #4
	ldm\cond\dirn	sp!, {\regs, lr}
	ldr\cond	pc, [sp], #4
	ldm\cond\dirn	sp!, {\regs, pc}
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)

.macro DIV_FUNC_END name
.macro THUMB_FUNC_START name

/* Function start macros.  Variants for ARM and Thumb.  */

#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb

.macro FUNC_START name
/* Special function that will always be coded in ARM assembly, even when
   compiling for Thumb-only targets.  */

#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
.macro	ARM_FUNC_START name

/* A hook to tell gdb that we've switched to ARM mode.  Also used when
   calling directly from other local ARM routines.  */

#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in ARM mode.  */

.macro	ARM_FUNC_START name

.macro	ARM_FUNC_ALIAS new old
	EQUIV	SYM (__\new), SYM (__\old)
	.set	SYM (_L__\new), SYM (_L__\old)

/* Register aliases.  */

work		.req	r4	@ XXXX is this safe?
/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		      */
/* ------------------------------------------------------------------------ */

.macro ARM_DIV_BODY dividend, divisor, result, curbit
#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	addne	pc, pc, \curbit, lsl #2

	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4

	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1

#endif /* __ARM_ARCH__ < 5 */

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
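
/* For reference, a rough C model of the shift-and-subtract division that
   ARM_DIV_BODY implements (an illustrative sketch only; the macro unrolls
   four bit positions per pass and, on ARMv5+, uses CLZ to skip the
   shift-up loop entirely):

	unsigned quotient = 0, curbit = 1;
	while (divisor < dividend && !(divisor & 0x80000000))
	  { divisor <<= 1; curbit <<= 1; }
	do
	  {
	    if (dividend >= divisor)
	      { dividend -= divisor; quotient |= curbit; }
	    divisor >>= 1;
	  }
	while (curbit >>= 1);

   On exit, quotient holds the result and dividend holds the remainder.  */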
/* ------------------------------------------------------------------------ */

.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	rsb	\order, \order, #31

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1
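
/* ARM_DIV2_ORDER computes the base-2 logarithm of a divisor known to be a
   power of two.  With CLZ this is just 31 - clz(divisor); the non-CLZ
   variant narrows the range in steps, roughly (a sketch in C):

	order = 0;
	if (divisor >= 1 << 16) { divisor >>= 16; order += 16; }
	if (divisor >= 1 << 8)  { divisor >>= 8;  order += 8; }
	if (divisor >= 1 << 4)  { divisor >>= 4;  order += 4; }
	if (divisor > 1 << 2)
	  order += 3;
	else
	  order += divisor >> 1;

   where the final step maps the remaining divisor 1, 2, 4 or 8 onto
   0, 1, 2 or 3.  */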
/* ------------------------------------------------------------------------ */

.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3

	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4

	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4

	@ Either 1, 2 or 3 comparisons/subtractions are left.
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
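
/* ARM_MOD_BODY follows the same shift-and-subtract scheme but keeps only
   the remainder; approximately, in C (a sketch only; the real code works
   in batches of four bit positions and fixes up the count afterwards):

	order = 0;
	while (divisor < dividend && !(divisor & 0x80000000))
	  { divisor <<= 1; order++; }
	for (; order >= 0; order--, divisor >>= 1)
	  if (dividend >= divisor)
	    dividend -= divisor;

   leaving the remainder in dividend.  */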
/* ------------------------------------------------------------------------ */

.macro THUMB_DIV_MOD_BODY modulo

	@ Load the constant 0x10000000 into our work register.

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, dividend

	@ Set work to 0x80000000.

	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, dividend

	@ Test for possible subtractions ...

	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done so that we can fix
	@ them up afterwards.
	cmp	dividend, divisor
	sub	dividend, dividend, divisor
	lsr	work, divisor, #1
	sub	dividend, dividend, work
	lsr	work, divisor, #2
	sub	dividend, dividend, work
	lsr	work, divisor, #3
	sub	dividend, dividend, work

	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will
	@ be OK, since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	sub	dividend, dividend, divisor
	orr	result, result, curbit
	lsr	work, divisor, #1
	sub	dividend, dividend, work
	lsr	work, divisor, #2
	sub	dividend, dividend, work
	lsr	work, divisor, #3
	sub	dividend, dividend, work

	cmp	dividend, #0			@ Early termination?
	lsr	curbit, #4			@ No, any more bits to do?

	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ is governed by the position of the bit, stored in ip.
	beq	LSYM(Lgot_result)

	@ If we terminated early, because the dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions)
	@ since the bit in ip could be in the top two bits, which might
	@ then match with one of the smaller RORs.
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #3
	lsr	work, divisor, #2
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
/* ------------------------------------------------------------------------ */
/*		     Start of the Real Functions			      */
/* ------------------------------------------------------------------------ */
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

#else /* ARM version.  */

	ARM_DIV_BODY r0, r1, r2, r3

12:	ARM_DIV2_ORDER r1, r2

#endif /* ARM version */

FUNC_START aeabi_uidivmod

	stmfd	sp!, { r0, r1, lr }
	ldmfd	sp!, { r1, r2, lr }

	FUNC_END aeabi_uidivmod
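
/* __aeabi_uidivmod returns the quotient in r0 and the remainder in r1;
   the remainder is reconstructed from the saved operands as
   remainder = dividend - quotient * divisor.  */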
#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
	cmp	dividend, divisor

#else /* ARM version.  */

	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	tsthi	r1, r2				@ see if divisor is power of 2

	ARM_MOD_BODY r0, r1, r2, r3

#endif /* ARM version.  */

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
	eor	work, divisor		@ Save the sign of the result.
	neg	divisor, divisor	@ Loops below use unsigned.
	neg	dividend, dividend
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

#else /* ARM version.  */

	eor	ip, r0, r1			@ save the sign of the result.
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	rsbmi	r3, r0, #0			@ positive dividend value
	tst	r1, r2				@ divisor is power of 2 ?

	ARM_DIV_BODY r3, r1, r0, r2

10:	teq	ip, r0				@ same sign ?
	moveq	r0, ip, asr #31

12:	ARM_DIV2_ORDER r1, r2

#endif /* ARM version */
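
/* A rough C model of the sign handling in __divsi3 above (illustrative
   only; "udivide" stands in for the unsigned division done by
   ARM_DIV_BODY):

	int sign = a ^ b;
	unsigned ua = a < 0 ? -(unsigned) a : a;
	unsigned ub = b < 0 ? -(unsigned) b : b;
	unsigned q  = udivide (ua, ub);
	return sign < 0 ? -(int) q : q;

   i.e. the quotient is negated when the operand signs differ.  */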
FUNC_START aeabi_idivmod

	stmfd	sp!, { r0, r1, lr }
	ldmfd	sp!, { r1, r2, lr }

	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
	neg	divisor, divisor	@ Loops below use unsigned.

	@ Need to save the sign of the dividend; unfortunately, we need
	@ the work register later on.  Must do this after saving the original
	@ value of the work register, because we will pop this value off first.
	neg	dividend, dividend
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)
	neg	dividend, dividend

#else /* ARM version.  */

	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	tsthi	r1, r2				@ see if divisor is power of 2

	ARM_MOD_BODY r0, r1, r2, r3

#endif /* ARM version */
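
/* In __modsi3 the remainder takes the sign of the dividend; roughly:

	r = umodulo (abs (a), abs (b));
	return a < 0 ? -r : r;

   where "umodulo" stands in for the unsigned remainder computed by
   ARM_MOD_BODY (a sketch only).  */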
#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */

ARM_FUNC_ALIAS aeabi_idiv0 div0
ARM_FUNC_ALIAS aeabi_ldiv0 div0

#endif /* L_divmodsi_tools */
/* ------------------------------------------------------------------------ */

@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls.

/* Constants taken from <asm/unistd.h> and <asm/signal.h> */
#define __NR_SYSCALL_BASE	0x900000
#define __NR_getpid		(__NR_SYSCALL_BASE+ 20)
#define __NR_kill		(__NR_SYSCALL_BASE+ 37)
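
/* The handler in this section is roughly equivalent to the C call
   kill (getpid (), SIGFPE), issued through the raw syscall numbers above,
   i.e. it raises SIGFPE in the current process when a division by zero
   is detected (sketch only).  */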
#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg values in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
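
/* For example, the 64-bit logical right shift below can be modelled in C,
   for a shift count n with 0 < n < 64 and al/ah the low/high words of the
   value (a sketch only; the masking behaviour described above is what lets
   the assembly avoid an explicit n >= 32 test):

	lo = n < 32 ? (al >> n) | (ah << (32 - n)) : ah >> (n - 32);
	hi = n < 32 ? ah >> n : 0;  */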
ARM_FUNC_ALIAS aeabi_llsr lshrdi3

	orrmi	al, al, ah, lsl ip

ARM_FUNC_ALIAS aeabi_lasr ashrdi3

	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	orrmi	al, al, ah, lsl ip

ARM_FUNC_ALIAS aeabi_llsl ashldi3

	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option.)  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
	|| defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
	|| __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
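
/* Typical use from Thumb code (illustrative only):

	ldr	r3, =some_function
	bl	_call_via_r3

   The BL leaves the return address in lr with the bottom bit set, and the
   stub enters the target with a BX on the chosen register, switching to
   the correct mode.  */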
.macro call_via register
	THUMB_FUNC_START _call_via_\register

	SIZE	(_call_via_\register)

#endif /* L_call_via_rX */

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we have to store the return address on the stack
   and allow the called function to return here instead.  Upon return we
   recover the real return address and use a BX to get back to Thumb mode.  */

.macro interwork register

	THUMB_FUNC_START _interwork_call_via_\register

	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	streq	lr, [sp, #-4]!
	adreq	lr, _arm_return

	SIZE	(_interwork_call_via_\register)

/* The LR case has to be handled a little differently...  */

	THUMB_FUNC_START _interwork_call_via_lr

	adreq	lr, _arm_return

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */

#endif /* Arch supports thumb.  */
#include "ieee754-df.S"
#include "ieee754-sf.S"

#endif /* __symbian__ */