1 @ libgcc1 routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
4 /* Copyright (C) 1995, 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 In addition to the permissions in the GNU General Public License, the
12 Free Software Foundation gives you unlimited permission to link the
13 compiled version of this file with other programs, and to distribute
14 those programs without any restriction coming from the use of this
15 file. (The General Public License restrictions do apply in other
16 respects; for example, they cover modification of the file, and
17 distribution when not linked into another program.)
19 This file is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; see the file COPYING. If not, write to
26 the Free Software Foundation, 59 Temple Place - Suite 330,
27 Boston, MA 02111-1307, USA. */
29 /* As a special exception, if you link this library with other files,
30 some of which are compiled with GCC, to produce an executable,
31 this library does not by itself cause the resulting executable
32 to be covered by the GNU General Public License.
33 This exception does not however invalidate any other reasons why
34 the executable file might be covered by the GNU General Public License. */
/* Return-from-function macros.  Two RET/RETc definitions appear below;
   the #if/#else/#endif lines that select between them are not visible in
   this excerpt -- presumably the "movs pc, lr" pair is for a
   flag-restoring (26-bit/APCS) configuration and the plain "mov pc, lr"
   pair for the ordinary 32-bit case; confirm against the complete file.  */
37 #define RET	movs	pc, lr
38 #define RETc(x)	mov##x##s	pc, lr
41 #define RET	mov	pc, lr
42 #define RETc(x)	mov##x	pc, lr
/* The assembler cannot guess the user-label prefix; it must come in
   from the compiler's configuration.  */
46 #ifndef __USER_LABEL_PREFIX__
47 #error  __USER_LABEL_PREFIX__ not defined
/* ANSI concatenation macros.  */
52 #define CONCAT1(a, b) CONCAT2(a, b)
53 #define CONCAT2(a, b) a ## b
/* Use the right prefix for global labels: SYM(x) prepends the
   platform's user-label prefix (e.g. "_" on some targets) to x.  */
57 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
61 #define __PLT__  /* Not supported in thumb assembler (for now).  */
/* ELF symbol bookkeeping: mark SYM(x) as a function and record its
   size as "current location minus start of symbol".  */
65 #define TYPE(x) .type SYM(x),function
66 #define SIZE(x) .size SYM(x), . - SYM(x)
/* Directives used when assembling the Thumb versions of the routines.  */
74 #define THUMB_FUNC .thumb_func
75 #define THUMB_CODE .force_thumb
@ --------------------------------------------------------------------
@ Thumb unsigned divide (shift-and-subtract, unrolled 4 bits per pass).
@ NOTE(review): this excerpt is fragmentary -- the FUNC_START macro body,
@ labels, and the conditional branches between the instructions below are
@ not visible here, so the loop structure must be confirmed against the
@ complete file.
@ --------------------------------------------------------------------
82 .macro FUNC_START name
92 /* Used for Thumb code.  */
93 work		.req	r4	@ XXXX is this safe ?
@ Trivial-case check: compare dividend against divisor up front.
116	cmp	dividend, divisor
119	@ Load the constant 0x10000000 into our work register
123	@ Unless the divisor is very big, shift it up in multiples of
124	@ four bits, since this is the amount of unwinding in the main
125	@ division loop.  Continue shifting until the divisor is
126	@ larger than the dividend.
129	cmp	divisor, dividend
136	@ Set work to 0x80000000
139	@ For very big divisors, we must shift it a bit at a time, or
140	@ we will be in danger of overflowing.
143	cmp	divisor, dividend
150	@ Test for possible subtractions, and note which bits
151	@ are done in the result.  On the final pass, this may subtract
152	@ too much from the dividend, but the result will be ok, since the
153	@ "bit" will have been shifted out at the bottom.
@ Each of the four steps below subtracts (divisor >> k) and records the
@ quotient bit; the guarding compare-and-branch instructions between the
@ visible lines are missing from this excerpt.
154	cmp	dividend, divisor
156	sub	dividend, dividend, divisor
157	orr	result, result, curbit
159	lsr	work, divisor, #1
162	sub	dividend, dividend, work
166	lsr	work, divisor, #2
169	sub	dividend, dividend, work
173	lsr	work, divisor, #3
176	sub	dividend, dividend, work
180	cmp	dividend, #0			@ Early termination?
182	lsr	curbit, #4			@ No, any more bits to do?
@ Division by zero: call the user-visible trap hook, then return 0.
193	bl	SYM (__div0) __PLT__
194	mov	r0, #0			@ about as wrong as it could be
197 #else /* arm version */
@ --------------------------------------------------------------------
@ ARM unsigned divide: classic shift-and-subtract, unrolled four bits
@ per iteration, using conditional execution instead of branches.
@ (Fragmentary excerpt: entry/exit labels and some setup lines are not
@ visible here.)
@ --------------------------------------------------------------------
203	cmp	dividend, divisor
206	@ Unless the divisor is very big, shift it up in multiples of
207	@ four bits, since this is the amount of unwinding in the main
208	@ division loop.  Continue shifting until the divisor is
209	@ larger than the dividend.
@ If divisor < 0x10000000 (CC) and divisor < dividend (still CC),
@ shift both divisor and the quotient bit left by 4.
210	cmp	divisor, #0x10000000
211	cmpcc	divisor, dividend
212	movcc	divisor, divisor, lsl #4
213	movcc	curbit, curbit, lsl #4
217	@ For very big divisors, we must shift it a bit at a time, or
218	@ we will be in danger of overflowing.
219	cmp	divisor, #0x80000000
220	cmpcc	divisor, dividend
221	movcc	divisor, divisor, lsl #1
222	movcc	curbit, curbit, lsl #1
226	@ Test for possible subtractions, and note which bits
227	@ are done in the result.  On the final pass, this may subtract
228	@ too much from the dividend, but the result will be ok, since the
229	@ "bit" will have been shifted out at the bottom.
@ For each of the shifts 0..3: if dividend >= (divisor >> k) (CS),
@ subtract it and set the matching quotient bit (curbit >> k).
230	cmp	dividend, divisor
231	subcs	dividend, dividend, divisor
232	orrcs	result, result, curbit
233	cmp	dividend, divisor, lsr #1
234	subcs	dividend, dividend, divisor, lsr #1
235	orrcs	result, result, curbit, lsr #1
236	cmp	dividend, divisor, lsr #2
237	subcs	dividend, dividend, divisor, lsr #2
238	orrcs	result, result, curbit, lsr #2
239	cmp	dividend, divisor, lsr #3
240	subcs	dividend, dividend, divisor, lsr #3
241	orrcs	result, result, curbit, lsr #3
242	cmp	dividend, #0			@ Early termination?
243	movnes	curbit, curbit, lsr #4	@ No, any more bits to do?
244	movne	divisor, divisor, lsr #4
@ Division by zero: report via __div0, then return 0.
252	bl	SYM (__div0) __PLT__
253	mov	r0, #0			@ about as wrong as it could be
254	ldmia	sp!, {pc}RETCOND
256 #endif /* arm version */
260 #endif /* L_udivsi3 */
@ --------------------------------------------------------------------
@ Thumb unsigned modulo.  Same shift-and-subtract skeleton as the
@ divide routine above, but instead of accumulating quotient bits it
@ records over-eager subtractions in "overdone" so they can be undone
@ afterwards.  (Fragmentary excerpt: labels, branches and the
@ FUNC_START expansion are not visible here.)
@ --------------------------------------------------------------------
280	cmp	dividend, divisor
285	@ Load the constant 0x10000000 into our work register
290	@ Unless the divisor is very big, shift it up in multiples of
291	@ four bits, since this is the amount of unwinding in the main
292	@ division loop.  Continue shifting until the divisor is
293	@ larger than the dividend.
296	cmp	divisor, dividend
302	@ Set work to 0x80000000
305	@ For very big divisors, we must shift it a bit at a time, or
306	@ we will be in danger of overflowing.
309	cmp	divisor, dividend
315	@ Test for possible subtractions.  On the final pass, this may
316	@ subtract too much from the dividend, so keep track of which
317	@ subtractions are done, we can fix them up afterwards...
318	cmp	dividend, divisor
321	sub	dividend, dividend, divisor
323	lsr	work, divisor, #1
326	sub	dividend, dividend, work
333	lsr	work, divisor, #2
336	sub	dividend, dividend, work
343	lsr	work, divisor, #3
346	sub	dividend, dividend, work
354	cmp	dividend, #0			@ Early termination?
356	lsr	curbit, #4			@ No, any more bits to do?
361	@ Any subtractions that we should not have done will be recorded in
362	@ the top three bits of "overdone".  Exactly which were not needed
363	@ are governed by the position of the bit, stored in ip.
364	@ If we terminated early, because dividend became zero,
365	@ then none of the below will match, since the bit in ip will not be
366	@ in the bottom nibble.
373	RET		@ No fixups needed
@ Fix-up path: add back each (divisor >> k) that was wrongly subtracted.
380	lsr	work, divisor, #3
381	add	dividend, dividend, work
388	lsr	work, divisor, #2
389	add	dividend, dividend, work
396	lsr	work, divisor, #1
397	add	dividend, dividend, work
@ Division by zero: report via __div0, then return 0.
403	bl	SYM (__div0) __PLT__
404	mov	r0, #0			@ about as wrong as it could be
407 #else /* arm version */
@ --------------------------------------------------------------------
@ ARM unsigned modulo.  Shift-and-subtract unrolled four bits per pass;
@ over-eager subtractions on the last pass are tracked in "overdone"
@ (bits rotated into the top of the word) and undone at the end.
@ (Fragmentary excerpt: labels and some setup lines are not visible.)
@ --------------------------------------------------------------------
412	cmp	dividend, divisor
415	@ Unless the divisor is very big, shift it up in multiples of
416	@ four bits, since this is the amount of unwinding in the main
417	@ division loop.  Continue shifting until the divisor is
418	@ larger than the dividend.
419	cmp	divisor, #0x10000000
420	cmpcc	divisor, dividend
421	movcc	divisor, divisor, lsl #4
422	movcc	curbit, curbit, lsl #4
426	@ For very big divisors, we must shift it a bit at a time, or
427	@ we will be in danger of overflowing.
428	cmp	divisor, #0x80000000
429	cmpcc	divisor, dividend
430	movcc	divisor, divisor, lsl #1
431	movcc	curbit, curbit, lsl #1
435	@ Test for possible subtractions.  On the final pass, this may
436	@ subtract too much from the dividend, so keep track of which
437	@ subtractions are done, we can fix them up afterwards...
439	cmp	dividend, divisor
440	subcs	dividend, dividend, divisor
@ ror #k rotates curbit's low bits into the top of the word, so only
@ the final (low-nibble) pass can leave bits in 0xe0000000 below.
441	cmp	dividend, divisor, lsr #1
442	subcs	dividend, dividend, divisor, lsr #1
443	orrcs	overdone, overdone, curbit, ror #1
444	cmp	dividend, divisor, lsr #2
445	subcs	dividend, dividend, divisor, lsr #2
446	orrcs	overdone, overdone, curbit, ror #2
447	cmp	dividend, divisor, lsr #3
448	subcs	dividend, dividend, divisor, lsr #3
449	orrcs	overdone, overdone, curbit, ror #3
451	cmp	dividend, #0			@ Early termination?
452	movnes	curbit, curbit, lsr #4	@ No, any more bits to do?
453	movne	divisor, divisor, lsr #4
456	@ Any subtractions that we should not have done will be recorded in
457	@ the top three bits of "overdone".  Exactly which were not needed
458	@ are governed by the position of the bit, stored in ip.
459	@ If we terminated early, because dividend became zero,
460	@ then none of the below will match, since the bit in ip will not be
461	@ in the bottom nibble.
462	ands	overdone, overdone, #0xe0000000
463	RETc(eq)				@ No fixups needed
@ Undo each subtraction whose marker bit matches ip rotated by the
@ same amount as when the marker was set.
464	tst	overdone, ip, ror #3
465	addne	dividend, dividend, divisor, lsr #3
466	tst	overdone, ip, ror #2
467	addne	dividend, dividend, divisor, lsr #2
468	tst	overdone, ip, ror #1
469	addne	dividend, dividend, divisor, lsr #1
@ Division by zero: report via __div0, then return 0.
474	bl	SYM (__div0) __PLT__
475	mov	r0, #0			@ about as wrong as it could be
476	ldmia	sp!, {pc}RETCOND
478 #endif /* arm version */
482 #endif /* L_umodsi3 */
@ --------------------------------------------------------------------
@ Thumb signed divide: note the sign of the result, negate operands to
@ make them unsigned, then run the same unsigned shift-and-subtract
@ loop as __udivsi3.  (Fragmentary excerpt: the conditional branches
@ guarding the neg instructions, and the final sign fix-up, are not
@ visible here.)
@ --------------------------------------------------------------------
503	eor	work, divisor		@ Save the sign of the result.
509	neg	divisor, divisor	@ Loops below use unsigned.
513	neg	dividend, dividend
515	cmp	dividend, divisor
521	@ Unless the divisor is very big, shift it up in multiples of
522	@ four bits, since this is the amount of unwinding in the main
523	@ division loop.  Continue shifting until the divisor is
524	@ larger than the dividend.
527	cmp	divisor, dividend
534	@ For very big divisors, we must shift it a bit at a time, or
535	@ we will be in danger of overflowing.
540	cmp	divisor, dividend
547	@ Test for possible subtractions, and note which bits
548	@ are done in the result.  On the final pass, this may subtract
549	@ too much from the dividend, but the result will be ok, since the
550	@ "bit" will have been shifted out at the bottom.
551	cmp	dividend, divisor
553	sub	dividend, dividend, divisor
554	orr	result, result, curbit
556	lsr	work, divisor, #1
559	sub	dividend, dividend, work
563	lsr	work, divisor, #2
566	sub	dividend, dividend, work
568	orr	result, result, work
570	lsr	work, divisor, #3
573	sub	dividend, dividend, work
575	orr	result, result, work
577	cmp	dividend, #0			@ Early termination?
579	lsr	curbit, #4			@ No, any more bits to do?
@ Division by zero: report via __div0, then return 0.
596	bl	SYM (__div0) __PLT__
597	mov	r0, #0			@ about as wrong as it could be
600 #else /* arm version */
@ --------------------------------------------------------------------
@ ARM signed divide: record the result sign in ip (eor of operand
@ signs), force both operands positive with conditional rsb, then run
@ the unsigned shift-and-subtract loop.  (Fragmentary excerpt: the
@ compares that set the condition for rsbmi, and the final sign
@ correction, are not visible here.)
@ --------------------------------------------------------------------
602	eor	ip, dividend, divisor		@ Save the sign of the result.
606	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
609	rsbmi	dividend, dividend, #0
610	cmp	dividend, divisor
614	@ Unless the divisor is very big, shift it up in multiples of
615	@ four bits, since this is the amount of unwinding in the main
616	@ division loop.  Continue shifting until the divisor is
617	@ larger than the dividend.
618	cmp	divisor, #0x10000000
619	cmpcc	divisor, dividend
620	movcc	divisor, divisor, lsl #4
621	movcc	curbit, curbit, lsl #4
625	@ For very big divisors, we must shift it a bit at a time, or
626	@ we will be in danger of overflowing.
627	cmp	divisor, #0x80000000
628	cmpcc	divisor, dividend
629	movcc	divisor, divisor, lsl #1
630	movcc	curbit, curbit, lsl #1
634	@ Test for possible subtractions, and note which bits
635	@ are done in the result.  On the final pass, this may subtract
636	@ too much from the dividend, but the result will be ok, since the
637	@ "bit" will have been shifted out at the bottom.
638	cmp	dividend, divisor
639	subcs	dividend, dividend, divisor
640	orrcs	result, result, curbit
641	cmp	dividend, divisor, lsr #1
642	subcs	dividend, dividend, divisor, lsr #1
643	orrcs	result, result, curbit, lsr #1
644	cmp	dividend, divisor, lsr #2
645	subcs	dividend, dividend, divisor, lsr #2
646	orrcs	result, result, curbit, lsr #2
647	cmp	dividend, divisor, lsr #3
648	subcs	dividend, dividend, divisor, lsr #3
649	orrcs	result, result, curbit, lsr #3
650	cmp	dividend, #0			@ Early termination?
651	movnes	curbit, curbit, lsr #4	@ No, any more bits to do?
652	movne	divisor, divisor, lsr #4
@ Division by zero: report via __div0, then return 0.
662	bl	SYM (__div0) __PLT__
663	mov	r0, #0			@ about as wrong as it could be
664	ldmia	sp!, {pc}RETCOND
666 #endif /* arm version */
670 #endif /* L_divsi3 */
@ --------------------------------------------------------------------
@ Thumb signed modulo: make operands unsigned (saving the dividend's
@ sign on the stack per the comment below), run the unsigned remainder
@ loop with "overdone" fix-ups, then negate the result if the dividend
@ was negative.  (Fragmentary excerpt: labels, branches and stack
@ push/pop instructions are not visible here.)
@ --------------------------------------------------------------------
691	neg	divisor, divisor	@ Loops below use unsigned.
694	@ Need to save the sign of the dividend, unfortunately, we need
695	@ ip later on.  Must do this after saving the original value of
696	@ the work register, because we will pop this value off first.
700	neg	dividend, dividend
702	cmp	dividend, divisor
707	@ Unless the divisor is very big, shift it up in multiples of
708	@ four bits, since this is the amount of unwinding in the main
709	@ division loop.  Continue shifting until the divisor is
710	@ larger than the dividend.
713	cmp	divisor, dividend
720	@ Set work to 0x80000000
723	@ For very big divisors, we must shift it a bit at a time, or
724	@ we will be in danger of overflowing.
727	cmp	divisor, dividend
734	@ Test for possible subtractions.  On the final pass, this may
735	@ subtract too much from the dividend, so keep track of which
736	@ subtractions are done, we can fix them up afterwards...
738	cmp	dividend, divisor
740	sub	dividend, dividend, divisor
742	lsr	work, divisor, #1
745	sub	dividend, dividend, work
752	lsr	work, divisor, #2
755	sub	dividend, dividend, work
762	lsr	work, divisor, #3
765	sub	dividend, dividend, work
773	cmp	dividend, #0			@ Early termination?
775	lsr	curbit, #4			@ No, any more bits to do?
781	@ Any subtractions that we should not have done will be recorded in
782	@ the top three bits of "overdone".  Exactly which were not needed
783	@ are governed by the position of the bit, stored in ip.
784	@ If we terminated early, because dividend became zero,
785	@ then none of the below will match, since the bit in ip will not be
786	@ in the bottom nibble.
@ Fix-up path: add back each (divisor >> k) that was wrongly subtracted.
797	lsr	work, divisor, #3
798	add	dividend, dividend, work
805	lsr	work, divisor, #2
806	add	dividend, dividend, work
813	lsr	work, divisor, #1
814	add	dividend, dividend, work
@ Restore the sign of the remainder (matches the dividend's sign).
819	neg	dividend, dividend
@ Division by zero: report via __div0, then return 0.
826	bl	SYM (__div0) __PLT__
827	mov	r0, #0			@ about as wrong as it could be
830 #else /* arm version */
@ --------------------------------------------------------------------
@ ARM signed modulo: force operands positive (the original dividend is
@ pushed so its sign can be recovered at the end), run the unsigned
@ remainder loop with "overdone" fix-ups, then negate the result if
@ the dividend was negative.  (Fragmentary excerpt: the compares that
@ set the condition for rsbmi, the loop labels, and the pop of the
@ saved dividend are not visible here.)
@ --------------------------------------------------------------------
834	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
836	@ Need to save the sign of the dividend, unfortunately, we need
837	@ ip later on; this is faster than pushing lr and using that.
838	str	dividend, [sp, #-4]!
840	rsbmi	dividend, dividend, #0
841	cmp	dividend, divisor
845	@ Unless the divisor is very big, shift it up in multiples of
846	@ four bits, since this is the amount of unwinding in the main
847	@ division loop.  Continue shifting until the divisor is
848	@ larger than the dividend.
849	cmp	divisor, #0x10000000
850	cmpcc	divisor, dividend
851	movcc	divisor, divisor, lsl #4
852	movcc	curbit, curbit, lsl #4
856	@ For very big divisors, we must shift it a bit at a time, or
857	@ we will be in danger of overflowing.
858	cmp	divisor, #0x80000000
859	cmpcc	divisor, dividend
860	movcc	divisor, divisor, lsl #1
861	movcc	curbit, curbit, lsl #1
865	@ Test for possible subtractions.  On the final pass, this may
866	@ subtract too much from the dividend, so keep track of which
867	@ subtractions are done, we can fix them up afterwards...
869	cmp	dividend, divisor
870	subcs	dividend, dividend, divisor
871	cmp	dividend, divisor, lsr #1
872	subcs	dividend, dividend, divisor, lsr #1
873	orrcs	overdone, overdone, curbit, ror #1
874	cmp	dividend, divisor, lsr #2
875	subcs	dividend, dividend, divisor, lsr #2
876	orrcs	overdone, overdone, curbit, ror #2
877	cmp	dividend, divisor, lsr #3
878	subcs	dividend, dividend, divisor, lsr #3
879	orrcs	overdone, overdone, curbit, ror #3
881	cmp	dividend, #0			@ Early termination?
882	movnes	curbit, curbit, lsr #4	@ No, any more bits to do?
883	movne	divisor, divisor, lsr #4
886	@ Any subtractions that we should not have done will be recorded in
887	@ the top three bits of "overdone".  Exactly which were not needed
888	@ are governed by the position of the bit, stored in ip.
889	@ If we terminated early, because dividend became zero,
890	@ then none of the below will match, since the bit in ip will not be
891	@ in the bottom nibble.
892	ands	overdone, overdone, #0xe0000000
@ Undo each subtraction whose marker bit matches ip rotated by the
@ same amount as when the marker was set.
894	tst	overdone, ip, ror #3
895	addne	dividend, dividend, divisor, lsr #3
896	tst	overdone, ip, ror #2
897	addne	dividend, dividend, divisor, lsr #2
898	tst	overdone, ip, ror #1
899	addne	dividend, dividend, divisor, lsr #1
@ Negate the remainder if the original (saved) dividend was negative.
903	rsbmi	dividend, dividend, #0
@ Division by zero: report via __div0, then return 0.
908	bl	SYM (__div0) __PLT__
909	mov	r0, #0			@ about as wrong as it could be
910	ldmia	sp!, {pc}RETCOND
912 #endif /* arm version */
916 #endif /* L_modsi3 */
920 #endif /* L_divmodsi_tools */
929 @ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls
931 #include <asm/unistd.h>
933 #define SIGFPE	8			@ cannot use <asm/signal.h> as it
934 						@ contains too much C rubbish
@ NOTE(review): the syscall sequence (getpid/kill with SIGFPE,
@ presumably) between these lines is not visible in this excerpt;
@ only the two return paths survive here.
940 	ldmhsfd	sp!, {r1, pc}RETCOND	@ not much we can do
943 	ldmfd	sp!, {r1, pc}RETCOND
947 #endif /* L_dvmd_lnx */
949 /* These next two sections are here despite the fact that they contain Thumb
950 assembler because their presence allows interworked code to be linked even
951 when the GCC library is this one. */
953 /* Do not build the interworking functions when the target cpu
954 is the arm v3 architecture. (This is one of the multilib
956 #if defined L_call_via_rX && ! defined __ARM_ARCH_3__
958 /* These labels & instructions are used by the Arm/Thumb interworking code.
959 The address of function to be called is loaded into a register and then
960 one of these labels is called via a BL instruction. This puts the
961 return address into the link register with the bottom bit set, and the
962 code here switches to the correct mode before executing the function. */
@ Emit one _call_via_<reg> entry point: a global, typed function symbol
@ used by ARM/Thumb interworking (see the comment block above).  The
@ macro body between the label and the SIZE directive -- presumably a
@ single "bx \register" -- and the .endm are not visible in this excerpt.
967 .macro call_via register
968 	.globl	SYM (_call_via_\register)
969 	TYPE	(_call_via_\register)
971 SYM (_call_via_\register):
975 	SIZE	(_call_via_\register)
994 #endif /* L_call_via_rX */
996 /* Do not build the interworking functions when the target cpu
997 is the arm v3 architecture. (This is one of the multilib
999 #if defined L_interwork_call_via_rX && ! defined __ARM_ARCH_3__
1001 /* These labels & instructions are used by the Arm/Thumb interworking code,
1002 when the target address is in an unknown instruction set. The address
1003 of function to be called is loaded into a register and then one of these
1004 labels is called via a BL instruction. This puts the return address
1005 into the link register with the bottom bit set, and the code here
1006 switches to the correct mode before executing the function. Unfortunately
1007 the target code cannot be relied upon to return via a BX instruction, so
1008    instead we have to store the return address on the stack and allow the
1009 called function to return here instead. Upon return we recover the real
1010 return address and use a BX to get back to Thumb mode. */
@ Emit one _interwork_call_via_<reg> entry point (see the comment block
@ above): the called function returns here via the stack, and control
@ comes back through _arm_return to get to Thumb mode.  NOTE(review):
@ most of each macro body (the mode test, the bx, the stack push of the
@ return address) is not visible in this excerpt -- only the symbol
@ bookkeeping and the "adreq lr, _arm_return" fix-up survive here.
1022 .macro interwork register
1024 	.globl	SYM (_interwork_call_via_\register)
1025 	TYPE	(_interwork_call_via_\register)
1027 SYM (_interwork_call_via_\register):
1032 	.globl .Lchange_\register
1036 	adreq	lr, _arm_return
1039 	SIZE	(_interwork_call_via_\register)
@ The lr case cannot be generated by the macro because lr itself must
@ carry the return address, so it is written out by hand.
1057 	/* The lr case has to be handled a little differently...*/
1059 	.globl	SYM (_interwork_call_via_lr)
1060 	TYPE	(_interwork_call_via_lr)
1062 SYM (_interwork_call_via_lr):
1072 	adreq	lr, _arm_return
1075 	SIZE	(_interwork_call_via_lr)
1077 #endif /* L_interwork_call_via_rX */