1 /* ieee754-sf.S single-precision floating point support for ARM
3 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Nicolas Pitre (nico@cam.org)
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
28 * The goal of this code is to be as fast as possible. This is
29 * not meant to be easy to understand for the casual reader.
31 * Only the default rounding mode is intended for best performance.
32 * Exceptions aren't supported yet, but that can be added quite easily
33 * if necessary without impacting performance.
39 ARM_FUNC_ALIAS aeabi_fneg negsf2
41 eor r0, r0, #0x80000000 @ flip sign bit
49 #ifdef L_arm_addsubsf3
51 ARM_FUNC_START aeabi_frsub
53 eor r0, r0, #0x80000000 @ flip sign bit of first arg
57 ARM_FUNC_ALIAS aeabi_fsub subsf3
59 eor r1, r1, #0x80000000 @ flip sign bit of second arg
60 #if defined(__INTERWORKING_STUBS__)
61 b 1f @ Skip Thumb-code prologue
65 ARM_FUNC_ALIAS aeabi_fadd addsf3
67 1: @ Look for zeroes, equal values, INF, or NAN.
70 COND(mov,s,ne) r3, r1, lsl #1
72 COND(mvn,s,ne) ip, r2, asr #24
73 COND(mvn,s,ne) ip, r3, asr #24
76 @ Compute exponent difference. Make largest exponent in r2,
77 @ corresponding arg in r0, and positive exponent difference in r3.
79 rsbs r3, r2, r3, lsr #24
88 @ If exponent difference is too large, return largest argument
89 @ already in r0. We need up to 25 bits to handle proper rounding
95 @ Convert mantissa to signed integer.
97 orr r0, r0, #0x00800000
98 bic r0, r0, #0xff000000
102 orr r1, r1, #0x00800000
103 bic r1, r1, #0xff000000
107 @ If exponent == difference, one or both args were denormalized.
108 @ Since this is not common case, rescale them off line.
113 @ Compensate for the exponent overlapping the mantissa MSB added later
116 @ Shift and add second arg to first arg in r0.
117 @ Keep leftover bits into r1.
118 shiftop adds r0 r0 r1 asr r3 ip
120 shift1 lsl, r1, r1, r3
122 @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
123 and r3, r0, #0x80000000
125 #if defined(__thumb2__)
127 sbc r0, r0, r0, lsl #1
133 @ Determine how to normalize the result.
140 @ Result needs to be shifted right.
145 @ Make sure we did not bust our exponent.
149 @ Our result is now properly aligned into r0, remaining bits in r1.
150 @ Pack final result together.
151 @ Round with MSB of r1. If halfway between two numbers, round towards
155 adc r0, r0, r2, lsl #23
161 @ Result must be shifted left and exponent adjusted.
169 @ No rounding necessary since r1 will always be 0 at this point.
175 moveq r0, r0, lsl #12
195 shift1 lsl, r0, r0, ip
199 @ Final result with sign
200 @ If exponent negative, denormalize result.
202 addge r0, r0, r2, lsl #23
205 #if defined(__thumb2__)
210 orrlt r0, r3, r0, lsr r2
214 @ Fixup and adjust bit position for denormalized arguments.
215 @ Note that r2 must not remain equal to 0.
218 eor r1, r1, #0x00800000
220 eoreq r0, r0, #0x00800000
230 COND(mvn,s,ne) ip, r3, asr #24
236 @ Result is x + 0.0 = x or 0.0 + y = y.
244 @ Result is x - x = 0.
249 @ Result is x + x = 2x.
254 orrcs r0, r0, #0x80000000
256 2: adds r2, r2, #(2 << 24)
258 addcc r0, r0, #(1 << 23)
260 and r3, r0, #0x80000000
262 @ Overflow: return INF.
264 orr r0, r3, #0x7f000000
265 orr r0, r0, #0x00800000
268 @ At least one of r0/r1 is INF/NAN.
269 @ if r0 != INF/NAN: return r1 (which is INF/NAN)
270 @ if r1 != INF/NAN: return r0 (which is INF/NAN)
271 @ if r0 or r1 is NAN: return NAN
272 @ if opposite sign: return NAN
273 @ otherwise return r0 (which is INF or -INF)
278 COND(mvn,s,eq) r3, r3, asr #24
282 COND(mov,s,eq) r3, r1, lsl #9
284 orrne r0, r0, #0x00400000 @ quiet NAN
293 ARM_FUNC_START floatunsisf
294 ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
299 ARM_FUNC_START floatsisf
300 ARM_FUNC_ALIAS aeabi_i2f floatsisf
302 ands r3, r0, #0x80000000
310 @ Add initial exponent to sign
311 orr r3, r3, #((127 + 23) << 23)
324 ARM_FUNC_START floatundisf
325 ARM_FUNC_ALIAS aeabi_ul2f floatundisf
328 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
339 ARM_FUNC_START floatdisf
340 ARM_FUNC_ALIAS aeabi_l2f floatdisf
343 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
351 ands r3, ah, #0x80000000 @ sign bit in r3
353 #if defined(__thumb2__)
355 sbc ah, ah, ah, lsl #1
361 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
362 @ For hard FPA code we want to return via the tail below so that
363 @ we can return the result in f0 as well as in r0 for backwards
375 @ Add initial exponent to sign
376 orr r3, r3, #((127 + 23 + 32) << 23)
378 subeq r3, r3, #(32 << 23)
379 2: sub r3, r3, #(1 << 23)
386 movhs ip, ip, lsr #16
399 sublo r2, r2, ip, lsr #1
400 subs r2, r2, ip, lsr #3
409 sub r3, r3, r2, lsl #23
412 shiftop add r3 r3 ah lsl r2 ip
413 shift1 lsl, ip, al, r2
416 shiftop adc r0 r3 al lsr r2 r2
422 shift1 lsl, ip, ah, r2
424 orrs al, al, ip, lsl #1
425 shiftop adc r0 r3 ah lsr r2 r2
427 biceq r0, r0, ip, lsr #31
430 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
444 #endif /* L_arm_addsubsf3 */
446 #ifdef L_arm_muldivsf3
448 ARM_FUNC_START mulsf3
449 ARM_FUNC_ALIAS aeabi_fmul mulsf3
451 @ Mask out exponents, trap any zero/denormal/INF/NAN.
453 ands r2, ip, r0, lsr #23
455 COND(and,s,ne) r3, ip, r1, lsr #23
461 @ Add exponents together
464 @ Determine final sign.
467 @ Convert mantissa to unsigned integer.
468 @ If power of two, branch to a separate path.
469 @ Make up for final alignment.
472 COND(mov,s,ne) r1, r1, lsl #9
475 orr r0, r3, r0, lsr #5
476 orr r1, r3, r1, lsr #5
480 @ Put sign bit in r3, which will be restored into r0 later.
481 and r3, ip, #0x80000000
483 @ Well, no way to make it shorter without the umull instruction.
487 bic r0, r0, r4, lsl #16
488 bic r1, r1, r5, lsl #16
493 adds r3, r3, r0, lsl #16
494 adc r1, ip, r0, lsr #16
499 @ The actual multiplication.
502 @ Put final sign in r0.
503 and r0, ip, #0x80000000
507 @ Adjust result upon the MSB position.
511 orrcc r1, r1, r3, lsr #31
514 @ Add sign to result.
517 @ Apply exponent bias, check for under/overflow.
522 @ Round the result, merge final exponent.
524 adc r0, r0, r2, lsl #23
529 @ Multiplication by 0x1p*: let's shortcut a lot of code.
532 and ip, ip, #0x80000000
535 orr r0, ip, r0, lsr #9
536 orr r0, r0, r1, lsr #9
539 COND(rsb,s,gt) r3, r2, #255
540 orrgt r0, r0, r2, lsl #23
543 @ Under/overflow: fix things up for the code below.
544 orr r0, r0, #0x00800000
552 @ Check if denormalized result is possible, otherwise return signed 0.
555 bicle r0, r0, #0x7fffffff
558 @ Shift value right, round, etc.
561 shift1 lsr, r1, r1, r2
563 shift1 lsl, ip, r0, r2
566 orrs r3, r3, ip, lsl #1
568 biceq r0, r0, ip, lsr #31
571 @ One or both arguments are denormalized.
572 @ Scale them leftwards and preserve sign bit.
575 and ip, r0, #0x80000000
578 tsteq r0, #0x00800000
583 and ip, r1, #0x80000000
586 tsteq r1, #0x00800000
593 @ Isolate the INF and NAN cases away
594 and r3, ip, r1, lsr #23
600 @ Here, one or more arguments are either denormalized or zero.
601 bics ip, r0, #0x80000000
603 COND(bic,s,ne) ip, r1, #0x80000000
606 @ Result is 0, but determine sign anyway.
609 bic r0, r0, #0x7fffffff
612 1: @ One or both args are INF or NAN.
615 teqne r0, #0x80000000
618 teqne r1, #0x80000000
619 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
623 bne LSYM(Lml_n) @ NAN * <anything> -> NAN
629 bne LSYM(Lml_n) @ <anything> * NAN -> NAN
631 @ Result is INF, but we need to determine its sign.
635 @ Overflow: return INF (sign already in r0).
637 and r0, r0, #0x80000000
638 orr r0, r0, #0x7f000000
639 orr r0, r0, #0x00800000
642 @ Return a quiet NAN.
644 orr r0, r0, #0x7f000000
645 orr r0, r0, #0x00c00000
651 ARM_FUNC_START divsf3
652 ARM_FUNC_ALIAS aeabi_fdiv divsf3
654 @ Mask out exponents, trap any zero/denormal/INF/NAN.
656 ands r2, ip, r0, lsr #23
658 COND(and,s,ne) r3, ip, r1, lsr #23
664 @ Subtract divisor exponent from dividend's
667 @ Preserve final sign into ip.
670 @ Convert mantissa to unsigned integer.
671 @ Dividend -> r3, divisor -> r1.
676 orr r1, r3, r1, lsr #4
677 orr r3, r3, r0, lsr #4
679 @ Initialize r0 (result) with final sign bit.
680 and r0, ip, #0x80000000
682 @ Ensure result will land to known bit position.
683 @ Apply exponent bias accordingly.
687 adc r2, r2, #(127 - 2)
689 @ The actual division loop.
697 subcs r3, r3, r1, lsr #1
698 orrcs r0, r0, ip, lsr #1
701 subcs r3, r3, r1, lsr #2
702 orrcs r0, r0, ip, lsr #2
705 subcs r3, r3, r1, lsr #3
706 orrcs r0, r0, ip, lsr #3
709 COND(mov,s,ne) ip, ip, lsr #4
712 @ Check exponent for under/overflow.
716 @ Round the result, merge final exponent.
718 adc r0, r0, r2, lsl #23
723 @ Division by 0x1p*: let's shortcut a lot of code.
725 and ip, ip, #0x80000000
726 orr r0, ip, r0, lsr #9
729 COND(rsb,s,gt) r3, r2, #255
730 orrgt r0, r0, r2, lsl #23
733 orr r0, r0, #0x00800000
738 @ One or both arguments are denormalized.
739 @ Scale them leftwards and preserve sign bit.
742 and ip, r0, #0x80000000
745 tsteq r0, #0x00800000
750 and ip, r1, #0x80000000
753 tsteq r1, #0x00800000
759 @ One or both arguments are either INF, NAN, zero or denormalized.
761 and r3, ip, r1, lsr #23
765 bne LSYM(Lml_n) @ NAN / <anything> -> NAN
767 bne LSYM(Lml_i) @ INF / <anything> -> INF
769 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
773 beq LSYM(Lml_z) @ <anything> / INF -> 0
775 b LSYM(Lml_n) @ <anything> / NAN -> NAN
776 2: @ If both are nonzero, we need to normalize and resume above.
777 bics ip, r0, #0x80000000
779 COND(bic,s,ne) ip, r1, #0x80000000
781 @ One or both arguments are zero.
782 bics r2, r0, #0x80000000
783 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
784 bics r3, r1, #0x80000000
785 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
786 b LSYM(Lml_n) @ 0 / 0 -> NAN
791 #endif /* L_arm_muldivsf3 */
795 @ The return value in r0 is
797 @ 0 if the operands are equal
798 @ 1 if the first operand is greater than the second, or
799 @ the operands are unordered and the operation is
800 @ CMP, LT, LE, NE, or EQ.
801 @ -1 if the first operand is less than the second, or
802 @ the operands are unordered and the operation is GT
805 @ The Z flag will be set iff the operands are equal.
807 @ The following registers are clobbered by this function:
811 ARM_FUNC_ALIAS gesf2 gtsf2
816 ARM_FUNC_ALIAS lesf2 ltsf2
820 ARM_FUNC_START cmpsf2
821 ARM_FUNC_ALIAS nesf2 cmpsf2
822 ARM_FUNC_ALIAS eqsf2 cmpsf2
823 mov ip, #1 @ how should we specify unordered here?
825 1: str ip, [sp, #-4]!
827 @ Trap any INF/NAN first.
832 COND(mvn,s,ne) ip, r3, asr #24
836 @ Note that 0.0 is equal to -0.0.
838 orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
840 teqne r0, r1 @ if not 0 compare sign
842 COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0
846 movhi r0, r1, asr #31
848 mvnlo r0, r1, asr #31
854 3: mvns ip, r2, asr #24
858 4: mvns ip, r3, asr #24
861 beq 2b @ r1 is not NAN
862 5: ldr r0, [sp], #4 @ return unordered code.
/* Flag-returning comparison entry points.  aeabi_cfcmpeq and aeabi_cfcmple
   share one body (the latter is an alias); aeabi_cfrcmple is a separate
   entry placed just before them.
   r0-r3 and lr are pushed at label 6 and r0-r3 are named again in the
   RETLDM at the end, so the comparison result is delivered in the flags
   only: Z reports equality and C is cleared when the first operand is
   smaller than the second.
   NOTE(review): the instructions between the aeabi_cfrcmple entry and
   label 6, and the call that produces the compared value, are not visible
   in this excerpt -- presumably aeabi_cfrcmple swaps r0 and r1 before
   reaching label 6; confirm against the full source.  */
873 ARM_FUNC_START aeabi_cfrcmple
880 ARM_FUNC_START aeabi_cfcmpeq
881 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
883 @ The status-returning routines are required to preserve all
884 @ registers except ip, lr, and cpsr.
885 6: do_push {r0, r1, r2, r3, lr}
887 @ Set the Z flag correctly, and the C flag unconditionally.
889 @ Clear the C flag if the return value was -1, indicating
890 @ that the first operand was smaller than the second.
893 RETLDM "r0, r1, r2, r3"
895 FUNC_END aeabi_cfcmple
896 FUNC_END aeabi_cfcmpeq
897 FUNC_END aeabi_cfrcmple
/* aeabi_fcmpeq: boolean "equal" test built on aeabi_cfcmple.
   After the call, r0 is set to 1 when Z is set (operands equal) and to 0
   otherwise (less than, greater than, or unordered).
   NOTE(review): the save/restore of lr and the return sequence are not
   visible in this excerpt.  */
899 ARM_FUNC_START aeabi_fcmpeq
902 ARM_CALL aeabi_cfcmple
904 moveq r0, #1 @ Equal to.
905 movne r0, #0 @ Less than, greater than, or unordered.
908 FUNC_END aeabi_fcmpeq
/* aeabi_fcmplt: boolean "less than" test built on aeabi_cfcmple.
   After the call, r0 is set to 1 when C is clear (first operand less than
   the second) and to 0 when C is set (equal, greater than, or unordered).
   NOTE(review): the save/restore of lr and the return sequence are not
   visible in this excerpt.  */
910 ARM_FUNC_START aeabi_fcmplt
913 ARM_CALL aeabi_cfcmple
915 movcc r0, #1 @ Less than.
916 movcs r0, #0 @ Equal to, greater than, or unordered.
919 FUNC_END aeabi_fcmplt
/* aeabi_fcmple: boolean "less than or equal" test built on aeabi_cfcmple.
   After the call, r0 is set to 1 on the LS condition (C clear or Z set)
   and to 0 on the HI condition (greater than or unordered).
   NOTE(review): the save/restore of lr and the return sequence are not
   visible in this excerpt.  */
921 ARM_FUNC_START aeabi_fcmple
924 ARM_CALL aeabi_cfcmple
926 movls r0, #1 @ Less than or equal to.
927 movhi r0, #0 @ Greater than or unordered.
930 FUNC_END aeabi_fcmple
/* aeabi_fcmpge: boolean "greater than or equal" test.
   Uses aeabi_cfrcmple rather than aeabi_cfcmple, so the flags describe
   operand 2 relative to operand 1: r0 is set to 1 on the LS condition
   (operand 2 <= operand 1) and to 0 on the HI condition (operand 2 greater
   than operand 1, or unordered).
   NOTE(review): the save/restore of lr and the return sequence are not
   visible in this excerpt.  */
932 ARM_FUNC_START aeabi_fcmpge
935 ARM_CALL aeabi_cfrcmple
937 movls r0, #1 @ Operand 2 is less than or equal to operand 1.
938 movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
941 FUNC_END aeabi_fcmpge
/* aeabi_fcmpgt: boolean "greater than" test.
   Uses aeabi_cfrcmple rather than aeabi_cfcmple, so the flags describe
   operand 2 relative to operand 1: r0 is set to 1 when C is clear
   (operand 2 less than operand 1) and to 0 when C is set.
   NOTE(review): the save/restore of lr and the return sequence are not
   visible in this excerpt.  */
943 ARM_FUNC_START aeabi_fcmpgt
946 ARM_CALL aeabi_cfrcmple
948 movcc r0, #1 @ Operand 2 is less than operand 1.
949 movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
950 @ or they are unordered.
953 FUNC_END aeabi_fcmpgt
955 #endif /* L_cmpsf2 */
957 #ifdef L_arm_unordsf2
/* unordsf2 / aeabi_fcmpun: report whether two single-precision values are
   unordered.  Returns r0 = 0 when the arguments are ordered (label 2) and
   r0 = 1 when they are unordered (label 3).
   NOTE(review): only the test at label 1 is visible in this excerpt.
   `mvns ip, r3, asr #24` produces zero (Z set) exactly when the top eight
   bits of r3 are all ones -- presumably r3 holds the second argument
   shifted left by one, so this detects an all-ones exponent; confirm
   against the full source.  */
959 ARM_FUNC_START unordsf2
960 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
968 1: mvns ip, r3, asr #24
972 2: mov r0, #0 @ arguments are ordered.
974 3: mov r0, #1 @ arguments are unordered.
977 FUNC_END aeabi_fcmpun
980 #endif /* L_arm_unordsf2 */
984 ARM_FUNC_START fixsfsi
985 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
987 @ check exponent range.
990 bcc 1f @ value is too small
992 subs r2, r3, r2, lsr #24
993 bls 2f @ value is too large
997 orr r3, r3, #0x80000000
998 tst r0, #0x80000000 @ the sign bit
999 shift1 lsr, r0, r3, r2
1007 2: cmp r2, #(127 + 31 - 0xff)
1011 3: ands r0, r0, #0x80000000 @ the sign bit
1013 moveq r0, #0x7fffffff @ the maximum signed positive si
1016 4: mov r0, #0 @ What should we convert NAN to?
1022 #endif /* L_fixsfsi */
1024 #ifdef L_arm_fixunssfsi
/* fixunssfsi / aeabi_f2uiz: single-precision to unsigned 32-bit integer
   conversion; every visible result path writes r0.
   Paths visible in this excerpt:
     - "value is negative" and "value is too small" (r2 below 127 << 24)
       both branch to label 1, which is not visible here;
     - otherwise r2 becomes r3 - (r2 >> 24); a negative result branches to
       label 2 ("value is too large");
     - in range: bit 31 of r3 is set and r3 is shifted right by r2 into r0;
     - label 3 returns 0xffffffff (maximum unsigned value);
     - label 4 returns 0 for NAN.
   NOTE(review): the instructions that load r2 and r3 are elided --
   presumably r2 is the argument shifted left by one (exponent in the top
   byte) and bit 31 of r3 is the implicit leading mantissa bit; confirm
   against the full source.  */
1026 ARM_FUNC_START fixunssfsi
1027 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
1029 @ check exponent range.
1031 bcs 1f @ value is negative
1032 cmp r2, #(127 << 24)
1033 bcc 1f @ value is too small
1035 subs r2, r3, r2, lsr #24
1036 bmi 2f @ value is too large
1040 orr r3, r3, #0x80000000
1041 shift1 lsr, r0, r3, r2
1047 2: cmp r2, #(127 + 31 - 0xff)
1051 3: mov r0, #0xffffffff @ maximum unsigned si
1054 4: mov r0, #0 @ What should we convert NAN to?
1057 FUNC_END aeabi_f2uiz
1060 #endif /* L_arm_fixunssfsi */