/* ieee754-df.S double-precision floating point support for ARM Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. Contributed by Nicolas Pitre (nico@cam.org) This file is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* * Notes: * * The goal of this code is to be as fast as possible. This is * not meant to be easy to understand for the casual reader. * For slightly simpler code please see the single precision version * of this file. * * Only the default rounding mode is intended for best performances. * Exceptions aren't supported yet, but that can be added quite easily * if necessary without impacting performances. */ @ For FPA, float words are always big-endian. @ For VFP, floats words follow the memory system mode. #if defined(__VFP_FP__) && !defined(__ARMEB__) #define xl r0 #define xh r1 #define yl r2 #define yh r3 #else #define xh r0 #define xl r1 #define yh r2 #define yl r3 #endif #ifdef L_arm_negdf2 ARM_FUNC_START negdf2 ARM_FUNC_ALIAS aeabi_dneg negdf2 @ flip sign bit eor xh, xh, #0x80000000 RET FUNC_END aeabi_dneg FUNC_END negdf2 #endif #ifdef L_arm_addsubdf3 ARM_FUNC_START aeabi_drsub eor xh, xh, #0x80000000 @ flip sign bit of first arg b 1f ARM_FUNC_START subdf3 ARM_FUNC_ALIAS aeabi_dsub subdf3 eor yh, yh, #0x80000000 @ flip sign bit of second arg #if defined(__INTERWORKING_STUBS__) b 1f @ Skip Thumb-code prologue #endif ARM_FUNC_START adddf3 ARM_FUNC_ALIAS aeabi_dadd adddf3 1: do_push {r4, r5, lr} @ Look for zeroes, equal values, INF, or NAN. shift1 lsl, r4, xh, #1 shift1 lsl, r5, yh, #1 teq r4, r5 do_it eq teqeq xl, yl do_it ne, ttt COND(orr,s,ne) ip, r4, xl COND(orr,s,ne) ip, r5, yl COND(mvn,s,ne) ip, r4, asr #21 COND(mvn,s,ne) ip, r5, asr #21 beq LSYM(Lad_s) @ Compute exponent difference. Make largest exponent in r4, @ corresponding arg in xh-xl, and positive exponent difference in r5. shift1 lsr, r4, r4, #21 rsbs r5, r4, r5, lsr #21 do_it lt rsblt r5, r5, #0 ble 1f add r4, r4, r5 eor yl, xl, yl eor yh, xh, yh eor xl, yl, xl eor xh, yh, xh eor yl, xl, yl eor yh, xh, yh 1: @ If exponent difference is too large, return largest argument @ already in xh-xl. We need up to 54 bit to handle proper rounding @ of 0x1p54 - 1.1. cmp r5, #54 do_it hi RETLDM "r4, r5" hi @ Convert mantissa to signed integer. tst xh, #0x80000000 mov xh, xh, lsl #12 mov ip, #0x00100000 orr xh, ip, xh, lsr #12 beq 1f #if defined(__thumb2__) negs xl, xl sbc xh, xh, xh, lsl #1 #else rsbs xl, xl, #0 rsc xh, xh, #0 #endif 1: tst yh, #0x80000000 mov yh, yh, lsl #12 orr yh, ip, yh, lsr #12 beq 1f #if defined(__thumb2__) negs yl, yl sbc yh, yh, yh, lsl #1 #else rsbs yl, yl, #0 rsc yh, yh, #0 #endif 1: @ If exponent == difference, one or both args were denormalized. @ Since this is not common case, rescale them off line. teq r4, r5 beq LSYM(Lad_d) LSYM(Lad_x): @ Compensate for the exponent overlapping the mantissa MSB added later sub r4, r4, #1 @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. rsbs lr, r5, #32 blt 1f shift1 lsl, ip, yl, lr shiftop adds xl xl yl lsr r5 yl adc xh, xh, #0 shiftop adds xl xl yh lsl lr yl shiftop adcs xh xh yh asr r5 yh b 2f 1: sub r5, r5, #32 add lr, lr, #32 cmp yl, #1 shift1 lsl,ip, yh, lr do_it cs orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later shiftop adds xl xl yh asr r5 yh adcs xh, xh, yh, asr #31 2: @ We now have a result in xh-xl-ip. @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) and r5, xh, #0x80000000 bpl LSYM(Lad_p) #if defined(__thumb2__) mov lr, #0 negs ip, ip sbcs xl, lr, xl sbc xh, lr, xh #else rsbs ip, ip, #0 rscs xl, xl, #0 rsc xh, xh, #0 #endif @ Determine how to normalize the result. LSYM(Lad_p): cmp xh, #0x00100000 bcc LSYM(Lad_a) cmp xh, #0x00200000 bcc LSYM(Lad_e) @ Result needs to be shifted right. movs xh, xh, lsr #1 movs xl, xl, rrx mov ip, ip, rrx add r4, r4, #1 @ Make sure we did not bust our exponent. mov r2, r4, lsl #21 cmn r2, #(2 << 21) bcs LSYM(Lad_o) @ Our result is now properly aligned into xh-xl, remaining bits in ip. @ Round with MSB of ip. If halfway between two numbers, round towards @ LSB of xl = 0. @ Pack final result together. LSYM(Lad_e): cmp ip, #0x80000000 do_it eq COND(mov,s,eq) ip, xl, lsr #1 adcs xl, xl, #0 adc xh, xh, r4, lsl #20 orr xh, xh, r5 RETLDM "r4, r5" @ Result must be shifted left and exponent adjusted. LSYM(Lad_a): movs ip, ip, lsl #1 adcs xl, xl, xl adc xh, xh, xh tst xh, #0x00100000 sub r4, r4, #1 bne LSYM(Lad_e) @ No rounding necessary since ip will always be 0 at this point. LSYM(Lad_l): #if __ARM_ARCH__ < 5 teq xh, #0 movne r3, #20 moveq r3, #52 moveq xh, xl moveq xl, #0 mov r2, xh cmp r2, #(1 << 16) movhs r2, r2, lsr #16 subhs r3, r3, #16 cmp r2, #(1 << 8) movhs r2, r2, lsr #8 subhs r3, r3, #8 cmp r2, #(1 << 4) movhs r2, r2, lsr #4 subhs r3, r3, #4 cmp r2, #(1 << 2) subhs r3, r3, #2 sublo r3, r3, r2, lsr #1 sub r3, r3, r2, lsr #3 #else teq xh, #0 do_it eq, t moveq xh, xl moveq xl, #0 clz r3, xh do_it eq addeq r3, r3, #32 sub r3, r3, #11 #endif @ determine how to shift the value. subs r2, r3, #32 bge 2f adds r2, r2, #12 ble 1f @ shift value left 21 to 31 bits, or actually right 11 to 1 bits @ since a register switch happened above. add ip, r2, #20 rsb r2, r2, #12 shift1 lsl, xl, xh, ip shift1 lsr, xh, xh, r2 b 3f @ actually shift value left 1 to 20 bits, which might also represent @ 32 to 52 bits if counting the register switch that happened earlier. 1: add r2, r2, #20 2: do_it le rsble ip, r2, #32 shift1 lsl, xh, xh, r2 #if defined(__thumb2__) lsr ip, xl, ip itt le orrle xh, xh, ip lslle xl, xl, r2 #else orrle xh, xh, xl, lsr ip movle xl, xl, lsl r2 #endif @ adjust exponent accordingly. 3: subs r4, r4, r3 do_it ge, tt addge xh, xh, r4, lsl #20 orrge xh, xh, r5 RETLDM "r4, r5" ge @ Exponent too small, denormalize result. @ Find out proper shift value. mvn r4, r4 subs r4, r4, #31 bge 2f adds r4, r4, #12 bgt 1f @ shift result right of 1 to 20 bits, sign is in r5. add r4, r4, #20 rsb r2, r4, #32 shift1 lsr, xl, xl, r4 shiftop orr xl xl xh lsl r2 yh shiftop orr xh r5 xh lsr r4 yh RETLDM "r4, r5" @ shift result right of 21 to 31 bits, or left 11 to 1 bits after @ a register switch from xh to xl. 1: rsb r4, r4, #12 rsb r2, r4, #32 shift1 lsr, xl, xl, r2 shiftop orr xl xl xh lsl r4 yh mov xh, r5 RETLDM "r4, r5" @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch @ from xh to xl. 2: shift1 lsr, xl, xh, r4 mov xh, r5 RETLDM "r4, r5" @ Adjust exponents for denormalized arguments. @ Note that r4 must not remain equal to 0. LSYM(Lad_d): teq r4, #0 eor yh, yh, #0x00100000 do_it eq, te eoreq xh, xh, #0x00100000 addeq r4, r4, #1 subne r5, r5, #1 b LSYM(Lad_x) LSYM(Lad_s): mvns ip, r4, asr #21 do_it ne COND(mvn,s,ne) ip, r5, asr #21 beq LSYM(Lad_i) teq r4, r5 do_it eq teqeq xl, yl beq 1f @ Result is x + 0.0 = x or 0.0 + y = y. orrs ip, r4, xl do_it eq, t moveq xh, yh moveq xl, yl RETLDM "r4, r5" 1: teq xh, yh @ Result is x - x = 0. do_it ne, tt movne xh, #0 movne xl, #0 RETLDM "r4, r5" ne @ Result is x + x = 2x. movs ip, r4, lsr #21 bne 2f movs xl, xl, lsl #1 adcs xh, xh, xh do_it cs orrcs xh, xh, #0x80000000 RETLDM "r4, r5" 2: adds r4, r4, #(2 << 21) do_it cc, t addcc xh, xh, #(1 << 20) RETLDM "r4, r5" cc and r5, xh, #0x80000000 @ Overflow: return INF. LSYM(Lad_o): orr xh, r5, #0x7f000000 orr xh, xh, #0x00f00000 mov xl, #0 RETLDM "r4, r5" @ At least one of x or y is INF/NAN. @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) @ if either is NAN: return NAN @ if opposite sign: return NAN @ otherwise return xh-xl (which is INF or -INF) LSYM(Lad_i): mvns ip, r4, asr #21 do_it ne, te movne xh, yh movne xl, yl COND(mvn,s,eq) ip, r5, asr #21 do_it ne, t movne yh, xh movne yl, xl orrs r4, xl, xh, lsl #12 do_it eq, te COND(orr,s,eq) r5, yl, yh, lsl #12 teqeq xh, yh orrne xh, xh, #0x00080000 @ quiet NAN RETLDM "r4, r5" FUNC_END aeabi_dsub FUNC_END subdf3 FUNC_END aeabi_dadd FUNC_END adddf3 ARM_FUNC_START floatunsidf ARM_FUNC_ALIAS aeabi_ui2d floatunsidf teq r0, #0 do_it eq, t moveq r1, #0 RETc(eq) do_push {r4, r5, lr} mov r4, #0x400 @ initial exponent add r4, r4, #(52-1 - 1) mov r5, #0 @ sign bit is 0 .ifnc xl, r0 mov xl, r0 .endif mov xh, #0 b LSYM(Lad_l) FUNC_END aeabi_ui2d FUNC_END floatunsidf ARM_FUNC_START floatsidf ARM_FUNC_ALIAS aeabi_i2d floatsidf teq r0, #0 do_it eq, t moveq r1, #0 RETc(eq) do_push {r4, r5, lr} mov r4, #0x400 @ initial exponent add r4, r4, #(52-1 - 1) ands r5, r0, #0x80000000 @ sign bit in r5 do_it mi rsbmi r0, r0, #0 @ absolute value .ifnc xl, r0 mov xl, r0 .endif mov xh, #0 b LSYM(Lad_l) FUNC_END aeabi_i2d FUNC_END floatsidf ARM_FUNC_START extendsfdf2 ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 movs r2, r0, lsl #1 @ toss sign bit mov xh, r2, asr #3 @ stretch exponent mov xh, xh, rrx @ retrieve sign bit mov xl, r2, lsl #28 @ retrieve remaining bits do_it ne, ttt COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent teqne r3, #0xff000000 @ if not 0, check if INF or NAN eorne xh, xh, #0x38000000 @ fixup exponent otherwise. RETc(ne) @ and return it. teq r2, #0 @ if actually 0 do_it ne, e teqne r3, #0xff000000 @ or INF or NAN RETc(eq) @ we are done already. @ value was denormalized. We can normalize it now. do_push {r4, r5, lr} mov r4, #0x380 @ setup corresponding exponent and r5, xh, #0x80000000 @ move sign bit in r5 bic xh, xh, #0x80000000 b LSYM(Lad_l) FUNC_END aeabi_f2d FUNC_END extendsfdf2 ARM_FUNC_START floatundidf ARM_FUNC_ALIAS aeabi_ul2d floatundidf orrs r2, r0, r1 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) do_it eq, t mvfeqd f0, #0.0 #else do_it eq #endif RETc(eq) #if !defined (__VFP_FP__) && !defined(__SOFTFP__) @ For hard FPA code we want to return via the tail below so that @ we can return the result in f0 as well as in r0/r1 for backwards @ compatibility. adr ip, LSYM(f0_ret) @ Push pc as well so that RETLDM works correctly. do_push {r4, r5, ip, lr, pc} #else do_push {r4, r5, lr} #endif mov r5, #0 b 2f ARM_FUNC_START floatdidf ARM_FUNC_ALIAS aeabi_l2d floatdidf orrs r2, r0, r1 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) do_it eq, t mvfeqd f0, #0.0 #else do_it eq #endif RETc(eq) #if !defined (__VFP_FP__) && !defined(__SOFTFP__) @ For hard FPA code we want to return via the tail below so that @ we can return the result in f0 as well as in r0/r1 for backwards @ compatibility. adr ip, LSYM(f0_ret) @ Push pc as well so that RETLDM works correctly. do_push {r4, r5, ip, lr, pc} #else do_push {r4, r5, lr} #endif ands r5, ah, #0x80000000 @ sign bit in r5 bpl 2f #if defined(__thumb2__) negs al, al sbc ah, ah, ah, lsl #1 #else rsbs al, al, #0 rsc ah, ah, #0 #endif 2: mov r4, #0x400 @ initial exponent add r4, r4, #(52-1 - 1) @ FPA little-endian: must swap the word order. .ifnc xh, ah mov ip, al mov xh, ah mov xl, ip .endif movs ip, xh, lsr #22 beq LSYM(Lad_p) @ The value is too big. Scale it down a bit... mov r2, #3 movs ip, ip, lsr #3 do_it ne addne r2, r2, #3 movs ip, ip, lsr #3 do_it ne addne r2, r2, #3 add r2, r2, ip, lsr #3 rsb r3, r2, #32 shift1 lsl, ip, xl, r3 shift1 lsr, xl, xl, r2 shiftop orr xl xl xh lsl r3 lr shift1 lsr, xh, xh, r2 add r4, r4, r2 b LSYM(Lad_p) #if !defined (__VFP_FP__) && !defined(__SOFTFP__) @ Legacy code expects the result to be returned in f0. Copy it @ there as well. LSYM(f0_ret): do_push {r0, r1} ldfd f0, [sp], #8 RETLDM #endif FUNC_END floatdidf FUNC_END aeabi_l2d FUNC_END floatundidf FUNC_END aeabi_ul2d #endif /* L_addsubdf3 */ #ifdef L_arm_muldivdf3 ARM_FUNC_START muldf3 ARM_FUNC_ALIAS aeabi_dmul muldf3 do_push {r4, r5, r6, lr} @ Mask out exponents, trap any zero/denormal/INF/NAN. mov ip, #0xff orr ip, ip, #0x700 ands r4, ip, xh, lsr #20 do_it ne, tte COND(and,s,ne) r5, ip, yh, lsr #20 teqne r4, ip teqne r5, ip bleq LSYM(Lml_s) @ Add exponents together add r4, r4, r5 @ Determine final sign. eor r6, xh, yh @ Convert mantissa to unsigned integer. @ If power of two, branch to a separate path. bic xh, xh, ip, lsl #21 bic yh, yh, ip, lsl #21 orrs r5, xl, xh, lsl #12 do_it ne COND(orr,s,ne) r5, yl, yh, lsl #12 orr xh, xh, #0x00100000 orr yh, yh, #0x00100000 beq LSYM(Lml_1) #if __ARM_ARCH__ < 4 @ Put sign bit in r6, which will be restored in yl later. and r6, r6, #0x80000000 @ Well, no way to make it shorter without the umull instruction. stmfd sp!, {r6, r7, r8, r9, sl, fp} mov r7, xl, lsr #16 mov r8, yl, lsr #16 mov r9, xh, lsr #16 mov sl, yh, lsr #16 bic xl, xl, r7, lsl #16 bic yl, yl, r8, lsl #16 bic xh, xh, r9, lsl #16 bic yh, yh, sl, lsl #16 mul ip, xl, yl mul fp, xl, r8 mov lr, #0 adds ip, ip, fp, lsl #16 adc lr, lr, fp, lsr #16 mul fp, r7, yl adds ip, ip, fp, lsl #16 adc lr, lr, fp, lsr #16 mul fp, xl, sl mov r5, #0 adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, r7, yh adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, xh, r8 adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, r9, yl adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, xh, sl mul r6, r9, sl adds r5, r5, fp, lsl #16 adc r6, r6, fp, lsr #16 mul fp, r9, yh adds r5, r5, fp, lsl #16 adc r6, r6, fp, lsr #16 mul fp, xl, yh adds lr, lr, fp mul fp, r7, sl adcs r5, r5, fp mul fp, xh, yl adc r6, r6, #0 adds lr, lr, fp mul fp, r9, r8 adcs r5, r5, fp mul fp, r7, r8 adc r6, r6, #0 adds lr, lr, fp mul fp, xh, yh adcs r5, r5, fp adc r6, r6, #0 ldmfd sp!, {yl, r7, r8, r9, sl, fp} #else @ Here is the actual multiplication. umull ip, lr, xl, yl mov r5, #0 umlal lr, r5, xh, yl and yl, r6, #0x80000000 umlal lr, r5, xl, yh mov r6, #0 umlal r5, r6, xh, yh #endif @ The LSBs in ip are only significant for the final rounding. @ Fold them into lr. teq ip, #0 do_it ne orrne lr, lr, #1 @ Adjust result upon the MSB position. sub r4, r4, #0xff cmp r6, #(1 << (20-11)) sbc r4, r4, #0x300 bcs 1f movs lr, lr, lsl #1 adcs r5, r5, r5 adc r6, r6, r6 1: @ Shift to final position, add sign to result. orr xh, yl, r6, lsl #11 orr xh, xh, r5, lsr #21 mov xl, r5, lsl #11 orr xl, xl, lr, lsr #21 mov lr, lr, lsl #11 @ Check exponent range for under/overflow. subs ip, r4, #(254 - 1) do_it hi cmphi ip, #0x700 bhi LSYM(Lml_u) @ Round the result, merge final exponent. cmp lr, #0x80000000 do_it eq COND(mov,s,eq) lr, xl, lsr #1 adcs xl, xl, #0 adc xh, xh, r4, lsl #20 RETLDM "r4, r5, r6" @ Multiplication by 0x1p*: let''s shortcut a lot of code. LSYM(Lml_1): and r6, r6, #0x80000000 orr xh, r6, xh orr xl, xl, yl eor xh, xh, yh subs r4, r4, ip, lsr #1 do_it gt, tt COND(rsb,s,gt) r5, r4, ip orrgt xh, xh, r4, lsl #20 RETLDM "r4, r5, r6" gt @ Under/overflow: fix things up for the code below. orr xh, xh, #0x00100000 mov lr, #0 subs r4, r4, #1 LSYM(Lml_u): @ Overflow? bgt LSYM(Lml_o) @ Check if denormalized result is possible, otherwise return signed 0. cmn r4, #(53 + 1) do_it le, tt movle xl, #0 bicle xh, xh, #0x7fffffff RETLDM "r4, r5, r6" le @ Find out proper shift value. rsb r4, r4, #0 subs r4, r4, #32 bge 2f adds r4, r4, #12 bgt 1f @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. add r4, r4, #20 rsb r5, r4, #32 shift1 lsl, r3, xl, r5 shift1 lsr, xl, xl, r4 shiftop orr xl xl xh lsl r5 r2 and r2, xh, #0x80000000 bic xh, xh, #0x80000000 adds xl, xl, r3, lsr #31 shiftop adc xh r2 xh lsr r4 r6 orrs lr, lr, r3, lsl #1 do_it eq biceq xl, xl, r3, lsr #31 RETLDM "r4, r5, r6" @ shift result right of 21 to 31 bits, or left 11 to 1 bits after @ a register switch from xh to xl. Then round. 1: rsb r4, r4, #12 rsb r5, r4, #32 shift1 lsl, r3, xl, r4 shift1 lsr, xl, xl, r5 shiftop orr xl xl xh lsl r4 r2 bic xh, xh, #0x7fffffff adds xl, xl, r3, lsr #31 adc xh, xh, #0 orrs lr, lr, r3, lsl #1 do_it eq biceq xl, xl, r3, lsr #31 RETLDM "r4, r5, r6" @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. 2: rsb r5, r4, #32 shiftop orr lr lr xl lsl r5 r2 shift1 lsr, r3, xl, r4 shiftop orr r3 r3 xh lsl r5 r2 shift1 lsr, xl, xh, r4 bic xh, xh, #0x7fffffff shiftop bic xl xl xh lsr r4 r2 add xl, xl, r3, lsr #31 orrs lr, lr, r3, lsl #1 do_it eq biceq xl, xl, r3, lsr #31 RETLDM "r4, r5, r6" @ One or both arguments are denormalized. @ Scale them leftwards and preserve sign bit. LSYM(Lml_d): teq r4, #0 bne 2f and r6, xh, #0x80000000 1: movs xl, xl, lsl #1 adc xh, xh, xh tst xh, #0x00100000 do_it eq subeq r4, r4, #1 beq 1b orr xh, xh, r6 teq r5, #0 do_it ne RETc(ne) 2: and r6, yh, #0x80000000 3: movs yl, yl, lsl #1 adc yh, yh, yh tst yh, #0x00100000 do_it eq subeq r5, r5, #1 beq 3b orr yh, yh, r6 RET LSYM(Lml_s): @ Isolate the INF and NAN cases away teq r4, ip and r5, ip, yh, lsr #20 do_it ne teqne r5, ip beq 1f @ Here, one or more arguments are either denormalized or zero. orrs r6, xl, xh, lsl #1 do_it ne COND(orr,s,ne) r6, yl, yh, lsl #1 bne LSYM(Lml_d) @ Result is 0, but determine sign anyway. LSYM(Lml_z): eor xh, xh, yh and xh, xh, #0x80000000 mov xl, #0 RETLDM "r4, r5, r6" 1: @ One or both args are INF or NAN. orrs r6, xl, xh, lsl #1 do_it eq, te moveq xl, yl moveq xh, yh COND(orr,s,ne) r6, yl, yh, lsl #1 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN teq r4, ip bne 1f orrs r6, xl, xh, lsl #12 bne LSYM(Lml_n) @ NAN * -> NAN 1: teq r5, ip bne LSYM(Lml_i) orrs r6, yl, yh, lsl #12 do_it ne, t movne xl, yl movne xh, yh bne LSYM(Lml_n) @ * NAN -> NAN @ Result is INF, but we need to determine its sign. LSYM(Lml_i): eor xh, xh, yh @ Overflow: return INF (sign already in xh). LSYM(Lml_o): and xh, xh, #0x80000000 orr xh, xh, #0x7f000000 orr xh, xh, #0x00f00000 mov xl, #0 RETLDM "r4, r5, r6" @ Return a quiet NAN. LSYM(Lml_n): orr xh, xh, #0x7f000000 orr xh, xh, #0x00f80000 RETLDM "r4, r5, r6" FUNC_END aeabi_dmul FUNC_END muldf3 ARM_FUNC_START divdf3 ARM_FUNC_ALIAS aeabi_ddiv divdf3 do_push {r4, r5, r6, lr} @ Mask out exponents, trap any zero/denormal/INF/NAN. mov ip, #0xff orr ip, ip, #0x700 ands r4, ip, xh, lsr #20 do_it ne, tte COND(and,s,ne) r5, ip, yh, lsr #20 teqne r4, ip teqne r5, ip bleq LSYM(Ldv_s) @ Substract divisor exponent from dividend''s. sub r4, r4, r5 @ Preserve final sign into lr. eor lr, xh, yh @ Convert mantissa to unsigned integer. @ Dividend -> r5-r6, divisor -> yh-yl. orrs r5, yl, yh, lsl #12 mov xh, xh, lsl #12 beq LSYM(Ldv_1) mov yh, yh, lsl #12 mov r5, #0x10000000 orr yh, r5, yh, lsr #4 orr yh, yh, yl, lsr #24 mov yl, yl, lsl #8 orr r5, r5, xh, lsr #4 orr r5, r5, xl, lsr #24 mov r6, xl, lsl #8 @ Initialize xh with final sign bit. and xh, lr, #0x80000000 @ Ensure result will land to known bit position. @ Apply exponent bias accordingly. cmp r5, yh do_it eq cmpeq r6, yl adc r4, r4, #(255 - 2) add r4, r4, #0x300 bcs 1f movs yh, yh, lsr #1 mov yl, yl, rrx 1: @ Perform first substraction to align result to a nibble. subs r6, r6, yl sbc r5, r5, yh movs yh, yh, lsr #1 mov yl, yl, rrx mov xl, #0x00100000 mov ip, #0x00080000 @ The actual division loop. 1: subs lr, r6, yl sbcs lr, r5, yh do_it cs, tt subcs r6, r6, yl movcs r5, lr orrcs xl, xl, ip movs yh, yh, lsr #1 mov yl, yl, rrx subs lr, r6, yl sbcs lr, r5, yh do_it cs, tt subcs r6, r6, yl movcs r5, lr orrcs xl, xl, ip, lsr #1 movs yh, yh, lsr #1 mov yl, yl, rrx subs lr, r6, yl sbcs lr, r5, yh do_it cs, tt subcs r6, r6, yl movcs r5, lr orrcs xl, xl, ip, lsr #2 movs yh, yh, lsr #1 mov yl, yl, rrx subs lr, r6, yl sbcs lr, r5, yh do_it cs, tt subcs r6, r6, yl movcs r5, lr orrcs xl, xl, ip, lsr #3 orrs lr, r5, r6 beq 2f mov r5, r5, lsl #4 orr r5, r5, r6, lsr #28 mov r6, r6, lsl #4 mov yh, yh, lsl #3 orr yh, yh, yl, lsr #29 mov yl, yl, lsl #3 movs ip, ip, lsr #4 bne 1b @ We are done with a word of the result. @ Loop again for the low word if this pass was for the high word. tst xh, #0x00100000 bne 3f orr xh, xh, xl mov xl, #0 mov ip, #0x80000000 b 1b 2: @ Be sure result starts in the high word. tst xh, #0x00100000 do_it eq, t orreq xh, xh, xl moveq xl, #0 3: @ Check exponent range for under/overflow. subs ip, r4, #(254 - 1) do_it hi cmphi ip, #0x700 bhi LSYM(Lml_u) @ Round the result, merge final exponent. subs ip, r5, yh do_it eq, t COND(sub,s,eq) ip, r6, yl COND(mov,s,eq) ip, xl, lsr #1 adcs xl, xl, #0 adc xh, xh, r4, lsl #20 RETLDM "r4, r5, r6" @ Division by 0x1p*: shortcut a lot of code. LSYM(Ldv_1): and lr, lr, #0x80000000 orr xh, lr, xh, lsr #12 adds r4, r4, ip, lsr #1 do_it gt, tt COND(rsb,s,gt) r5, r4, ip orrgt xh, xh, r4, lsl #20 RETLDM "r4, r5, r6" gt orr xh, xh, #0x00100000 mov lr, #0 subs r4, r4, #1 b LSYM(Lml_u) @ Result mightt need to be denormalized: put remainder bits @ in lr for rounding considerations. LSYM(Ldv_u): orr lr, r5, r6 b LSYM(Lml_u) @ One or both arguments is either INF, NAN or zero. LSYM(Ldv_s): and r5, ip, yh, lsr #20 teq r4, ip do_it eq teqeq r5, ip beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN teq r4, ip bne 1f orrs r4, xl, xh, lsl #12 bne LSYM(Lml_n) @ NAN / -> NAN teq r5, ip bne LSYM(Lml_i) @ INF / -> INF mov xl, yl mov xh, yh b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN 1: teq r5, ip bne 2f orrs r5, yl, yh, lsl #12 beq LSYM(Lml_z) @ / INF -> 0 mov xl, yl mov xh, yh b LSYM(Lml_n) @ / NAN -> NAN 2: @ If both are nonzero, we need to normalize and resume above. orrs r6, xl, xh, lsl #1 do_it ne COND(orr,s,ne) r6, yl, yh, lsl #1 bne LSYM(Lml_d) @ One or both arguments are 0. orrs r4, xl, xh, lsl #1 bne LSYM(Lml_i) @ / 0 -> INF orrs r5, yl, yh, lsl #1 bne LSYM(Lml_z) @ 0 / -> 0 b LSYM(Lml_n) @ 0 / 0 -> NAN FUNC_END aeabi_ddiv FUNC_END divdf3 #endif /* L_muldivdf3 */ #ifdef L_arm_cmpdf2 @ Note: only r0 (return value) and ip are clobbered here. ARM_FUNC_START gtdf2 ARM_FUNC_ALIAS gedf2 gtdf2 mov ip, #-1 b 1f ARM_FUNC_START ltdf2 ARM_FUNC_ALIAS ledf2 ltdf2 mov ip, #1 b 1f ARM_FUNC_START cmpdf2 ARM_FUNC_ALIAS nedf2 cmpdf2 ARM_FUNC_ALIAS eqdf2 cmpdf2 mov ip, #1 @ how should we specify unordered here? 1: str ip, [sp, #-4]! @ Trap any INF/NAN first. mov ip, xh, lsl #1 mvns ip, ip, asr #21 mov ip, yh, lsl #1 do_it ne COND(mvn,s,ne) ip, ip, asr #21 beq 3f @ Test for equality. @ Note that 0.0 is equal to -0.0. 2: add sp, sp, #4 orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 do_it eq, e COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 teqne xh, yh @ or xh == yh do_it eq, tt teqeq xl, yl @ and xl == yl moveq r0, #0 @ then equal. RETc(eq) @ Clear C flag cmn r0, #0 @ Compare sign, teq xh, yh @ Compare values if same sign do_it pl cmppl xh, yh do_it eq cmpeq xl, yl @ Result: do_it cs, e movcs r0, yh, asr #31 mvncc r0, yh, asr #31 orr r0, r0, #1 RET @ Look for a NAN. 3: mov ip, xh, lsl #1 mvns ip, ip, asr #21 bne 4f orrs ip, xl, xh, lsl #12 bne 5f @ x is NAN 4: mov ip, yh, lsl #1 mvns ip, ip, asr #21 bne 2b orrs ip, yl, yh, lsl #12 beq 2b @ y is not NAN 5: ldr r0, [sp], #4 @ unordered return code RET FUNC_END gedf2 FUNC_END gtdf2 FUNC_END ledf2 FUNC_END ltdf2 FUNC_END nedf2 FUNC_END eqdf2 FUNC_END cmpdf2 ARM_FUNC_START aeabi_cdrcmple mov ip, r0 mov r0, r2 mov r2, ip mov ip, r1 mov r1, r3 mov r3, ip b 6f ARM_FUNC_START aeabi_cdcmpeq ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq @ The status-returning routines are required to preserve all @ registers except ip, lr, and cpsr. 6: do_push {r0, lr} ARM_CALL cmpdf2 @ Set the Z flag correctly, and the C flag unconditionally. cmp r0, #0 @ Clear the C flag if the return value was -1, indicating @ that the first operand was smaller than the second. do_it mi cmnmi r0, #0 RETLDM "r0" FUNC_END aeabi_cdcmple FUNC_END aeabi_cdcmpeq FUNC_END aeabi_cdrcmple ARM_FUNC_START aeabi_dcmpeq str lr, [sp, #-8]! ARM_CALL aeabi_cdcmple do_it eq, e moveq r0, #1 @ Equal to. movne r0, #0 @ Less than, greater than, or unordered. RETLDM FUNC_END aeabi_dcmpeq ARM_FUNC_START aeabi_dcmplt str lr, [sp, #-8]! ARM_CALL aeabi_cdcmple do_it cc, e movcc r0, #1 @ Less than. movcs r0, #0 @ Equal to, greater than, or unordered. RETLDM FUNC_END aeabi_dcmplt ARM_FUNC_START aeabi_dcmple str lr, [sp, #-8]! ARM_CALL aeabi_cdcmple do_it ls, e movls r0, #1 @ Less than or equal to. movhi r0, #0 @ Greater than or unordered. RETLDM FUNC_END aeabi_dcmple ARM_FUNC_START aeabi_dcmpge str lr, [sp, #-8]! ARM_CALL aeabi_cdrcmple do_it ls, e movls r0, #1 @ Operand 2 is less than or equal to operand 1. movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. RETLDM FUNC_END aeabi_dcmpge ARM_FUNC_START aeabi_dcmpgt str lr, [sp, #-8]! ARM_CALL aeabi_cdrcmple do_it cc, e movcc r0, #1 @ Operand 2 is less than operand 1. movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, @ or they are unordered. RETLDM FUNC_END aeabi_dcmpgt #endif /* L_cmpdf2 */ #ifdef L_arm_unorddf2 ARM_FUNC_START unorddf2 ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 mov ip, xh, lsl #1 mvns ip, ip, asr #21 bne 1f orrs ip, xl, xh, lsl #12 bne 3f @ x is NAN 1: mov ip, yh, lsl #1 mvns ip, ip, asr #21 bne 2f orrs ip, yl, yh, lsl #12 bne 3f @ y is NAN 2: mov r0, #0 @ arguments are ordered. RET 3: mov r0, #1 @ arguments are unordered. RET FUNC_END aeabi_dcmpun FUNC_END unorddf2 #endif /* L_unorddf2 */ #ifdef L_arm_fixdfsi ARM_FUNC_START fixdfsi ARM_FUNC_ALIAS aeabi_d2iz fixdfsi @ check exponent range. mov r2, xh, lsl #1 adds r2, r2, #(1 << 21) bcs 2f @ value is INF or NAN bpl 1f @ value is too small mov r3, #(0xfffffc00 + 31) subs r2, r3, r2, asr #21 bls 3f @ value is too large @ scale value mov r3, xh, lsl #11 orr r3, r3, #0x80000000 orr r3, r3, xl, lsr #21 tst xh, #0x80000000 @ the sign bit shift1 lsr, r0, r3, r2 do_it ne rsbne r0, r0, #0 RET 1: mov r0, #0 RET 2: orrs xl, xl, xh, lsl #12 bne 4f @ x is NAN. 3: ands r0, xh, #0x80000000 @ the sign bit do_it eq moveq r0, #0x7fffffff @ maximum signed positive si RET 4: mov r0, #0 @ How should we convert NAN? RET FUNC_END aeabi_d2iz FUNC_END fixdfsi #endif /* L_fixdfsi */ #ifdef L_arm_fixunsdfsi ARM_FUNC_START fixunsdfsi ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi @ check exponent range. movs r2, xh, lsl #1 bcs 1f @ value is negative adds r2, r2, #(1 << 21) bcs 2f @ value is INF or NAN bpl 1f @ value is too small mov r3, #(0xfffffc00 + 31) subs r2, r3, r2, asr #21 bmi 3f @ value is too large @ scale value mov r3, xh, lsl #11 orr r3, r3, #0x80000000 orr r3, r3, xl, lsr #21 shift1 lsr, r0, r3, r2 RET 1: mov r0, #0 RET 2: orrs xl, xl, xh, lsl #12 bne 4f @ value is NAN. 3: mov r0, #0xffffffff @ maximum unsigned si RET 4: mov r0, #0 @ How should we convert NAN? RET FUNC_END aeabi_d2uiz FUNC_END fixunsdfsi #endif /* L_fixunsdfsi */ #ifdef L_arm_truncdfsf2 ARM_FUNC_START truncdfsf2 ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 @ check exponent range. mov r2, xh, lsl #1 subs r3, r2, #((1023 - 127) << 21) do_it cs, t COND(sub,s,cs) ip, r3, #(1 << 21) COND(rsb,s,cs) ip, ip, #(254 << 21) bls 2f @ value is out of range 1: @ shift and round mantissa and ip, xh, #0x80000000 mov r2, xl, lsl #3 orr xl, ip, xl, lsr #29 cmp r2, #0x80000000 adc r0, xl, r3, lsl #2 do_it eq biceq r0, r0, #1 RET 2: @ either overflow or underflow tst xh, #0x40000000 bne 3f @ overflow @ check if denormalized value is possible adds r2, r3, #(23 << 21) do_it lt, t andlt r0, xh, #0x80000000 @ too small, return signed 0. RETc(lt) @ denormalize value so we can resume with the code above afterwards. orr xh, xh, #0x00100000 mov r2, r2, lsr #21 rsb r2, r2, #24 rsb ip, r2, #32 #if defined(__thumb2__) lsls r3, xl, ip #else movs r3, xl, lsl ip #endif shift1 lsr, xl, xl, r2 do_it ne orrne xl, xl, #1 @ fold r3 for rounding considerations. mov r3, xh, lsl #11 mov r3, r3, lsr #11 shiftop orr xl xl r3 lsl ip ip shift1 lsr, r3, r3, r2 mov r3, r3, lsl #1 b 1b 3: @ chech for NAN mvns r3, r2, asr #21 bne 5f @ simple overflow orrs r3, xl, xh, lsl #12 do_it ne, tt movne r0, #0x7f000000 orrne r0, r0, #0x00c00000 RETc(ne) @ return NAN 5: @ return INF with sign and r0, xh, #0x80000000 orr r0, r0, #0x7f000000 orr r0, r0, #0x00800000 RET FUNC_END aeabi_d2f FUNC_END truncdfsf2 #endif /* L_truncdfsf2 */