ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2
+
@ flip sign bit
eor xh, xh, #0x80000000
RET
eor xh, xh, #0x80000000 @ flip sign bit of first arg
b 1f
- ARM_FUNC_START subdf3
+ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3
- @ flip sign bit of second arg
- eor yh, yh, #0x80000000
+
+ eor yh, yh, #0x80000000 @ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
b 1f @ Skip Thumb-code prologue
#endif
ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3
-1: @ Compare both args, return zero if equal but the sign.
- teq xl, yl
- eoreq ip, xh, yh
- teqeq ip, #0x80000000
- beq LSYM(Lad_z)
-
- @ If first arg is 0 or -0, return second arg.
- @ If second arg is 0 or -0, return first arg.
- orrs ip, xl, xh, lsl #1
- moveq xl, yl
- moveq xh, yh
- orrnes ip, yl, yh, lsl #1
- RETc(eq)
-
- stmfd sp!, {r4, r5, lr}
-
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
+1: stmfd sp!, {r4, r5, lr}
- @ If either of them is 0x7ff, result will be INF or NAN
- teq r4, ip
- teqne r5, ip
- beq LSYM(Lad_i)
+ @ Look for zeroes, equal values, INF, or NAN.
+ mov r4, xh, lsl #1
+ mov r5, yh, lsl #1
+ teq r4, r5
+ teqeq xl, yl
+ orrnes ip, r4, xl
+ orrnes ip, r5, yl
+ mvnnes ip, r4, asr #21
+ mvnnes ip, r5, asr #21
+ beq LSYM(Lad_s)
@ Compute exponent difference. Make largest exponent in r4,
@ corresponding arg in xh-xl, and positive exponent difference in r5.
- subs r5, r5, r4
+ mov r4, r4, lsr #21
+ rsbs r5, r4, r5, lsr #21
rsblt r5, r5, #0
ble 1f
add r4, r4, r5
eor yl, xl, yl
eor yh, xh, yh
1:
-
@ If exponent difference is too large, return largest argument
@ already in xh-xl. We need up to 54 bit to handle proper rounding
@ of 0x1p54 - 1.1.
- cmp r5, #(54 << 20)
+ cmp r5, #54
RETLDM "r4, r5" hi
@ Convert mantissa to signed integer.
tst xh, #0x80000000
- bic xh, xh, ip, lsl #1
- orr xh, xh, #0x00100000
+ mov xh, xh, lsl #12
+ mov ip, #0x00100000
+ orr xh, ip, xh, lsr #12
beq 1f
rsbs xl, xl, #0
rsc xh, xh, #0
1:
tst yh, #0x80000000
- bic yh, yh, ip, lsl #1
- orr yh, yh, #0x00100000
+ mov yh, yh, lsl #12
+ orr yh, ip, yh, lsr #12
beq 1f
rsbs yl, yl, #0
rsc yh, yh, #0
teq r4, r5
beq LSYM(Lad_d)
LSYM(Lad_x):
- @ Scale down second arg with exponent difference.
- @ Apply shift one bit left to first arg and the rest to second arg
- @ to simplify things later, but only if exponent does not become 0.
- mov ip, #0
- movs r5, r5, lsr #20
- beq 3f
- teq r4, #(1 << 20)
- beq 1f
- movs xl, xl, lsl #1
- adc xh, ip, xh, lsl #1
- sub r4, r4, #(1 << 20)
- subs r5, r5, #1
- beq 3f
- @ Shift yh-yl right per r5, keep leftover bits into ip.
-1: rsbs lr, r5, #32
- blt 2f
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r4, r4, #1
+
+ @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
+ rsbs lr, r5, #32
+ blt 1f
mov ip, yl, lsl lr
- mov yl, yl, lsr r5
- orr yl, yl, yh, lsl lr
- mov yh, yh, asr r5
- b 3f
-2: sub r5, r5, #32
+ adds xl, xl, yl, lsr r5
+ adc xh, xh, #0
+ adds xl, xl, yh, lsl lr
+ adcs xh, xh, yh, asr r5
+ b 2f
+1: sub r5, r5, #32
add lr, lr, #32
cmp yl, #1
- adc ip, ip, yh, lsl lr
- mov yl, yh, asr r5
- mov yh, yh, asr #32
-3:
- @ the actual addition
- adds xl, xl, yl
- adc xh, xh, yh
-
+ mov ip, yh, lsl lr
+ orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
+ adds xl, xl, yh, asr r5
+ adcs xh, xh, yh, asr #31
+2:
@ We now have a result in xh-xl-ip.
- @ Keep absolute value in xh-xl-ip, sign in r5.
- ands r5, xh, #0x80000000
+ @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
+ and r5, xh, #0x80000000
bpl LSYM(Lad_p)
rsbs ip, ip, #0
rscs xl, xl, #0
@ Determine how to normalize the result.
LSYM(Lad_p):
cmp xh, #0x00100000
- bcc LSYM(Lad_l)
+ bcc LSYM(Lad_a)
cmp xh, #0x00200000
- bcc LSYM(Lad_r0)
- cmp xh, #0x00400000
- bcc LSYM(Lad_r1)
+ bcc LSYM(Lad_e)
@ Result needs to be shifted right.
movs xh, xh, lsr #1
movs xl, xl, rrx
- movs ip, ip, rrx
- orrcs ip, ip, #1
- add r4, r4, #(1 << 20)
-LSYM(Lad_r1):
- movs xh, xh, lsr #1
- movs xl, xl, rrx
- movs ip, ip, rrx
- orrcs ip, ip, #1
- add r4, r4, #(1 << 20)
+ mov ip, ip, rrx
+ add r4, r4, #1
+
+ @ Make sure we did not bust our exponent.
+ mov r2, r4, lsl #21
+ cmn r2, #(2 << 21)
+ bcs LSYM(Lad_o)
@ Our result is now properly aligned into xh-xl, remaining bits in ip.
@ Round with MSB of ip. If halfway between two numbers, round towards
@ LSB of xl = 0.
-LSYM(Lad_r0):
- adds xl, xl, ip, lsr #31
- adc xh, xh, #0
- teq ip, #0x80000000
- biceq xl, xl, #1
-
- @ One extreme rounding case may add a new MSB. Adjust exponent.
- @ That MSB will be cleared when exponent is merged below.
- tst xh, #0x00200000
- addne r4, r4, #(1 << 20)
-
- @ Make sure we did not bust our exponent.
- adds ip, r4, #(1 << 20)
- bmi LSYM(Lad_o)
-
@ Pack final result together.
LSYM(Lad_e):
- bic xh, xh, #0x00300000
- orr xh, xh, r4
+ cmp ip, #0x80000000
+ moveqs ip, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
orr xh, xh, r5
RETLDM "r4, r5"
-LSYM(Lad_l):
@ Result must be shifted left and exponent adjusted.
- @ No rounding necessary since ip will always be 0.
+LSYM(Lad_a):
+ movs ip, ip, lsl #1
+ adcs xl, xl, xl
+ adc xh, xh, xh
+ tst xh, #0x00100000
+ sub r4, r4, #1
+ bne LSYM(Lad_e)
+
+ @ No rounding necessary since ip will always be 0 at this point.
+LSYM(Lad_l):
+
#if __ARM_ARCH__ < 5
teq xh, #0
- movne r3, #-11
- moveq r3, #21
+ movne r3, #20
+ moveq r3, #52
moveq xh, xl
moveq xl, #0
mov r2, xh
- movs ip, xh, lsr #16
- moveq r2, r2, lsl #16
- addeq r3, r3, #16
- tst r2, #0xff000000
- moveq r2, r2, lsl #8
- addeq r3, r3, #8
- tst r2, #0xf0000000
- moveq r2, r2, lsl #4
- addeq r3, r3, #4
- tst r2, #0xc0000000
- moveq r2, r2, lsl #2
- addeq r3, r3, #2
- tst r2, #0x80000000
- addeq r3, r3, #1
+ cmp r2, #(1 << 16)
+ movhs r2, r2, lsr #16
+ subhs r3, r3, #16
+ cmp r2, #(1 << 8)
+ movhs r2, r2, lsr #8
+ subhs r3, r3, #8
+ cmp r2, #(1 << 4)
+ movhs r2, r2, lsr #4
+ subhs r3, r3, #4
+ cmp r2, #(1 << 2)
+ subhs r3, r3, #2
+ sublo r3, r3, r2, lsr #1
+ sub r3, r3, r2, lsr #3
#else
movle xl, xl, lsl r2
@ adjust exponent accordingly.
-3: subs r4, r4, r3, lsl #20
- bgt LSYM(Lad_e)
+3: subs r4, r4, r3
+ addge xh, xh, r4, lsl #20
+ orrge xh, xh, r5
+ RETLDM "r4, r5" ge
@ Exponent too small, denormalize result.
@ Find out proper shift value.
- mvn r4, r4, asr #20
- subs r4, r4, #30
+ mvn r4, r4
+ subs r4, r4, #31
bge 2f
adds r4, r4, #12
bgt 1f
RETLDM "r4, r5"
@ Adjust exponents for denormalized arguments.
+ @ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
teq r4, #0
- eoreq xh, xh, #0x00100000
- addeq r4, r4, #(1 << 20)
eor yh, yh, #0x00100000
- subne r5, r5, #(1 << 20)
+ eoreq xh, xh, #0x00100000
+ addeq r4, r4, #1
+ subne r5, r5, #1
b LSYM(Lad_x)
- @ Result is x - x = 0, unless x = INF or NAN.
-LSYM(Lad_z):
- sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000
- and r2, xh, ip
- teq r2, ip
- orreq xh, ip, #0x00080000
+
+LSYM(Lad_s):
+ mvns ip, r4, asr #21
+ mvnnes ip, r5, asr #21
+ beq LSYM(Lad_i)
+
+ teq r4, r5
+ teqeq xl, yl
+ beq 1f
+
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ teq r4, #0
+ moveq xh, yh
+ moveq xl, yl
+ RETLDM "r4, r5"
+
+1: teq xh, yh
+
+ @ Result is x - x = 0.
movne xh, #0
- mov xl, #0
- RET
+ movne xl, #0
+ RETLDM "r4, r5" ne
+
+ @ Result is x + x = 2x.
+ movs ip, r4, lsr #21
+ bne 2f
+ movs xl, xl, lsl #1
+ adcs xh, xh, xh
+ orrcs xh, xh, #0x80000000
+ RETLDM "r4, r5"
+2: adds r4, r4, #(2 << 21)
+ addcc xh, xh, #(1 << 20)
+ RETLDM "r4, r5" cc
+ and r5, xh, #0x80000000
@ Overflow: return INF.
LSYM(Lad_o):
@ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
@ if either is NAN: return NAN
@ if opposite sign: return NAN
- @ return xh-xl (which is INF or -INF)
+ @ otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
- teq r4, ip
+ mvns ip, r4, asr #21
movne xh, yh
movne xl, yl
- teqeq r5, ip
- RETLDM "r4, r5" ne
-
+ mvneqs ip, r5, asr #21
+ movne yh, xh
+ movne yl, xl
orrs r4, xl, xh, lsl #12
- orreqs r4, yl, yh, lsl #12
+ orreqs r5, yl, yh, lsl #12
teqeq xh, yh
- orrne xh, r5, #0x00080000
- movne xl, #0
+ orrne xh, xh, #0x00080000 @ quiet NAN
RETLDM "r4, r5"
FUNC_END aeabi_dsub
ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
+
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52-1) << 20)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
mov r5, #0 @ sign bit is 0
+ .ifnc xl, r0
mov xl, r0
+ .endif
mov xh, #0
b LSYM(Lad_l)
ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf
+
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52-1) << 20)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
ands r5, r0, #0x80000000 @ sign bit in r5
rsbmi r0, r0, #0 @ absolute value
+ .ifnc xl, r0
mov xl, r0
+ .endif
mov xh, #0
b LSYM(Lad_l)
ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
-
- movs r2, r0, lsl #1
- beq 1f @ value is 0.0 or -0.0
+
+ movs r2, r0, lsl #1 @ toss sign bit
mov xh, r2, asr #3 @ stretch exponent
mov xh, xh, rrx @ retrieve sign bit
mov xl, r2, lsl #28 @ retrieve remaining bits
- ands r2, r2, #0xff000000 @ isolate exponent
- beq 2f @ exponent was 0 but not mantissa
- teq r2, #0xff000000 @ check if INF or NAN
+ andnes r3, r2, #0xff000000 @ isolate exponent
+ teqne r3, #0xff000000 @ if not 0, check if INF or NAN
eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
- RET
+ RETc(ne) @ and return it.
-1: mov xh, r0
- mov xl, #0
- RET
+ teq r2, #0 @ if actually 0
+ teqne r3, #0xff000000 @ or INF or NAN
+ RETc(eq) @ we are done already.
-2: @ value was denormalized. We can normalize it now.
+ @ value was denormalized. We can normalize it now.
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x380 << 20) @ setup corresponding exponent
- add r4, r4, #(1 << 20)
+ mov r4, #0x380 @ setup corresponding exponent
and r5, xh, #0x80000000 @ move sign bit in r5
bic xh, xh, #0x80000000
b LSYM(Lad_l)
ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf
-
+
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
+
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
- adr ip, 1f
+ adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
+
mov r5, #0
b 2f
ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf
+
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
+
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
- adr ip, 1f
+ adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
+
ands r5, ah, #0x80000000 @ sign bit in r5
bpl 2f
rsbs al, al, #0
rsc ah, ah, #0
2:
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52 - 1) << 20)
-#if !defined (__VFP_FP__) && !defined(__ARMEB__)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+
@ FPA little-endian: must swap the word order.
+ .ifnc xh, ah
mov ip, al
mov xh, ah
mov xl, ip
-#endif
- movs ip, xh, lsr #23
+ .endif
+
+ movs ip, xh, lsr #22
beq LSYM(Lad_p)
- @ The value's too big. Scale it down a bit...
+
+ @ The value is too big. Scale it down a bit...
mov r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
+ add r2, r2, ip
+
rsb r3, r2, #32
mov ip, xl, lsl r3
mov xl, xl, lsr r2
orr xl, xl, xh, lsl r3
mov xh, xh, lsr r2
- add r4, r4, r2, lsl #20
+ add r4, r4, r2
b LSYM(Lad_p)
+
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-1:
+
@ Legacy code expects the result to be returned in f0. Copy it
@ there as well.
+LSYM(f0_ret):
stmfd sp!, {r0, r1}
ldfd f0, [sp], #8
RETLDM
+
#endif
+
FUNC_END floatdidf
FUNC_END aeabi_l2d
FUNC_END floatundidf
ARM_FUNC_ALIAS aeabi_dmul muldf3
stmfd sp!, {r4, r5, r6, lr}
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
-
- @ Trap any INF/NAN.
- teq r4, ip
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ andnes r5, ip, yh, lsr #20
+ teqne r4, ip
teqne r5, ip
- beq LSYM(Lml_s)
+ bleq LSYM(Lml_s)
- @ Trap any multiplication by 0.
- orrs r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- beq LSYM(Lml_z)
-
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
- @ Then add both exponents together.
- movs r4, r4, lsr #1
- teqne r5, #0
- beq LSYM(Lml_d)
-LSYM(Lml_x):
- add r4, r4, r5, asr #1
-
- @ Preserve final sign in r4 along with exponent for now.
- teq xh, yh
- orrmi r4, r4, #0x8000
+ @ Add exponents together
+ add r4, r4, r5
+
+ @ Determine final sign.
+ eor r6, xh, yh
@ Convert mantissa to unsigned integer.
- bic xh, xh, ip, lsl #1
- bic yh, yh, ip, lsl #1
+ @ If power of two, branch to a separate path.
+ bic xh, xh, ip, lsl #21
+ bic yh, yh, ip, lsl #21
+ orrs r5, xl, xh, lsl #12
+ orrnes r5, yl, yh, lsl #12
orr xh, xh, #0x00100000
orr yh, yh, #0x00100000
+ beq LSYM(Lml_1)
#if __ARM_ARCH__ < 4
+ @ Put sign bit in r6, which will be restored in yl later.
+ and r6, r6, #0x80000000
+
@ Well, no way to make it shorter without the umull instruction.
- @ We must perform that 53 x 53 bit multiplication by hand.
- stmfd sp!, {r7, r8, r9, sl, fp}
+ stmfd sp!, {r6, r7, r8, r9, sl, fp}
mov r7, xl, lsr #16
mov r8, yl, lsr #16
mov r9, xh, lsr #16
mul fp, xh, yh
adcs r5, r5, fp
adc r6, r6, #0
- ldmfd sp!, {r7, r8, r9, sl, fp}
+ ldmfd sp!, {yl, r7, r8, r9, sl, fp}
#else
- @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
+ @ Here is the actual multiplication.
umull ip, lr, xl, yl
mov r5, #0
- umlal lr, r5, xl, yh
umlal lr, r5, xh, yl
+ and yl, r6, #0x80000000
+ umlal lr, r5, xl, yh
mov r6, #0
umlal r5, r6, xh, yh
#endif
@ The LSBs in ip are only significant for the final rounding.
- @ Fold them into one bit of lr.
+ @ Fold them into lr.
teq ip, #0
orrne lr, lr, #1
- @ Put final sign in xh.
- mov xh, r4, lsl #16
- bic r4, r4, #0x8000
-
- @ Adjust result if one extra MSB appeared (one of four times).
- tst r6, #(1 << 9)
- beq 1f
- add r4, r4, #(1 << 19)
- movs r6, r6, lsr #1
- movs r5, r5, rrx
- movs lr, lr, rrx
- orrcs lr, lr, #1
-1:
- @ Scale back to 53 bits.
- @ xh contains sign bit already.
- orr xh, xh, r6, lsl #12
- orr xh, xh, r5, lsr #20
- mov xl, r5, lsl #12
- orr xl, xl, lr, lsr #20
-
- @ Apply exponent bias, check range for underflow.
- sub r4, r4, #0x00f80000
- subs r4, r4, #0x1f000000
- ble LSYM(Lml_u)
-
- @ Round the result.
- movs lr, lr, lsl #12
- bpl 1f
- adds xl, xl, #1
- adc xh, xh, #0
- teq lr, #0x80000000
- biceq xl, xl, #1
-
- @ Rounding may have produced an extra MSB here.
- @ The extra bit is cleared before merging the exponent below.
- tst xh, #0x00200000
- addne r4, r4, #(1 << 19)
+ @ Adjust result upon the MSB position.
+ sub r4, r4, #0xff
+ cmp r6, #(1 << (20-11))
+ sbc r4, r4, #0x300
+ bcs 1f
+ movs lr, lr, lsl #1
+ adcs r5, r5, r5
+ adc r6, r6, r6
1:
- @ Check exponent for overflow.
- adds ip, r4, #(1 << 19)
- tst ip, #(1 << 30)
- bne LSYM(Lml_o)
-
- @ Add final exponent.
- bic xh, xh, #0x00300000
- orr xh, xh, r4, lsl #1
+ @ Shift to final position, add sign to result.
+ orr xh, yl, r6, lsl #11
+ orr xh, xh, r5, lsr #21
+ mov xl, r5, lsl #11
+ orr xl, xl, lr, lsr #21
+ mov lr, lr, lsl #11
+
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp lr, #0x80000000
+ moveqs lr, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
- @ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+	@ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+ and r6, r6, #0x80000000
+ orr xh, r6, xh
+ orr xl, xl, yl
eor xh, xh, yh
-LSYM(Ldv_z):
- bic xh, xh, #0x7fffffff
- mov xl, #0
- RETLDM "r4, r5, r6"
+ subs r4, r4, ip, lsr #1
+ rsbgts r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6" gt
+
+ @ Under/overflow: fix things up for the code below.
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ subs r4, r4, #1
- @ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
- cmn r4, #(53 << 19)
+ @ Overflow?
+ bgt LSYM(Lml_o)
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r4, #(53 + 1)
movle xl, #0
bicle xh, xh, #0x7fffffff
RETLDM "r4, r5, r6" le
@ Find out proper shift value.
-LSYM(Lml_r):
- mvn r4, r4, asr #19
- subs r4, r4, #30
+ rsb r4, r4, #0
+ subs r4, r4, #32
bge 2f
adds r4, r4, #12
bgt 1f
mov r3, xl, lsl r5
mov xl, xl, lsr r4
orr xl, xl, xh, lsl r5
- movs xh, xh, lsl #1
- mov xh, xh, lsr r4
- mov xh, xh, rrx
+ and r2, xh, #0x80000000
+ bic xh, xh, #0x80000000
adds xl, xl, r3, lsr #31
- adc xh, xh, #0
- teq lr, #0
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ adc xh, r2, xh, lsr r4
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
bic xh, xh, #0x7fffffff
adds xl, xl, r3, lsr #31
adc xh, xh, #0
- teq lr, #0
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
@ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
2: rsb r5, r4, #32
- mov r6, xl, lsl r5
+ orr lr, lr, xl, lsl r5
mov r3, xl, lsr r4
orr r3, r3, xh, lsl r5
mov xl, xh, lsr r4
bic xh, xh, #0x7fffffff
bic xl, xl, xh, lsr r4
add xl, xl, r3, lsr #31
- orrs r6, r6, lr
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
- mov lr, #0
teq r4, #0
bne 2f
and r6, xh, #0x80000000
1: movs xl, xl, lsl #1
- adc xh, lr, xh, lsl #1
+ adc xh, xh, xh
tst xh, #0x00100000
- subeq r4, r4, #(1 << 19)
+ subeq r4, r4, #1
beq 1b
orr xh, xh, r6
teq r5, #0
- bne LSYM(Lml_x)
+ movne pc, lr
2: and r6, yh, #0x80000000
3: movs yl, yl, lsl #1
- adc yh, lr, yh, lsl #1
+ adc yh, yh, yh
tst yh, #0x00100000
- subeq r5, r5, #(1 << 20)
+ subeq r5, r5, #1
beq 3b
orr yh, yh, r6
- b LSYM(Lml_x)
+ mov pc, lr
- @ One or both args are INF or NAN.
LSYM(Lml_s):
+ @ Isolate the INF and NAN cases away
+ teq r4, ip
+ and r5, ip, yh, lsr #20
+ teqne r5, ip
+ beq 1f
+
+ @ Here, one or more arguments are either denormalized or zero.
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor xh, xh, yh
+ bic xh, xh, #0x7fffffff
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+1: @ One or both args are INF or NAN.
orrs r6, xl, xh, lsl #1
+ moveq xl, yl
+ moveq xh, yh
orrnes r6, yl, yh, lsl #1
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
teq r4, ip
1: teq r5, ip
bne LSYM(Lml_i)
orrs r6, yl, yh, lsl #12
+ movne xl, yl
+ movne xh, yh
bne LSYM(Lml_n) @ <anything> * NAN -> NAN
@ Result is INF, but we need to determine its sign.
mov xl, #0
RETLDM "r4, r5, r6"
- @ Return NAN.
+ @ Return a quiet NAN.
LSYM(Lml_n):
- mov xh, #0x7f000000
+ orr xh, xh, #0x7f000000
orr xh, xh, #0x00f80000
RETLDM "r4, r5, r6"
stmfd sp!, {r4, r5, r6, lr}
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
-
- @ Trap any INF/NAN or zeroes.
- teq r4, ip
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ andnes r5, ip, yh, lsr #20
+ teqne r4, ip
teqne r5, ip
- orrnes r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- beq LSYM(Ldv_s)
+ bleq LSYM(Ldv_s)
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
-	@ Then substract divisor exponent from dividend's.
- movs r4, r4, lsr #1
- teqne r5, #0
- beq LSYM(Ldv_d)
-LSYM(Ldv_x):
- sub r4, r4, r5, asr #1
+	@ Substract divisor exponent from dividend's.
+ sub r4, r4, r5
@ Preserve final sign into lr.
eor lr, xh, yh
@ Convert mantissa to unsigned integer.
@ Dividend -> r5-r6, divisor -> yh-yl.
- mov r5, #0x10000000
+ orrs r5, yl, yh, lsl #12
+ mov xh, xh, lsl #12
+ beq LSYM(Ldv_1)
mov yh, yh, lsl #12
+ mov r5, #0x10000000
orr yh, r5, yh, lsr #4
orr yh, yh, yl, lsr #24
- movs yl, yl, lsl #8
- mov xh, xh, lsl #12
- teqeq yh, r5
- beq LSYM(Ldv_1)
+ mov yl, yl, lsl #8
orr r5, r5, xh, lsr #4
orr r5, r5, xl, lsr #24
mov r6, xl, lsl #8
and xh, lr, #0x80000000
@ Ensure result will land to known bit position.
+ @ Apply exponent bias accordingly.
cmp r5, yh
cmpeq r6, yl
+ adc r4, r4, #(255 - 2)
+ add r4, r4, #0x300
bcs 1f
- sub r4, r4, #(1 << 19)
movs yh, yh, lsr #1
mov yl, yl, rrx
1:
- @ Apply exponent bias, check range for over/underflow.
- add r4, r4, #0x1f000000
- add r4, r4, #0x00f80000
- cmn r4, #(53 << 19)
- ble LSYM(Ldv_z)
- cmp r4, ip, lsr #1
- bge LSYM(Lml_o)
-
@ Perform first substraction to align result to a nibble.
subs r6, r6, yl
sbc r5, r5, yh
orreq xh, xh, xl
moveq xl, #0
3:
- @ Check if denormalized result is needed.
- cmp r4, #0
- ble LSYM(Ldv_u)
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
- @ Apply proper rounding.
+ @ Round the result, merge final exponent.
subs ip, r5, yh
subeqs ip, r6, yl
+ moveqs ip, xl, lsr #1
adcs xl, xl, #0
- adc xh, xh, #0
- teq ip, #0
- biceq xl, xl, #1
-
- @ Add exponent to result.
- bic xh, xh, #0x00100000
- orr xh, xh, r4, lsl #1
+ adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
and lr, lr, #0x80000000
orr xh, lr, xh, lsr #12
- add r4, r4, #0x1f000000
- add r4, r4, #0x00f80000
- cmp r4, ip, lsr #1
- bge LSYM(Lml_o)
- cmp r4, #0
- orrgt xh, xh, r4, lsl #1
+ adds r4, r4, ip, lsr #1
+ rsbgts r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
RETLDM "r4, r5, r6" gt
- cmn r4, #(53 << 19)
- ble LSYM(Ldv_z)
orr xh, xh, #0x00100000
mov lr, #0
- b LSYM(Lml_r)
+ subs r4, r4, #1
+ b LSYM(Lml_u)
- @ Result must be denormalized: put remainder in lr for
- @ rounding considerations.
-	@ Result might need to be denormalized: put remainder bits
+ @ in lr for rounding considerations.
LSYM(Ldv_u):
orr lr, r5, r6
- b LSYM(Lml_r)
-
- @ One or both arguments are denormalized.
- @ Scale them leftwards and preserve sign bit.
-LSYM(Ldv_d):
- mov lr, #0
- teq r4, #0
- bne 2f
- and r6, xh, #0x80000000
-1: movs xl, xl, lsl #1
- adc xh, lr, xh, lsl #1
- tst xh, #0x00100000
- subeq r4, r4, #(1 << 19)
- beq 1b
- orr xh, xh, r6
- teq r5, #0
- bne LSYM(Ldv_x)
-2: and r6, yh, #0x80000000
-3: movs yl, yl, lsl #1
- adc yh, lr, yh, lsl #1
- tst yh, #0x00100000
- subeq r5, r5, #(1 << 20)
- beq 3b
- orr yh, yh, r6
- b LSYM(Ldv_x)
+ b LSYM(Lml_u)
@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
+ and r5, ip, yh, lsr #20
teq r4, ip
teqeq r5, ip
beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
bne 1f
orrs r4, xl, xh, lsl #12
bne LSYM(Lml_n) @ NAN / <anything> -> NAN
- b LSYM(Lml_i) @ INF / <anything> -> INF
+ teq r5, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
1: teq r5, ip
bne 2f
orrs r5, yl, yh, lsl #12
- bne LSYM(Lml_n) @ <anything> / NAN -> NAN
- b LSYM(Lml_z) @ <anything> / INF -> 0
-2: @ One or both arguments are 0.
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+2: @ If both are non-zero, we need to normalize and resume above.
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+ @ One or both arguments are 0.
orrs r4, xl, xh, lsl #1
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
orrs r5, yl, yh, lsl #1
#ifdef L_cmpdf2
+@ Note: only r0 (return value) and ip are clobbered here.
+
ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
mov ip, #-1
ARM_FUNC_ALIAS eqdf2 cmpdf2
mov ip, #1 @ how should we specify unordered here?
-1: stmfd sp!, {r4, r5, lr}
+1: str ip, [sp, #-4]
@ Trap any INF/NAN first.
- mov lr, #0x7f000000
- orr lr, lr, #0x00f00000
- and r4, xh, lr
- and r5, yh, lr
- teq r4, lr
- teqne r5, lr
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ mov ip, yh, lsl #1
+ mvnnes ip, ip, asr #21
beq 3f
@ Test for equality.
teqne xh, yh @ or xh == yh
teqeq xl, yl @ and xl == yl
moveq r0, #0 @ then equal.
- RETLDM "r4, r5" eq
+ RETc(eq)
- @ Check for sign difference.
- teq xh, yh
- movmi r0, xh, asr #31
- orrmi r0, r0, #1
- RETLDM "r4, r5" mi
+ @ Clear C flag
+ cmn r0, #0
- @ Compare exponents.
- cmp r4, r5
+ @ Compare sign,
+ teq xh, yh
- @ Compare mantissa if exponents are equal.
- moveq xh, xh, lsl #12
- cmpeq xh, yh, lsl #12
+ @ Compare values if same sign
+ cmppl xh, yh
cmpeq xl, yl
+
+ @ Result:
movcs r0, yh, asr #31
mvncc r0, yh, asr #31
orr r0, r0, #1
- RETLDM "r4, r5"
+ RET
@ Look for a NAN.
-3: teq r4, lr
+3: mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
bne 4f
- orrs xl, xl, xh, lsl #12
+ orrs ip, xl, xh, lsl #12
bne 5f @ x is NAN
-4: teq r5, lr
+4: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
bne 2b
- orrs yl, yl, yh, lsl #12
+ orrs ip, yl, yh, lsl #12
beq 2b @ y is not NAN
-5: mov r0, ip @ return unordered code from ip
- RETLDM "r4, r5"
+5: ldr r0, [sp, #-4] @ unordered return code
+ RET
FUNC_END gedf2
FUNC_END gtdf2
FUNC_END cmpdf2
ARM_FUNC_START aeabi_cdrcmple
+
mov ip, r0
mov r0, r2
mov r2, ip
ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
-6: stmfd sp!, {r0, r1, r2, r3, lr}
+6: stmfd sp!, {r0, lr}
ARM_CALL cmpdf2
@ Set the Z flag correctly, and the C flag unconditionally.
cmp r0, #0
@ Clear the C flag if the return value was -1, indicating
@ that the first operand was smaller than the second.
cmnmi r0, #0
- RETLDM "r0, r1, r2, r3"
+ RETLDM "r0"
+
FUNC_END aeabi_cdcmple
FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
ARM_FUNC_START aeabi_dcmpeq
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
moveq r0, #1 @ Equal to.
movne r0, #0 @ Less than, greater than, or unordered.
RETLDM
+
FUNC_END aeabi_dcmpeq
ARM_FUNC_START aeabi_dcmplt
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movcc r0, #1 @ Less than.
movcs r0, #0 @ Equal to, greater than, or unordered.
RETLDM
+
FUNC_END aeabi_dcmplt
ARM_FUNC_START aeabi_dcmple
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movls r0, #1 @ Less than or equal to.
movhi r0, #0 @ Greater than or unordered.
RETLDM
+
FUNC_END aeabi_dcmple
ARM_FUNC_START aeabi_dcmpge
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
RETLDM
+
FUNC_END aeabi_dcmpge
ARM_FUNC_START aeabi_dcmpgt
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movcc r0, #1 @ Operand 2 is less than operand 1.
movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
@ or they are unordered.
RETLDM
+
FUNC_END aeabi_dcmpgt
-
+
#endif /* L_cmpdf2 */
#ifdef L_unorddf2
ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
-
- str lr, [sp, #-4]!
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and lr, xh, ip
- teq lr, ip
+
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
bne 1f
- orrs xl, xl, xh, lsl #12
+ orrs ip, xl, xh, lsl #12
bne 3f @ x is NAN
-1: and lr, yh, ip
- teq lr, ip
+1: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
bne 2f
- orrs yl, yl, yh, lsl #12
+ orrs ip, yl, yh, lsl #12
bne 3f @ y is NAN
2: mov r0, #0 @ arguments are ordered.
- RETLDM
+ RET
3: mov r0, #1 @ arguments are unordered.
- RETLDM
+ RET
FUNC_END aeabi_dcmpun
FUNC_END unorddf2
ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
- orrs ip, xl, xh, lsl #1
- beq 1f @ value is 0.
-
- mov r3, r3, rrx @ preserve C flag (the actual sign)
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, xh, ip
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic ip, ip, #0x40000000
- cmp r2, ip
- bcc 1f @ value is too small
- add ip, ip, #(31 << 20)
- cmp r2, ip
- bcs 3f @ value is too large
-
- rsb r2, r2, ip
- mov ip, xh, lsl #11
- orr ip, ip, #0x80000000
- orr ip, ip, xl, lsr #21
- mov r2, r2, lsr #20
- tst r3, #0x80000000 @ the sign bit
- mov r0, ip, lsr r2
+ mov r2, xh, lsl #1
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bls 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ tst xh, #0x80000000 @ the sign bit
+ mov r0, r3, lsr r2
rsbne r0, r0, #0
RET
RET
2: orrs xl, xl, xh, lsl #12
- bne 4f @ r0 is NAN.
-3: ands r0, r3, #0x80000000 @ the sign bit
+ bne 4f @ x is NAN.
+3: ands r0, xh, #0x80000000 @ the sign bit
moveq r0, #0x7fffffff @ maximum signed positive si
RET
ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
- orrs ip, xl, xh, lsl #1
- movcss r0, #0 @ value is negative
- RETc(eq) @ or 0 (xl, xh overlap r0)
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, xh, ip
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic ip, ip, #0x40000000
- cmp r2, ip
- bcc 1f @ value is too small
- add ip, ip, #(31 << 20)
- cmp r2, ip
- bhi 3f @ value is too large
-
- rsb r2, r2, ip
- mov ip, xh, lsl #11
- orr ip, ip, #0x80000000
- orr ip, ip, xl, lsr #21
- mov r2, r2, lsr #20
- mov r0, ip, lsr r2
+ movs r2, xh, lsl #1
+ bcs 1f @ value is negative
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bmi 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ mov r0, r3, lsr r2
RET
1: mov r0, #0
ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
- orrs r2, xl, xh, lsl #1
- moveq r0, r2, rrx
- RETc(eq) @ value is 0.0 or -0.0
-
+
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, ip, xh
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic xh, xh, ip
- cmp r2, #(0x380 << 20)
- bls 4f @ value is too small
-
- @ shift and round mantissa
-1: movs r3, xl, lsr #29
- adc r3, r3, xh, lsl #3
-
- @ if halfway between two numbers, round towards LSB = 0.
- mov xl, xl, lsl #3
- teq xl, #0x80000000
- biceq r3, r3, #1
-
- @ rounding might have created an extra MSB. If so adjust exponent.
- tst r3, #0x00800000
- addne r2, r2, #(1 << 20)
- bicne r3, r3, #0x00800000
-
- @ check exponent for overflow
- mov ip, #(0x400 << 20)
- orr ip, ip, #(0x07f << 20)
- cmp r2, ip
- bcs 3f @ overflow
-
- @ adjust exponent, merge with sign bit and mantissa.
- movs xh, xh, lsl #1
- mov r2, r2, lsl #4
- orr r0, r3, r2, rrx
- eor r0, r0, #0x40000000
+ mov r2, xh, lsl #1
+ subs r3, r2, #((1023 - 127) << 21)
+ subcss ip, r3, #(1 << 21)
+ rsbcss ip, ip, #(254 << 21)
+ bls 2f @ value is out of range
+
+1: @ shift and round mantissa
+ and ip, xh, #0x80000000
+ mov r2, xl, lsl #3
+ orr xl, ip, xl, lsr #29
+ cmp r2, #0x80000000
+ adc r0, xl, r3, lsl #2
+ biceq r0, r0, #1
RET
-2: @ chech for NAN
- orrs xl, xl, xh, lsl #12
- movne r0, #0x7f000000
- orrne r0, r0, #0x00c00000
- RETc(ne) @ return NAN
+2: @ either overflow or underflow
+ tst xh, #0x40000000
+ bne 3f @ overflow
-3: @ return INF with sign
- and r0, xh, #0x80000000
- orr r0, r0, #0x7f000000
- orr r0, r0, #0x00800000
- RET
+ @ check if denormalized value is possible
+ adds r2, r3, #(23 << 21)
+ andlt r0, xh, #0x80000000 @ too small, return signed 0.
+ RETc(lt)
-4: @ check if denormalized value is possible
- subs r2, r2, #((0x380 - 24) << 20)
- andle r0, xh, #0x80000000 @ too small, return signed 0.
- RETc(le)
-
@ denormalize value so we can resume with the code above afterwards.
orr xh, xh, #0x00100000
- mov r2, r2, lsr #20
- rsb r2, r2, #25
- cmp r2, #20
- bgt 6f
-
+ mov r2, r2, lsr #21
+ rsb r2, r2, #24
rsb ip, r2, #32
- mov r3, xl, lsl ip
+ movs r3, xl, lsl ip
mov xl, xl, lsr r2
- orr xl, xl, xh, lsl ip
- movs xh, xh, lsl #1
- mov xh, xh, lsr r2
- mov xh, xh, rrx
-5: teq r3, #0 @ fold r3 bits into the LSB
- orrne xl, xl, #1 @ for rounding considerations.
- mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent
+ orrne xl, xl, #1 @ fold r3 for rounding considerations.
+ mov r3, xh, lsl #11
+ mov r3, r3, lsr #11
+ orr xl, xl, r3, lsl ip
+ mov r3, r3, lsr r2
+ mov r3, r3, lsl #1
b 1b
-6: rsb r2, r2, #(12 + 20)
- rsb ip, r2, #32
- mov r3, xl, lsl r2
- mov xl, xl, lsr ip
- orr xl, xl, xh, lsl r2
- and xh, xh, #0x80000000
- b 5b
+3: @ check for NAN
+ mvns r3, r2, asr #21
+ bne 5f @ simple overflow
+ orrs r3, xl, xh, lsl #12
+ movne r0, #0x7f000000
+ orrne r0, r0, #0x00c00000
+ RETc(ne) @ return NAN
+
+5: @ return INF with sign
+ and r0, xh, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
FUNC_END aeabi_d2f
FUNC_END truncdfsf2
ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2
-
+
eor r0, r0, #0x80000000 @ flip sign bit
RET
ARM_FUNC_START aeabi_frsub
eor r0, r0, #0x80000000 @ flip sign bit of first arg
- b 1f
-
+ b 1f
+
ARM_FUNC_START subsf3
ARM_FUNC_ALIAS aeabi_fsub subsf3
-
+
eor r1, r1, #0x80000000 @ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
b 1f @ Skip Thumb-code prologue
ARM_FUNC_START addsf3
ARM_FUNC_ALIAS aeabi_fadd addsf3
-
-1: @ Compare both args, return zero if equal but the sign.
- eor r2, r0, r1
- teq r2, #0x80000000
- beq LSYM(Lad_z)
- @ If first arg is 0 or -0, return second arg.
- @ If second arg is 0 or -0, return first arg.
- bics r2, r0, #0x80000000
- moveq r0, r1
- bicnes r2, r1, #0x80000000
- RETc(eq)
-
- @ Mask out exponents.
- mov ip, #0xff000000
- and r2, r0, ip, lsr #1
- and r3, r1, ip, lsr #1
-
- @ If either of them is 255, result will be INF or NAN
- teq r2, ip, lsr #1
- teqne r3, ip, lsr #1
- beq LSYM(Lad_i)
+1: @ Look for zeroes, equal values, INF, or NAN.
+ movs r2, r0, lsl #1
+ movnes r3, r1, lsl #1
+ teqne r2, r3
+ mvnnes ip, r2, asr #24
+ mvnnes ip, r3, asr #24
+ beq LSYM(Lad_s)
@ Compute exponent difference. Make largest exponent in r2,
@ corresponding arg in r0, and positive exponent difference in r3.
- subs r3, r3, r2
+ mov r2, r2, lsr #24
+ rsbs r3, r2, r3, lsr #24
addgt r2, r2, r3
eorgt r1, r0, r1
eorgt r0, r1, r0
@ If exponent difference is too large, return largest argument
@ already in r0. We need up to 25 bit to handle proper rounding
@ of 0x1p25 - 1.1.
- cmp r3, #(25 << 23)
+ cmp r3, #25
RETc(hi)
@ Convert mantissa to signed integer.
beq LSYM(Lad_d)
LSYM(Lad_x):
- @ Scale down second arg with exponent difference.
- @ Apply shift one bit left to first arg and the rest to second arg
- @ to simplify things later, but only if exponent does not become 0.
- movs r3, r3, lsr #23
- teqne r2, #(1 << 23)
- movne r0, r0, lsl #1
- subne r2, r2, #(1 << 23)
- subne r3, r3, #1
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r2, r2, #1
- @ Shift second arg into ip, keep leftover bits into r1.
- mov ip, r1, asr r3
+ @ Shift and add second arg to first arg in r0.
+ @ Keep leftover bits into r1.
+ adds r0, r0, r1, asr r3
rsb r3, r3, #32
mov r1, r1, lsl r3
- add r0, r0, ip @ the actual addition
-
- @ We now have a 64 bit result in r0-r1.
- @ Keep absolute value in r0-r1, sign in r3.
- ands r3, r0, #0x80000000
+ @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
+ and r3, r0, #0x80000000
bpl LSYM(Lad_p)
rsbs r1, r1, #0
rsc r0, r0, #0
@ Determine how to normalize the result.
LSYM(Lad_p):
cmp r0, #0x00800000
- bcc LSYM(Lad_l)
+ bcc LSYM(Lad_a)
cmp r0, #0x01000000
- bcc LSYM(Lad_r0)
- cmp r0, #0x02000000
- bcc LSYM(Lad_r1)
+ bcc LSYM(Lad_e)
@ Result needs to be shifted right.
movs r0, r0, lsr #1
mov r1, r1, rrx
- add r2, r2, #(1 << 23)
-LSYM(Lad_r1):
- movs r0, r0, lsr #1
- mov r1, r1, rrx
- add r2, r2, #(1 << 23)
-
- @ Our result is now properly aligned into r0, remaining bits in r1.
- @ Round with MSB of r1. If halfway between two numbers, round towards
- @ LSB of r0 = 0.
-LSYM(Lad_r0):
- add r0, r0, r1, lsr #31
- teq r1, #0x80000000
- biceq r0, r0, #1
-
- @ Rounding may have added a new MSB. Adjust exponent.
- @ That MSB will be cleared when exponent is merged below.
- tst r0, #0x01000000
- addne r2, r2, #(1 << 23)
+ add r2, r2, #1
@ Make sure we did not bust our exponent.
- cmp r2, #(254 << 23)
- bhi LSYM(Lad_o)
+ cmp r2, #254
+ bhs LSYM(Lad_o)
+ @ Our result is now properly aligned into r0, remaining bits in r1.
@ Pack final result together.
+ @ Round with MSB of r1. If halfway between two numbers, round towards
+ @ LSB of r0 = 0.
LSYM(Lad_e):
- bic r0, r0, #0x01800000
- orr r0, r0, r2
+ cmp r1, #0x80000000
+ adc r0, r0, r2, lsl #23
+ biceq r0, r0, #1
orr r0, r0, r3
RET
- @ Result must be shifted left.
- @ No rounding necessary since r1 will always be 0.
+ @ Result must be shifted left and exponent adjusted.
+LSYM(Lad_a):
+ movs r1, r1, lsl #1
+ adc r0, r0, r0
+ tst r0, #0x00800000
+ sub r2, r2, #1
+ bne LSYM(Lad_e)
+
+ @ No rounding necessary since r1 will always be 0 at this point.
LSYM(Lad_l):
#if __ARM_ARCH__ < 5
movs ip, r0, lsr #12
moveq r0, r0, lsl #12
- subeq r2, r2, #(12 << 23)
+ subeq r2, r2, #12
tst r0, #0x00ff0000
moveq r0, r0, lsl #8
- subeq r2, r2, #(8 << 23)
+ subeq r2, r2, #8
tst r0, #0x00f00000
moveq r0, r0, lsl #4
- subeq r2, r2, #(4 << 23)
+ subeq r2, r2, #4
tst r0, #0x00c00000
moveq r0, r0, lsl #2
- subeq r2, r2, #(2 << 23)
- tst r0, #0x00800000
- moveq r0, r0, lsl #1
- subeq r2, r2, #(1 << 23)
- cmp r2, #0
- bgt LSYM(Lad_e)
+ subeq r2, r2, #2
+ cmp r0, #0x00800000
+ movcc r0, r0, lsl #1
+ sbcs r2, r2, #0
#else
clz ip, r0
sub ip, ip, #8
+ subs r2, r2, ip
mov r0, r0, lsl ip
- subs r2, r2, ip, lsl #23
- bgt LSYM(Lad_e)
#endif
- @ Exponent too small, denormalize result.
- mvn r2, r2, asr #23
- add r2, r2, #2
- orr r0, r3, r0, lsr r2
+ @ Final result with sign
+ @ If exponent negative, denormalize result.
+ addge r0, r0, r2, lsl #23
+ rsblt r2, r2, #0
+ orrge r0, r0, r3
+ orrlt r0, r3, r0, lsr r2
RET
@ Fixup and adjust bit position for denormalized arguments.
@ Note that r2 must not remain equal to 0.
LSYM(Lad_d):
teq r2, #0
- eoreq r0, r0, #0x00800000
- addeq r2, r2, #(1 << 23)
eor r1, r1, #0x00800000
- subne r3, r3, #(1 << 23)
+ eoreq r0, r0, #0x00800000
+ addeq r2, r2, #1
+ subne r3, r3, #1
b LSYM(Lad_x)
- @ Result is x - x = 0, unless x is INF or NAN.
-LSYM(Lad_z):
- mov ip, #0xff000000
- and r2, r0, ip, lsr #1
- teq r2, ip, lsr #1
- moveq r0, ip, asr #2
+LSYM(Lad_s):
+ mov r3, r1, lsl #1
+
+ mvns ip, r2, asr #24
+ mvnnes ip, r3, asr #24
+ beq LSYM(Lad_i)
+
+ teq r2, r3
+ beq 1f
+
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ teq r2, #0
+ moveq r0, r1
+ RET
+
+1: teq r0, r1
+
+ @ Result is x - x = 0.
movne r0, #0
+ RETc(ne)
+
+ @ Result is x + x = 2x.
+ tst r2, #0xff000000
+ bne 2f
+ movs r0, r0, lsl #1
+ orrcs r0, r0, #0x80000000
RET
+2: adds r2, r2, #(2 << 24)
+ addcc r0, r0, #(1 << 23)
+ RETc(cc)
+ and r3, r0, #0x80000000
@ Overflow: return INF.
LSYM(Lad_o):
@ if r1 != INF/NAN: return r0 (which is INF/NAN)
@ if r0 or r1 is NAN: return NAN
@ if opposite sign: return NAN
- @ return r0 (which is INF or -INF)
+ @ otherwise return r0 (which is INF or -INF)
LSYM(Lad_i):
- teq r2, ip, lsr #1
+ mvns r2, r2, asr #24
movne r0, r1
- teqeq r3, ip, lsr #1
- RETc(ne)
+ mvneqs r3, r3, asr #24
+ movne r1, r0
movs r2, r0, lsl #9
- moveqs r2, r1, lsl #9
+ moveqs r3, r1, lsl #9
teqeq r0, r1
- orrne r0, r3, #0x00400000 @ NAN
+ orrne r0, r0, #0x00400000 @ quiet NAN
RET
FUNC_END aeabi_frsub
ands r3, r0, #0x80000000
rsbmi r0, r0, #0
-1: teq r0, #0
+1: movs ip, r0
RETc(eq)
-3:
- mov r1, #0
- mov r2, #((127 + 23) << 23)
- tst r0, #0xfc000000
- beq LSYM(Lad_p)
-
- @ We need to scale the value a little before branching to code above.
- tst r0, #0xf0000000
-4:
- orrne r1, r1, r0, lsl #28
- movne r0, r0, lsr #4
- addne r2, r2, #(4 << 23)
- tst r0, #0x0c000000
- beq LSYM(Lad_p)
- mov r1, r1, lsr #2
- orr r1, r1, r0, lsl #30
- mov r0, r0, lsr #2
- add r2, r2, #(2 << 23)
- b LSYM(Lad_p)
+ @ Add initial exponent to sign
+ orr r3, r3, #((127 + 23) << 23)
+
+ .ifnc ah, r0
+ mov ah, r0
+ .endif
+ mov al, #0
+ b 2f
FUNC_END aeabi_i2f
FUNC_END floatsisf
ARM_FUNC_START floatundisf
ARM_FUNC_ALIAS aeabi_ul2f floatundisf
+
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqs f0, #0.0
#endif
RETc(eq)
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
- @ For hard FPA code we want to return via the tail below so that
- @ we can return the result in f0 as well as in r0 for backwards
- @ compatibility.
- str lr, [sp, #-4]!
- adr lr, 4f
-#endif
mov r3, #0
- b 2f
+ b 1f
ARM_FUNC_START floatdisf
ARM_FUNC_ALIAS aeabi_l2f floatdisf
mvfeqs f0, #0.0
#endif
RETc(eq)
-
+
+ ands r3, ah, #0x80000000 @ sign bit in r3
+ bpl 1f
+ rsbs al, al, #0
+ rsc ah, ah, #0
+1:
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0 for backwards
@ compatibility.
str lr, [sp, #-4]!
- adr lr, 4f
+ adr lr, LSYM(f0_ret)
#endif
- ands r3, ah, #0x80000000 @ sign bit in r3
- bpl 2f
- rsbs al, al, #0
- rsc ah, ah, #0
-2:
+
movs ip, ah
-#ifdef __ARMEB__
- moveq r0, al
-#endif
- beq 3b
- mov r2, #((127 + 23 + 32) << 23) @ initial exponent
-#ifndef __ARMEB__
- mov r1, al
- mov r0, ip
-#endif
- tst r0, #0xfc000000
- bne 3f
+ moveq ip, al
+
+ @ Add initial exponent to sign
+ orr r3, r3, #((127 + 23 + 32) << 23)
+ subeq r3, r3, #(32 << 23)
+2: sub r3, r3, #(1 << 23)
#if __ARM_ARCH__ < 5
- cmp r0, #(1 << 13)
- movlo ip, #13
- movlo r0, r0, lsl #13
- movhs ip, #0
- tst r0, #0x03fc0000
- addeq ip, ip, #8
- moveq r0, r0, lsl #8
- tst r0, #0x03c00000
- addeq ip, ip, #4
- moveq r0, r0, lsl #4
- tst r0, #0x03000000
- addeq ip, ip, #2
- moveq r0, r0, lsl #2
+
+ mov r2, #23
+ cmp ip, #(1 << 16)
+ movhs ip, ip, lsr #16
+ subhs r2, r2, #16
+ cmp ip, #(1 << 8)
+ movhs ip, ip, lsr #8
+ subhs r2, r2, #8
+ cmp ip, #(1 << 4)
+ movhs ip, ip, lsr #4
+ subhs r2, r2, #4
+ cmp ip, #(1 << 2)
+ subhs r2, r2, #2
+ sublo r2, r2, ip, lsr #1
+ subs r2, r2, ip, lsr #3
+
#else
- clz ip, r0
- sub ip, ip, #6
- mov r0, r0, lsl ip
+
+ clz r2, ip
+ subs r2, r2, #8
+
#endif
- sub r2, r2, ip, lsl #23
- rsb ip, ip, #32
- orr r0, r0, r1, lsr ip
- rsb ip, ip, #32
- mov r1, r1, asl ip
- @ At this point we no-longer care about the precise value in r1, only
- @ whether only the top bit is set, or if the top bit and some others
- @ are set.
- and ip, r1, #0xff
- orr r1, r1, ip, lsl #8
- b LSYM(Lad_p)
-3:
- @ We need to scale the value a little before branching to code above.
- @ At this point we no-longer care about the precise value in r1, only
- @ whether only the top bit is set, or if the top bit and some others
- @ are set.
- and ip, r1, #0xff
- orr r1, r1, ip, lsl #8
- tst r0, #0xf0000000
- movne r1, r1, lsr #4
- b 4b
+
+ sub r3, r3, r2, lsl #23
+ blt 3f
+
+ add r3, r3, ah, lsl r2
+ mov ip, al, lsl r2
+ rsb r2, r2, #32
+ cmp ip, #0x80000000
+ adc r0, r3, al, lsr r2
+ biceq r0, r0, #1
+ RET
+
+3: add r2, r2, #32
+ mov ip, ah, lsl r2
+ rsb r2, r2, #32
+ orrs al, al, ip, lsl #1
+ adc r0, r3, ah, lsr r2
+ biceq r0, r0, ip, lsr #31
+ RET
+
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-4:
+
+LSYM(f0_ret)
str r0, [sp, #-4]!
ldfs f0, [sp], #4
RETLDM
+
#endif
+
FUNC_END floatdisf
FUNC_END aeabi_l2f
FUNC_END floatundisf
ARM_FUNC_START mulsf3
ARM_FUNC_ALIAS aeabi_fmul mulsf3
-
- @ Mask out exponents.
- mov ip, #0xff000000
- and r2, r0, ip, lsr #1
- and r3, r1, ip, lsr #1
-
- @ Trap any INF/NAN.
- teq r2, ip, lsr #1
- teqne r3, ip, lsr #1
- beq LSYM(Lml_s)
- @ Trap any multiplication by 0.
- bics ip, r0, #0x80000000
- bicnes ip, r1, #0x80000000
- beq LSYM(Lml_z)
-
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
- @ Then add both exponents together.
- movs r2, r2, lsr #1
- teqne r3, #0
- beq LSYM(Lml_d)
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ ands r2, ip, r0, lsr #23
+ andnes r3, ip, r1, lsr #23
+ teqne r2, ip
+ teqne r3, ip
+ beq LSYM(Lml_s)
LSYM(Lml_x):
- add r2, r2, r3, asr #1
- @ Preserve final sign in r2 along with exponent for now.
- teq r0, r1
- orrmi r2, r2, #0x8000
+ @ Add exponents together
+ add r2, r2, r3
+
+ @ Determine final sign.
+ eor ip, r0, r1
@ Convert mantissa to unsigned integer.
- bic r0, r0, #0xff000000
- bic r1, r1, #0xff000000
- orr r0, r0, #0x00800000
- orr r1, r1, #0x00800000
+ @ If power of two, branch to a separate path.
+ @ Make up for final alignment.
+ movs r0, r0, lsl #9
+ movnes r1, r1, lsl #9
+ beq LSYM(Lml_1)
+ mov r3, #0x08000000
+ orr r0, r3, r0, lsr #5
+ orr r1, r3, r1, lsr #5
#if __ARM_ARCH__ < 4
+ @ Put sign bit in r3, which will be restored into r0 later.
+ and r3, ip, #0x80000000
+
@ Well, no way to make it shorter without the umull instruction.
- @ We must perform that 24 x 24 -> 48 bit multiplication by hand.
- stmfd sp!, {r4, r5}
+ stmfd sp!, {r3, r4, r5}
mov r4, r0, lsr #16
mov r5, r1, lsr #16
- bic r0, r0, #0x00ff0000
- bic r1, r1, #0x00ff0000
+ bic r0, r0, r4, lsl #16
+ bic r1, r1, r5, lsl #16
mul ip, r4, r5
mul r3, r0, r1
mul r0, r5, r0
mla r0, r4, r1, r0
adds r3, r3, r0, lsl #16
- adc ip, ip, r0, lsr #16
- ldmfd sp!, {r4, r5}
+ adc r1, ip, r0, lsr #16
+ ldmfd sp!, {r0, r4, r5}
#else
- umull r3, ip, r0, r1 @ The actual multiplication.
+ @ The actual multiplication.
+ umull r3, r1, r0, r1
+
+ @ Put final sign in r0.
+ and r0, ip, #0x80000000
#endif
- @ Put final sign in r0.
- mov r0, r2, lsl #16
- bic r2, r2, #0x8000
-
- @ Adjust result if one extra MSB appeared.
- @ The LSB may be lost but this never changes the result in this case.
- tst ip, #(1 << 15)
- addne r2, r2, #(1 << 22)
- movnes ip, ip, lsr #1
- movne r3, r3, rrx
-
- @ Apply exponent bias, check range for underflow.
- subs r2, r2, #(127 << 22)
- ble LSYM(Lml_u)
-
- @ Scale back to 24 bits with rounding.
- @ r0 contains sign bit already.
- orrs r0, r0, r3, lsr #23
- adc r0, r0, ip, lsl #9
-
- @ If halfway between two numbers, rounding should be towards LSB = 0.
- mov r3, r3, lsl #9
- teq r3, #0x80000000
- biceq r0, r0, #1
+ @ Adjust result upon the MSB position.
+ cmp r1, #(1 << 23)
+ movcc r1, r1, lsl #1
+ orrcc r1, r1, r3, lsr #31
+ movcc r3, r3, lsl #1
- @ Note: rounding may have produced an extra MSB here.
- @ The extra bit is cleared before merging the exponent below.
- tst r0, #0x01000000
- addne r2, r2, #(1 << 22)
+ @ Add sign to result.
+ orr r0, r0, r1
- @ Check for exponent overflow
- cmp r2, #(255 << 22)
- bge LSYM(Lml_o)
+ @ Apply exponent bias, check for under/overflow.
+ sbc r2, r2, #127
+ cmp r2, #(254 - 1)
+ bhi LSYM(Lml_u)
- @ Add final exponent.
- bic r0, r0, #0x01800000
- orr r0, r0, r2, lsl #1
+ @ Round the result, merge final exponent.
+ cmp r3, #0x80000000
+ adc r0, r0, r2, lsl #23
+ biceq r0, r0, #1
RET
- @ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
- eor r0, r0, r1
- bic r0, r0, #0x7fffffff
- RET
+ @ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+ teq r0, #0
+ and ip, ip, #0x80000000
+ moveq r1, r1, lsl #9
+ orr r0, ip, r0, lsr #9
+ orr r0, r0, r1, lsr #9
+ subs r2, r2, #127
+ rsbgts r3, r2, #255
+ orrgt r0, r0, r2, lsl #23
+ RETc(gt)
+
+ @ Under/overflow: fix things up for the code below.
+ orr r0, r0, #0x00800000
+ mov r3, #0
+ subs r2, r2, #1
- @ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
- cmn r2, #(24 << 22)
- RETc(le)
+ @ Overflow?
+ bgt LSYM(Lml_o)
- @ Find out proper shift value.
- mvn r1, r2, asr #22
- subs r1, r1, #7
- bgt LSYM(Lml_ur)
-
- @ Shift value left, round, etc.
- add r1, r1, #32
- orrs r0, r0, r3, lsr r1
- rsb r1, r1, #32
- adc r0, r0, ip, lsl r1
- mov ip, r3, lsl r1
- teq ip, #0x80000000
- biceq r0, r0, #1
- RET
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r2, #(24 + 1)
+ bicle r0, r0, #0x7fffffff
+ RETc(le)
@ Shift value right, round, etc.
- @ Note: r1 must not be 0 otherwise carry does not get set.
-LSYM(Lml_ur):
- orrs r0, r0, ip, lsr r1
+ rsb r2, r2, #0
+ movs r1, r0, lsl #1
+ mov r1, r1, lsr r2
+ rsb r2, r2, #32
+ mov ip, r0, lsl r2
+ movs r0, r1, rrx
adc r0, r0, #0
- rsb r1, r1, #32
- mov ip, ip, lsl r1
- teq r3, #0
- teqeq ip, #0x80000000
- biceq r0, r0, #1
+ orrs r3, r3, ip, lsl #1
+ biceq r0, r0, ip, lsr #31
RET
@ One or both arguments are denormalized.
and ip, r0, #0x80000000
1: moveq r0, r0, lsl #1
tsteq r0, #0x00800000
- subeq r2, r2, #(1 << 22)
+ subeq r2, r2, #1
beq 1b
orr r0, r0, ip
teq r3, #0
and ip, r1, #0x80000000
2: moveq r1, r1, lsl #1
tsteq r1, #0x00800000
- subeq r3, r3, #(1 << 23)
+ subeq r3, r3, #1
beq 2b
orr r1, r1, ip
b LSYM(Lml_x)
- @ One or both args are INF or NAN.
LSYM(Lml_s):
+ @ Isolate the INF and NAN cases away
+ and r3, ip, r1, lsr #23
+ teq r2, ip
+ teqne r3, ip
+ beq 1f
+
+ @ Here, one or more arguments are either denormalized or zero.
+ bics ip, r0, #0x80000000
+ bicnes ip, r1, #0x80000000
+ bne LSYM(Lml_d)
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor r0, r0, r1
+ bic r0, r0, #0x7fffffff
+ RET
+
+1: @ One or both args are INF or NAN.
teq r0, #0x0
- teqne r1, #0x0
teqne r0, #0x80000000
+ moveq r0, r1
+ teqne r1, #0x0
teqne r1, #0x80000000
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
- teq r2, ip, lsr #1
+ teq r2, ip
bne 1f
movs r2, r0, lsl #9
bne LSYM(Lml_n) @ NAN * <anything> -> NAN
-1: teq r3, ip, lsr #1
+1: teq r3, ip
bne LSYM(Lml_i)
movs r3, r1, lsl #9
+ movne r0, r1
bne LSYM(Lml_n) @ <anything> * NAN -> NAN
@ Result is INF, but we need to determine its sign.
orr r0, r0, #0x00800000
RET
- @ Return NAN.
+ @ Return a quiet NAN.
LSYM(Lml_n):
- mov r0, #0x7f000000
+ orr r0, r0, #0x7f000000
orr r0, r0, #0x00c00000
RET
ARM_FUNC_START divsf3
ARM_FUNC_ALIAS aeabi_fdiv divsf3
-
- @ Mask out exponents.
- mov ip, #0xff000000
- and r2, r0, ip, lsr #1
- and r3, r1, ip, lsr #1
-
- @ Trap any INF/NAN or zeroes.
- teq r2, ip, lsr #1
- teqne r3, ip, lsr #1
- bicnes ip, r0, #0x80000000
- bicnes ip, r1, #0x80000000
- beq LSYM(Ldv_s)
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
- @ Then substract divisor exponent from dividend's.
- movs r2, r2, lsr #1
- teqne r3, #0
- beq LSYM(Ldv_d)
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ ands r2, ip, r0, lsr #23
+ andnes r3, ip, r1, lsr #23
+ teqne r2, ip
+ teqne r3, ip
+ beq LSYM(Ldv_s)
LSYM(Ldv_x):
- sub r2, r2, r3, asr #1
+
+ @ Subtract divisor exponent from dividend's
+ sub r2, r2, r3
@ Preserve final sign into ip.
eor ip, r0, r1
@ Convert mantissa to unsigned integer.
@ Dividend -> r3, divisor -> r1.
- mov r3, #0x10000000
movs r1, r1, lsl #9
mov r0, r0, lsl #9
beq LSYM(Ldv_1)
+ mov r3, #0x10000000
orr r1, r3, r1, lsr #4
orr r3, r3, r0, lsr #4
and r0, ip, #0x80000000
@ Ensure result will land to known bit position.
+ @ Apply exponent bias accordingly.
cmp r3, r1
- subcc r2, r2, #(1 << 22)
movcc r3, r3, lsl #1
-
- @ Apply exponent bias, check range for over/underflow.
- add r2, r2, #(127 << 22)
- cmn r2, #(24 << 22)
- RETc(le)
- cmp r2, #(255 << 22)
- bge LSYM(Lml_o)
+ adc r2, r2, #(127 - 2)
@ The actual division loop.
mov ip, #0x00800000
movnes ip, ip, lsr #4
bne 1b
- @ Check if denormalized result is needed.
- cmp r2, #0
- ble LSYM(Ldv_u)
+ @ Check exponent for under/overflow.
+ cmp r2, #(254 - 1)
+ bhi LSYM(Lml_u)
- @ Apply proper rounding.
+ @ Round the result, merge final exponent.
cmp r3, r1
- addcs r0, r0, #1
+ adc r0, r0, r2, lsl #23
biceq r0, r0, #1
-
- @ Add exponent to result.
- bic r0, r0, #0x00800000
- orr r0, r0, r2, lsl #1
RET
 @ Division by 0x1p*: let's shortcut a lot of code.
LSYM(Ldv_1):
and ip, ip, #0x80000000
orr r0, ip, r0, lsr #9
- add r2, r2, #(127 << 22)
- cmp r2, #(255 << 22)
- bge LSYM(Lml_o)
- cmp r2, #0
- orrgt r0, r0, r2, lsl #1
+ adds r2, r2, #127
+ rsbgts r3, r2, #255
+ orrgt r0, r0, r2, lsl #23
RETc(gt)
- cmn r2, #(24 << 22)
- movle r0, ip
- RETc(le)
+
orr r0, r0, #0x00800000
mov r3, #0
-
- @ Result must be denormalized: prepare parameters to use code above.
- @ r3 already contains remainder for rounding considerations.
-LSYM(Ldv_u):
- bic ip, r0, #0x80000000
- and r0, r0, #0x80000000
- mvn r1, r2, asr #22
- add r1, r1, #2
- b LSYM(Lml_ur)
+ subs r2, r2, #1
+ b LSYM(Lml_u)
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
and ip, r0, #0x80000000
1: moveq r0, r0, lsl #1
tsteq r0, #0x00800000
- subeq r2, r2, #(1 << 22)
+ subeq r2, r2, #1
beq 1b
orr r0, r0, ip
teq r3, #0
and ip, r1, #0x80000000
2: moveq r1, r1, lsl #1
tsteq r1, #0x00800000
- subeq r3, r3, #(1 << 23)
+ subeq r3, r3, #1
beq 2b
orr r1, r1, ip
b LSYM(Ldv_x)
- @ One or both arguments is either INF, NAN or zero.
+ @ One or both arguments are either INF, NAN, zero or denormalized.
LSYM(Ldv_s):
- mov ip, #0xff000000
- teq r2, ip, lsr #1
- teqeq r3, ip, lsr #1
- beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
- teq r2, ip, lsr #1
+ and r3, ip, r1, lsr #23
+ teq r2, ip
bne 1f
movs r2, r0, lsl #9
bne LSYM(Lml_n) @ NAN / <anything> -> NAN
- b LSYM(Lml_i) @ INF / <anything> -> INF
-1: teq r3, ip, lsr #1
+ teq r3, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov r0, r1
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+1: teq r3, ip
bne 2f
movs r3, r1, lsl #9
- bne LSYM(Lml_n) @ <anything> / NAN -> NAN
- b LSYM(Lml_z) @ <anything> / INF -> 0
-2: @ One or both arguments are 0.
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov r0, r1
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+2: @ If both are non-zero, we need to normalize and resume above.
+ bics ip, r0, #0x80000000
+ bicnes ip, r1, #0x80000000
+ bne LSYM(Ldv_d)
+ @ One or both arguments are zero.
bics r2, r0, #0x80000000
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
bics r3, r1, #0x80000000
ARM_FUNC_START gtsf2
ARM_FUNC_ALIAS gesf2 gtsf2
- mov r3, #-1
+ mov ip, #-1
b 1f
ARM_FUNC_START ltsf2
ARM_FUNC_ALIAS lesf2 ltsf2
- mov r3, #1
+ mov ip, #1
b 1f
ARM_FUNC_START cmpsf2
ARM_FUNC_ALIAS nesf2 cmpsf2
ARM_FUNC_ALIAS eqsf2 cmpsf2
- mov r3, #1 @ how should we specify unordered here?
-
- @ Both Inf and NaN have an exponent of 255. Therefore, we
- @ compute (r1 & 0x8f80000) || (r2 & 0x8f8000).
-1: mov ip, #0xff000000
- and r2, r1, ip, lsr #1
- teq r2, ip, lsr #1
- and r2, r0, ip, lsr #1
- teqne r2, ip, lsr #1
+ mov ip, #1 @ how should we specify unordered here?
+
+1: str ip, [sp, #-4]
+
+ @ Trap any INF/NAN first.
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ mvns ip, r2, asr #24
+ mvnnes ip, r3, asr #24
beq 3f
- @ Test for equality. The representations of +0.0 and -0.0
- @ have all bits set to zero, except for the sign bit. Since
- @ 0.0 is equal to -0.0, we begin by testing
- @ ((r0 | r1) & ~0x8000000).
-2: orr r3, r0, r1
- @ If the result of the bitwise and is zero, then the Z flag
- @ will be set. In any case, the C flag will be set.
- bics r3, r3, #0x80000000 @ either 0.0 or -0.0
- teqne r0, r1 @ or both the same
- @ If the Z flag is set, the two operands were equal. Return zero.
- moveq r0, #0
- RETc(eq)
+ @ Compare values.
+ @ Note that 0.0 is equal to -0.0.
+2: orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
+ teqne r0, r1 @ if not 0 compare sign
+ subpls r0, r2, r3 @ if same sign compare values, set r0
- @ Check for sign difference. The N flag is set (due to the
- @ use of teq above) if the sign bit is set on exactly one
- @ of the operands. Return the sign of the first operand.
- movmi r0, r0, asr #31
- orrmi r0, r0, #1
- RETc(mi)
-
- @ Compare exponents.
- and r3, r1, ip, lsr #1
- cmp r2, r3
-
- @ Compare mantissa if exponents are equal
- moveq r0, r0, lsl #9
- cmpeq r0, r1, lsl #9
-
- @ We know the operands cannot be equal at this point, so the
- @ Z flag is clear. The C flag is set if the first operand has
- @ the greater exponent, or the exponents are equal and the
- @ first operand has the greater mantissa. Therefore, if the C
- @ flag is set, the first operand is greater iff the sign is
- @ positive. These next two instructions will put zero in
- @ r0 if the first operand is greater, and -1 if the second
- @ operand is greater.
- movcs r0, r1, asr #31
- mvncc r0, r1, asr #31
- @ If r0 is 0, the first operand is greater, so return 1. Leave
- @ -1 unchanged.
- orr r0, r0, #1
+ @ Result:
+ movhi r0, r1, asr #31
+ mvnlo r0, r1, asr #31
+ orrne r0, r0, #1
RET
- @ We know that at least one argument is either Inf or NaN.
- @ Look for a NaN.
-3: and r2, r1, ip, lsr #1
- teq r2, ip, lsr #1
+ @ Look for a NAN.
+3: mvns ip, r2, asr #24
bne 4f
- movs r2, r1, lsl #9
- bne 5f @ r1 is NAN
-4: and r2, r0, ip, lsr #1
- teq r2, ip, lsr #1
- bne 2b
movs ip, r0, lsl #9
- beq 2b @ r0 is not NAN
-5: @ The Z flag is clear at this point.
- mov r0, r3 @ return unordered code from r3.
+ bne 5f @ r0 is NAN
+4: mvns ip, r3, asr #24
+ bne 2b
+ movs ip, r1, lsl #9
+ beq 2b @ r1 is not NAN
+5: ldr r0, [sp, #-4] @ return unordered code.
RET
FUNC_END gesf2
FUNC_END cmpsf2
ARM_FUNC_START aeabi_cfrcmple
+
mov ip, r0
mov r0, r1
mov r1, ip
b 6f
-
+
ARM_FUNC_START aeabi_cfcmpeq
ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
6: stmfd sp!, {r0, r1, r2, r3, lr}
@ that the first operand was smaller than the second.
cmnmi r0, #0
RETLDM "r0, r1, r2, r3"
+
FUNC_END aeabi_cfcmple
FUNC_END aeabi_cfcmpeq
-
+ FUNC_END aeabi_cfrcmple
+
ARM_FUNC_START aeabi_fcmpeq
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cfcmple
moveq r0, #1 @ Equal to.
movne r0, #0 @ Less than, greater than, or unordered.
RETLDM
+
FUNC_END aeabi_fcmpeq
ARM_FUNC_START aeabi_fcmplt
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cfcmple
movcc r0, #1 @ Less than.
movcs r0, #0 @ Equal to, greater than, or unordered.
RETLDM
+
FUNC_END aeabi_fcmplt
ARM_FUNC_START aeabi_fcmple
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cfcmple
movls r0, #1 @ Less than or equal to.
movhi r0, #0 @ Greater than or unordered.
RETLDM
+
FUNC_END aeabi_fcmple
ARM_FUNC_START aeabi_fcmpge
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cfrcmple
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
RETLDM
+
FUNC_END aeabi_fcmpge
ARM_FUNC_START aeabi_fcmpgt
+
str lr, [sp, #-4]!
ARM_CALL aeabi_cfrcmple
movcc r0, #1 @ Operand 2 is less than operand 1.
movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
@ or they are unordered.
RETLDM
+
FUNC_END aeabi_fcmpgt
-
+
#endif /* L_cmpsf2 */
#ifdef L_unordsf2
ARM_FUNC_START unordsf2
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
-
- mov ip, #0xff000000
- and r2, r1, ip, lsr #1
- teq r2, ip, lsr #1
+
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ mvns ip, r2, asr #24
bne 1f
- movs r2, r1, lsl #9
- bne 3f @ r1 is NAN
-1: and r2, r0, ip, lsr #1
- teq r2, ip, lsr #1
- bne 2f
- movs r2, r0, lsl #9
+ movs ip, r0, lsl #9
bne 3f @ r0 is NAN
+1: mvns ip, r3, asr #24
+ bne 2f
+ movs ip, r1, lsl #9
+ bne 3f @ r1 is NAN
2: mov r0, #0 @ arguments are ordered.
RET
3: mov r0, #1 @ arguments are unordered.
ARM_FUNC_START fixsfsi
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
- movs r0, r0, lsl #1
- RETc(eq) @ value is 0.
-
- mov r1, r1, rrx @ preserve C flag (the actual sign)
@ check exponent range.
- and r2, r0, #0xff000000
+ mov r2, r0, lsl #1
cmp r2, #(127 << 24)
- movcc r0, #0 @ value is too small
- RETc(cc)
- cmp r2, #((127 + 31) << 24)
- bcs 1f @ value is too large
-
- mov r0, r0, lsl #7
- orr r0, r0, #0x80000000
- mov r2, r2, lsr #24
- rsb r2, r2, #(127 + 31)
- tst r1, #0x80000000 @ the sign bit
- mov r0, r0, lsr r2
+ bcc 1f @ value is too small
+ mov r3, #(127 + 31)
+ subs r2, r3, r2, lsr #24
+ bls 2f @ value is too large
+
+ @ scale value
+ mov r3, r0, lsl #8
+ orr r3, r3, #0x80000000
+ tst r0, #0x80000000 @ the sign bit
+ mov r0, r3, lsr r2
rsbne r0, r0, #0
RET
-1: teq r2, #0xff000000
- bne 2f
- movs r0, r0, lsl #8
- bne 3f @ r0 is NAN.
-2: ands r0, r1, #0x80000000 @ the sign bit
+1: mov r0, #0
+ RET
+
+2: cmp r2, #(127 + 31 - 0xff)
+ bne 3f
+ movs r2, r0, lsl #9
+ bne 4f @ r0 is NAN.
+3: ands r0, r0, #0x80000000 @ the sign bit
moveq r0, #0x7fffffff @ the maximum signed positive si
RET
-3: mov r0, #0 @ What should we convert NAN to?
+4: mov r0, #0 @ What should we convert NAN to?
RET
FUNC_END aeabi_f2iz
ARM_FUNC_START fixunssfsi
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
- movs r0, r0, lsl #1
- movcss r0, #0 @ value is negative...
- RETc(eq) @ ... or 0.
-
@ check exponent range.
- and r2, r0, #0xff000000
+ movs r2, r0, lsl #1
+ bcs 1f @ value is negative
cmp r2, #(127 << 24)
- movcc r0, #0 @ value is too small
- RETc(cc)
- cmp r2, #((127 + 32) << 24)
- bcs 1f @ value is too large
+ bcc 1f @ value is too small
+ mov r3, #(127 + 31)
+ subs r2, r3, r2, lsr #24
+ bmi 2f @ value is too large
+
+ @ scale the value
+ mov r3, r0, lsl #8
+ orr r3, r3, #0x80000000
+ mov r0, r3, lsr r2
+ RET
- mov r0, r0, lsl #7
- orr r0, r0, #0x80000000
- mov r2, r2, lsr #24
- rsb r2, r2, #(127 + 31)
- mov r0, r0, lsr r2
+1: mov r0, #0
RET
-1: teq r2, #0xff000000
- bne 2f
- movs r0, r0, lsl #8
- bne 3f @ r0 is NAN.
-2: mov r0, #0xffffffff @ maximum unsigned si
+2: cmp r2, #(127 + 31 - 0xff)
+ bne 3f
+ movs r2, r0, lsl #9
+ bne 4f @ r0 is NAN.
+3: mov r0, #0xffffffff @ maximum unsigned si
RET
-3: mov r0, #0 @ What should we convert NAN to?
+4: mov r0, #0 @ What should we convert NAN to?
RET
FUNC_END aeabi_f2uiz