1 /* Copyright (C) 2004, 2006 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 2, or (at your option) any later version.
8 In addition to the permissions in the GNU General Public License, the
9 Free Software Foundation gives you unlimited permission to link the
10 compiled version of this file into combinations with other programs,
11 and to distribute those combinations without any restriction coming
12 from the use of this file. (The General Public License restrictions
13 do apply in other respects; for example, they cover modification of
14 the file, and distribution when not linked into a combine executable.)
17 This file is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; see the file COPYING. If not, write to
24 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
25 Boston, MA 02110-1301, USA. */
27 /* libgcc routines for the STMicroelectronics ST40-300 CPU.
28 Contributed by J"orn Rennecke joern.rennecke@st.com. */
30 #include "lib1funcs.h"
34 #if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
35 /* This code uses shld, thus is not suitable for SH1 / SH2. */
37 /* Signed / unsigned division without use of FPU, optimized for SH4-300.
38 Uses a lookup table for divisors in the range -128 .. +127, and
39 div1 with case distinction for larger divisors in three more ranges.
40 The code is lumped together with the table to allow the use of mova. */
41 #ifdef __LITTLE_ENDIAN__
/* udivsi3_i4i / sdivsi3_i4i: unsigned and signed 32-bit integer division
   that does not use the FPU.  Both entry points are declared here and are
   closed by the paired ENDFUNC lines at the end of the routine.

   Operands: the in-line comments below consistently treat r4 as the
   dividend and r5 as the divisor.
   Result register: not stated in the comments shown here; r0 receives the
   rotated-in quotient bits on the paths below (TODO confirm against the
   callers / the compiler's insn patterns).
   Clobbers: per the interface note further down, effectively only r1,
   macl and mach.  r6, r7 and r8 are used as temporaries; pushes and pops
   through r15 are visible for them on the paths below.

   Strategy, as described by the comment preceding this routine: small
   divisors are handled through the lookup tables div_table_inv and
   div_table_clz, larger ones by div1 steps with separate code paths per
   divisor magnitude range.  The "N cycles up to here" notes are the
   original author's scheduling annotations.  */
51 .global GLOBAL(udivsi3_i4i)
52 .global GLOBAL(sdivsi3_i4i)
53 FUNC(GLOBAL(udivsi3_i4i))
54 FUNC(GLOBAL(sdivsi3_i4i))
/* Signed path, presumably for divisors of magnitude 0x800000 and above
   (inferred from the label name and the 0x800000 special case noted
   below -- confirm).  */
57 LOCAL(div_ge8m): ! 10 cycles up to here
58 rotcr r1 ! signed shift must use original sign from r4
65 swap.w r5,r0 ! detect -0x80000000 : 0x800000
100 ! 31 cycles up to here
/* Unsigned path; the label name suggests divisors of 64k and above
   (TODO confirm).  */
103 LOCAL(udiv_ge64k): ! 3 cycles up to here
111 ! 7 cycles up to here
115 extu.b r4,r1 ! 15 cycles up to here
122 .endr ! 25 cycles up to here
128 rotcl r0 ! 28 cycles up to here
131 LOCAL(udiv_r8): ! 6 cycles up to here
143 ! 12 cycles up to here
147 mov.l @r15+,r6 ! 24 cycles up to here
/* Signed path; the label name and the 0x8000 special case noted below
   suggest divisors of magnitude 32k and above (TODO confirm).  */
152 LOCAL(div_ge32k): ! 6 cycles up to here
160 cmp/hi r1,r4 ! copy sign bit of r4 into T
161 rotcr r1 ! signed shift must use original sign from r4
166 mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
188 neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
192 swap.w r7,r7 ! 26 cycles up to here.
208 shad r1,r5 ! 34 cycles up to here
225 extu.b r4,r0 ! 7 cycles up to here
228 .endr ! 15 cycles up to here
229 xor r1,r0 ! xor dividend with result lsb
233 mov.l r7,@-r15 ! 21 cycles up to here
239 xor r7,r1 ! replace lsb of result with lsb of dividend
246 div1 r6,r1 ! 28 cycles up to here
248 /* This is link-compatible with a GLOBAL(sdivsi3) call,
249 but we effectively clobber only r1, macl and mach */
250 /* Because negative quotients are calculated as one's complements,
251 -0x80000000 divided by the smallest positive number of a number
252 range (0x80, 0x8000, 0x800000) causes saturation in the one's
253 complement representation, and we have to suppress the
254 one's -> two's complement adjustment. Since positive numbers
255 don't get such an adjustment, it's OK to also compute one's -> two's
256 complement adjustment suppression for a dividend of 0. */
/* Dispatch on divisor magnitude.  bt/s and bf/s are delayed branches:
   the instruction that follows each of them executes in the delay slot,
   whether or not the branch is taken.  */
263 bt/s LOCAL(div_le128)
/* The cmp/hi directly below sits in the delay slot of this bf/s, so the
   T bit it sets (the sign of r4, per its comment) is available on both
   the taken and the fall-through path.  */
269 bf/s LOCAL(div_ge32k)
270 cmp/hi r1,r4 ! copy sign bit of r4 into T
272 shll16 r6 ! 7 cycles up to here
278 mov r4,r0 ! re-compute adjusted dividend
284 add r4,r0 ! adjusted dividend
/* Special-case detection for r4 = 0x80000000 with r5 = 0x80: r8 is built
   so that it is zero only on a match (see the comments on the following
   instructions); this is the saturation case explained in the block
   comment above.  */
288 swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
291 xor r1,r0 ! xor dividend with result lsb
296 add #-0x80,r8 ! r8 is 0 iff there is a match
298 swap.w r8,r7 ! or upper 16 bits...
300 or r7,r8 !...into lower 16 bits
308 xor r7,r1 ! replace lsb of result with lsb of dividend
/* "end adjm." below presumably abbreviates the final one's -> two's
   complement adjustment discussed in the block comment above.  */
310 neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
316 mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
/* Table-driven paths for small divisors (unsigned entry first, then the
   signed one).  mova places the PC-relative address of the named table in
   r0; this is why the code and the tables live in the same object, as the
   comment preceding this routine explains.  */
334 LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
335 mova LOCAL(div_table_inv),r0
338 mova LOCAL(div_table_clz),r0
359 LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
360 mova LOCAL(div_table_inv),r0
363 mova LOCAL(div_table_clz),r0
373 bt/s LOCAL(le128_neg)
383 /* Could trap divide by zero for the cost of one cycle more mispredict penalty:
387 bt/s LOCAL(le128_neg)
389 bt LOCAL(div_by_zero)
398 bt LOCAL(div_by_zero)
413 ENDFUNC(GLOBAL(udivsi3_i4i))
414 ENDFUNC(GLOBAL(sdivsi3_i4i))
416 /* This table has been generated by divtab-sh4.c. */
546 LOCAL(div_table_clz):
675 /* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
676 or in bit 33 for powers of two. */
806 LOCAL(div_table_inv):
935 /* maximum error: 0.987342 scaled: 0.921875*/
937 #endif /* SH3 / SH4 */
939 #endif /* L_div_table */
940 #endif /* !__SHMEDIA__ */