1 /* Copyright (C) 2004, 2006 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 2, or (at your option) any later version.
8 In addition to the permissions in the GNU General Public License, the
9 Free Software Foundation gives you unlimited permission to link the
10 compiled version of this file into combinations with other programs,
11 and to distribute those combinations without any restriction coming
12 from the use of this file. (The General Public License restrictions
13 do apply in other respects; for example, they cover modification of
14 the file, and distribution when not linked into a combine executable.)
17 This file is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; see the file COPYING. If not, write to
24 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
25 Boston, MA 02110-1301, USA. */
27 /* libgcc routines for the STMicroelectronics ST40-300 CPU.
28 Contributed by J"orn Rennecke joern.rennecke@st.com. */
30 #include "lib1funcs.h"
33 #if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
34 /* This code uses shld, and thus is not suitable for SH1 / SH2. */
36 /* Signed / unsigned division without use of FPU, optimized for SH4-300.
37 Uses a lookup table for divisors in the range -128 .. +127, and
38 div1 with case distinction for larger divisors in three more ranges.
39 The code is lumped together with the table to allow the use of mova. */
/* NOTE(review): neither the body of the following __LITTLE_ENDIAN__
   conditional nor its matching #endif is visible in this excerpt;
   presumably it selects endian-dependent definitions -- confirm against
   the full file.  */
40 #ifdef __LITTLE_ENDIAN__
/* udivsi3_i4i / sdivsi3_i4i: the two division entry points of this file.
   Both symbols are exported with .global and bracketed by FUNC / ENDFUNC
   (macros not defined in this file; presumably from lib1funcs.h).
   Going by the inline comments below, r4 carries the dividend and r5 the
   divisor, and r6 / r7 / r8 are saved and restored through r15 with
   pre-decrement stores and post-increment loads.
   NOTE(review): the entry labels themselves and most instructions are not
   visible in this excerpt, so the notes added below only restate what the
   surviving lines and their comments show.  */
50 .global GLOBAL(udivsi3_i4i)
51 .global GLOBAL(sdivsi3_i4i)
52 FUNC(GLOBAL(udivsi3_i4i))
53 FUNC(GLOBAL(sdivsi3_i4i))
/* NOTE(review): the label name and the 0x800000 special case detected
   below suggest this path serves divisors of magnitude 0x800000 (8M) and
   above -- confirm against the full file.  */
56 LOCAL(div_ge8m): ! 10 cycles up to here
57 rotcr r1 ! signed shift must use original sign from r4
64 swap.w r5,r0 ! detect -0x80000000 : 0x800000
99 ! 31 cycles up to here
/* NOTE(review): presumably the unsigned path for divisors of 64K and
   above, judging by the label name only -- confirm.  */
102 LOCAL(udiv_ge64k): ! 3 cycles up to here
110 ! 7 cycles up to here
114 extu.b r4,r1 ! 15 cycles up to here
/* The .endr below closes a repeat block whose opening directive is not
   visible in this excerpt.  */
121 .endr ! 25 cycles up to here
127 rotcl r0 ! 28 cycles up to here
/* NOTE(review): purpose of this unsigned sub-path is not derivable from
   the visible lines; the only visible action is the reload of r6 from the
   stack via r15 with post-increment.  */
130 LOCAL(udiv_r8): ! 6 cycles up to here
142 ! 12 cycles up to here
146 mov.l @r15+,r6 ! 24 cycles up to here
/* Branch target of the "bf/s LOCAL(div_ge32k)" further down.
   NOTE(review): presumably the signed path for divisors of magnitude
   0x8000 (32K) and above; the comments below show it detecting the
   r4 == 0x80000000 special case -- confirm the exact range.  */
151 LOCAL(div_ge32k): ! 6 cycles up to here
159 cmp/hi r1,r4 ! copy sign bit of r4 into T
160 rotcr r1 ! signed shift must use original sign from r4
165 mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
187 neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
191 swap.w r7,r7 ! 26 cycles up to here.
207 shad r1,r5 ! 34 cycles up to here
/* NOTE(review): the label that starts the following sequence is not
   visible in this excerpt.  The visible lines save r7 on the stack and
   end in a div1 step.  */
224 extu.b r4,r0 ! 7 cycles up to here
227 .endr ! 15 cycles up to here
228 xor r1,r0 ! xor dividend with result lsb
232 mov.l r7,@-r15 ! 21 cycles up to here
238 xor r7,r1 ! replace lsb of result with lsb of dividend
245 div1 r6,r1 ! 28 cycles up to here
247 /* This is link-compatible with a GLOBAL(sdivsi3) call,
248 but we effectively clobber only r1, macl and mach */
249 /* Because negative quotients are calculated as one's complements,
250 -0x80000000 divided by the smallest positive number of a number
251 range (0x80, 0x8000, 0x800000) causes saturation in the one's
252 complement representation, and we have to suppress the
253 one's -> two's complement adjustment. Since positive numbers
254 don't get such an adjustment, it's OK to also compute one's -> two's
255 complement adjustment suppression for a dividend of 0. */
/* NOTE(review): the two comments above presumably introduce the signed
   entry point, whose label is not visible in this excerpt.  The visible
   branches dispatch to the table-driven path (div_le128) and to
   div_ge32k; the code after them handles the r5 = 0x80 special case and
   restores r8 from the stack.  */
262 bt/s LOCAL(div_le128)
268 bf/s LOCAL(div_ge32k)
269 cmp/hi r1,r4 ! copy sign bit of r4 into T
271 shll16 r6 ! 7 cycles up to here
277 mov r4,r0 ! re-compute adjusted dividend
283 add r4,r0 ! adjusted dividend
287 swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
290 xor r1,r0 ! xor dividend with result lsb
295 add #-0x80,r8 ! r8 is 0 iff there is a match
297 swap.w r8,r7 ! or upper 16 bits...
299 or r7,r8 !...into lower 16 bits
307 xor r7,r1 ! replace lsb of result with lsb of dividend
309 neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
315 mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
/* Table-driven unsigned path: loads the address of div_table_inv and then
   of div_table_clz into r0 with mova.  The loads that consume those
   addresses are not visible in this excerpt.
   NOTE(review): presumably taken for small divisors covered by the lookup
   table -- confirm the exact bound.  */
333 LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
334 mova LOCAL(div_table_inv),r0
337 mova LOCAL(div_table_clz),r0
/* Table-driven signed path, reached from the "bt/s LOCAL(div_le128)"
   above; uses the same two tables as udiv_le128 and branches to le128_neg,
   whose definition is not visible in this excerpt.  */
358 LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
359 mova LOCAL(div_table_inv),r0
362 mova LOCAL(div_table_clz),r0
372 bt/s LOCAL(le128_neg)
/* NOTE(review): the block comment opened on the next line has no visible
   terminator in this excerpt before the ENDFUNC directives.  The branches
   to div_by_zero that follow it are presumably part of the commented-out
   alternative (no div_by_zero label is visible anywhere) -- confirm that
   the full file closes the comment before ENDFUNC.  */
382 /* Could trap divide by zero for the cost of one cycle more mispredict penalty:
386 bt/s LOCAL(le128_neg)
388 bt LOCAL(div_by_zero)
397 bt LOCAL(div_by_zero)
412 ENDFUNC(GLOBAL(udivsi3_i4i))
413 ENDFUNC(GLOBAL(sdivsi3_i4i))
415 /* This table has been generated by divtab-sh4.c. */
/* div_table_clz: address taken with mova in the udiv_le128 and div_le128
   paths.  The table entries are not visible in this excerpt.
   NOTE(review): the name suggests per-divisor leading-zero / shift counts;
   confirm against divtab-sh4.c.  */
545 LOCAL(div_table_clz):
674 /* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
675 or in bit 33 for powers of two. */
/* div_table_inv: the reciprocal table described by the comment above;
   address taken with mova in the udiv_le128 and div_le128 paths.  The
   table entries are not visible in this excerpt.  */
805 LOCAL(div_table_inv):
934 /* maximum error: 0.987342 scaled: 0.921875 */
936 #endif /* SH3 / SH4 */
/* NOTE(review): no matching "#ifdef L_div_table" is visible in this
   excerpt; confirm that the full file opens this conditional before the
   SH3 / SH4 guard.  */
938 #endif /* L_div_table */