1 /* Copyright (C) 2006 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 2, or (at your option) any later version.
8 In addition to the permissions in the GNU General Public License, the
9 Free Software Foundation gives you unlimited permission to link the
10 compiled version of this file into combinations with other programs,
11 and to distribute those combinations without any restriction coming
12 from the use of this file. (The General Public License restrictions
13 do apply in other respects; for example, they cover modification of
14 the file, and distribution when not linked into a combine executable.)
17 This file is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; see the file COPYING. If not, write to
24 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
25 Boston, MA 02110-1301, USA. */
27 /* Moderately Space-optimized libgcc routines for the Renesas SH /
28 STMicroelectronics ST40 CPUs.
29 Contributed by Joern Rennecke <joern.rennecke@st.com>. */
31 #include "lib1funcs.h"
36 /* 88 bytes; sh4-200 cycle counts:
37 divisor >= 2G: 11 cycles
38 dividend < 2G: 48 cycles
39 dividend >= 2G, divisor != 1: 54 cycles
40 dividend >= 2G, divisor == 1: 22 cycles */
41 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
42 !! args in r4 and r5, result in r0, clobber r1
44 .global GLOBAL(udivsi3_i4i)
45 FUNC(GLOBAL(udivsi3_i4i))
52 bf LOCAL(huge_divisor)
60 bt LOCAL(dividend_adjusted)
66 LOCAL(dividend_adjusted):
74 #else /* !FMOVD_WORKS */
80 bt/s LOCAL(dividend_adjusted)
87 LOCAL(dividend_adjusted):
97 #endif /* !FMOVD_WORKS */
104 .p2align 3 ! make double below 8 byte aligned.
122 ENDFUNC(GLOBAL(udivsi3_i4i))
123 #elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
126 /* With 36 bytes, the following would probably be the most compact
127 implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
152 /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
154 udiv small divisor: 55 cycles
155 udiv large divisor: 52 cycles
156 sdiv small divisor, positive result: 59 cycles
157 sdiv large divisor, positive result: 56 cycles
158 sdiv small divisor, negative result: 65 cycles (*)
159 sdiv large divisor, negative result: 62 cycles (*)
160 (*): r2 is restored in the rts delay slot and has a lingering latency
161 of two more cycles. */
/* udivsi3_i4i and sdivsi3_i4i, variant built when no FPU variant applies
   and __sh1__ is not defined.  The two entry points are implemented
   jointly: FUNC is opened for both here, and the signed code further down
   branches back into the sdiv_small_divisor / sdiv_large_divisor labels
   that sit inside the unsigned body.
   GLOBAL, LOCAL, FUNC and ENDFUNC are macros taken from lib1funcs.h.
   NOTE(review): most instructions between the labels and branches are not
   visible in this excerpt; comments below are limited to what is shown.  */
163 .global GLOBAL(udivsi3_i4i)
164 FUNC(GLOBAL(udivsi3_i4i))
165 FUNC(GLOBAL(sdivsi3_i4i))
/* bf/s: delayed branch taken when the T bit is clear.  NOTE(review): the
   test that sets T and the delay-slot instruction are not visible here.  */
173 bf/s LOCAL(large_divisor)
/* Entry into the small-divisor path; also reached from the signed code
   via the bra at the end of sdiv_check_divisor.  */
177 LOCAL(sdiv_small_divisor):
/* Each div1 r5,r4 is one single-step division of r4 by r5; ';' separates
   several instructions on one source line.  */
200 div1 r5,r4; div1 r5,r4; div1 r5,r4
/* The div1 written after rts sits in the rts delay slot, so it still
   executes before control returns to the caller.  */
201 div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
/* Large-divisor path: large_divisor is the target of the bf/s above,
   sdiv_large_divisor is the target of the bf/s in sdiv_check_divisor.
   NOTE(review): a line between the two labels is not visible here.  */
212 LOCAL(large_divisor):
214 LOCAL(sdiv_large_divisor):
225 ENDFUNC(GLOBAL(udivsi3_i4i))
/* Signed entry point.  NOTE(review): the sdivsi3_i4i label itself and the
   sign tests that set T before each branch are not visible here, nor are
   the pos_divisor, neg_result and pos_result label definitions.  */
227 .global GLOBAL(sdivsi3_i4i)
232 bt/s LOCAL(pos_divisor)
236 bt/s LOCAL(neg_result)
/* bra is an unconditional delayed branch.  */
241 bra LOCAL(sdiv_check_divisor)
245 bt/s LOCAL(pos_result)
/* mova places the address of negate_result in r0.  NOTE(review):
   presumably this address is used later to route the return through the
   negation code; confirm against the full source.  */
249 mova LOCAL(negate_result),r0
/* Picks between the two shared division paths in the unsigned body: T
   clear goes to sdiv_large_divisor, otherwise the bra falls through to
   sdiv_small_divisor.  */
255 LOCAL(sdiv_check_divisor):
257 bf/s LOCAL(sdiv_large_divisor)
259 bra LOCAL(sdiv_small_divisor)
/* Target whose address is loaded by the mova above.  NOTE(review): the
   instructions under this label are not visible in this excerpt.  */
262 LOCAL(negate_result):
266 ENDFUNC(GLOBAL(sdivsi3_i4i))
267 #endif /* !__SH_FPU_DOUBLE__ */
268 #endif /* L_udivsi3_i4i */
271 #if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
272 /* 48 bytes, 45 cycles on sh4-200 */
273 !! args in r4 and r5, result in r0, clobber r1
/* sdivsi3_i4i, variant built when __SH_FPU_DOUBLE__ or __SH4_SINGLE_ONLY__
   is defined.  Register contract as stated in the file: arguments in r4
   and r5, result in r0, r1 is clobbered; the file quotes 48 bytes and 45
   cycles on sh4-200.
   NOTE(review): the routine's instructions fall between FUNC and ENDFUNC
   and are not visible in this excerpt.  */
275 .global GLOBAL(sdivsi3_i4i)
276 FUNC(GLOBAL(sdivsi3_i4i))
324 ENDFUNC(GLOBAL(sdivsi3_i4i))
325 #endif /* __SH_FPU_DOUBLE__ */
326 #endif /* L_sdivsi3_i4i */
327 #endif /* !__SHMEDIA__ */