1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 Free Software Foundation, Inc.
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 This file is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; see the file COPYING. If not, write to
26 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27 Boston, MA 02110-1301, USA. */
29 !! libgcc routines for the Renesas / SuperH SH CPUs.
30 !! Contributed by Steve Chamberlain.
33 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34 !! recoded in assembly by Toshiyasu Morita
37 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38 ELF local label prefixes by J"orn Rennecke
/* Symbol-annotation helpers, first variant: local labels carry the ".L_"
   prefix and functions receive .type / .size / .hidden annotations.
   NOTE(review): the preprocessor conditional that selects between this
   variant and the second one below is not visible in this excerpt --
   presumably this is the ELF arm; confirm.  */
42 #define LOCAL(X) .L_##X
/* Tag symbol X as a function.  */
43 #define FUNC(X) .type X,@function
/* FUNC, plus hidden visibility for X.  */
44 #define HIDDEN_FUNC(X) FUNC(X); .hidden X
/* ALIAS, plus hidden visibility for the new name GLOBAL(X).  */
45 #define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
/* Place the end label .Lfe_X at the current location and record the size of
   X as the distance from X to that label.  */
46 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
/* Extra level of expansion: an argument such as GLOBAL(foo) is fully
   macro-expanded here before ENDFUNC0 pastes it with ##.  */
47 #define ENDFUNC(X) ENDFUNC0(X)
/* Second variant: local labels use a plain "L_" prefix, HIDDEN_FUNC expands
   to nothing, and HIDDEN_ALIAS is an ordinary ALIAS with no visibility
   annotation.  */
49 #define LOCAL(X) L_##X
51 #define HIDDEN_FUNC(X)
52 #define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
/* GLOBAL(X) builds the exported symbol name: __USER_LABEL_PREFIX__ followed
   by "__" and X.  CONCAT and GLOBAL0 exist so that __USER_LABEL_PREFIX__ is
   expanded before the token paste takes place.  */
56 #define CONCAT(A,B) A##B
57 #define GLOBAL0(U,X) CONCAT(U,__##X)
58 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
/* ALIAS(X,Y): export GLOBAL(X) and give it the same value as GLOBAL(Y).  */
60 #define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
/* ashiftrt_r4_N family: one exported entry point per N in 0..32.
   NOTE(review): going by the names, entry N presumably shifts r4 right
   arithmetically by N bits; the instructions themselves are not visible in
   this excerpt, so confirm against the full source.  */
/* Export all 33 entry points.  */
69 .global GLOBAL(ashiftrt_r4_0)
70 .global GLOBAL(ashiftrt_r4_1)
71 .global GLOBAL(ashiftrt_r4_2)
72 .global GLOBAL(ashiftrt_r4_3)
73 .global GLOBAL(ashiftrt_r4_4)
74 .global GLOBAL(ashiftrt_r4_5)
75 .global GLOBAL(ashiftrt_r4_6)
76 .global GLOBAL(ashiftrt_r4_7)
77 .global GLOBAL(ashiftrt_r4_8)
78 .global GLOBAL(ashiftrt_r4_9)
79 .global GLOBAL(ashiftrt_r4_10)
80 .global GLOBAL(ashiftrt_r4_11)
81 .global GLOBAL(ashiftrt_r4_12)
82 .global GLOBAL(ashiftrt_r4_13)
83 .global GLOBAL(ashiftrt_r4_14)
84 .global GLOBAL(ashiftrt_r4_15)
85 .global GLOBAL(ashiftrt_r4_16)
86 .global GLOBAL(ashiftrt_r4_17)
87 .global GLOBAL(ashiftrt_r4_18)
88 .global GLOBAL(ashiftrt_r4_19)
89 .global GLOBAL(ashiftrt_r4_20)
90 .global GLOBAL(ashiftrt_r4_21)
91 .global GLOBAL(ashiftrt_r4_22)
92 .global GLOBAL(ashiftrt_r4_23)
93 .global GLOBAL(ashiftrt_r4_24)
94 .global GLOBAL(ashiftrt_r4_25)
95 .global GLOBAL(ashiftrt_r4_26)
96 .global GLOBAL(ashiftrt_r4_27)
97 .global GLOBAL(ashiftrt_r4_28)
98 .global GLOBAL(ashiftrt_r4_29)
99 .global GLOBAL(ashiftrt_r4_30)
100 .global GLOBAL(ashiftrt_r4_31)
101 .global GLOBAL(ashiftrt_r4_32)
/* Annotate every entry point through HIDDEN_FUNC.  */
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
117 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
118 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
119 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
120 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
121 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
122 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
123 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
124 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
125 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
126 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
127 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
128 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
129 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
130 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
131 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
132 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
133 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
134 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
135 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
/* Entry labels, laid out from ashiftrt_r4_32 down to ashiftrt_r4_0.  The
   labels for 32 and 31 sit on adjacent source lines.  For the remaining
   labels the embedded source line numbers skip, i.e. the lines between
   them are missing from this view.
   NOTE(review): presumably those are shift instructions that each entry
   falls through, which would explain the descending order; confirm against
   the full source before reordering anything here.  */
138 GLOBAL(ashiftrt_r4_32):
139 GLOBAL(ashiftrt_r4_31):
144 GLOBAL(ashiftrt_r4_30):
146 GLOBAL(ashiftrt_r4_29):
148 GLOBAL(ashiftrt_r4_28):
150 GLOBAL(ashiftrt_r4_27):
152 GLOBAL(ashiftrt_r4_26):
154 GLOBAL(ashiftrt_r4_25):
156 GLOBAL(ashiftrt_r4_24):
162 GLOBAL(ashiftrt_r4_23):
164 GLOBAL(ashiftrt_r4_22):
166 GLOBAL(ashiftrt_r4_21):
168 GLOBAL(ashiftrt_r4_20):
170 GLOBAL(ashiftrt_r4_19):
172 GLOBAL(ashiftrt_r4_18):
174 GLOBAL(ashiftrt_r4_17):
176 GLOBAL(ashiftrt_r4_16):
181 GLOBAL(ashiftrt_r4_15):
183 GLOBAL(ashiftrt_r4_14):
185 GLOBAL(ashiftrt_r4_13):
187 GLOBAL(ashiftrt_r4_12):
189 GLOBAL(ashiftrt_r4_11):
191 GLOBAL(ashiftrt_r4_10):
193 GLOBAL(ashiftrt_r4_9):
195 GLOBAL(ashiftrt_r4_8):
197 GLOBAL(ashiftrt_r4_7):
199 GLOBAL(ashiftrt_r4_6):
201 GLOBAL(ashiftrt_r4_5):
203 GLOBAL(ashiftrt_r4_4):
205 GLOBAL(ashiftrt_r4_3):
207 GLOBAL(ashiftrt_r4_2):
209 GLOBAL(ashiftrt_r4_1):
213 GLOBAL(ashiftrt_r4_0):
/* Close every entry point.  The ENDFUNC lines are consecutive, so where
   ENDFUNC emits an end label and .size, all 33 end labels share one address
   and each symbol's size runs from its own entry label to that address.  */
217 ENDFUNC(GLOBAL(ashiftrt_r4_0))
218 ENDFUNC(GLOBAL(ashiftrt_r4_1))
219 ENDFUNC(GLOBAL(ashiftrt_r4_2))
220 ENDFUNC(GLOBAL(ashiftrt_r4_3))
221 ENDFUNC(GLOBAL(ashiftrt_r4_4))
222 ENDFUNC(GLOBAL(ashiftrt_r4_5))
223 ENDFUNC(GLOBAL(ashiftrt_r4_6))
224 ENDFUNC(GLOBAL(ashiftrt_r4_7))
225 ENDFUNC(GLOBAL(ashiftrt_r4_8))
226 ENDFUNC(GLOBAL(ashiftrt_r4_9))
227 ENDFUNC(GLOBAL(ashiftrt_r4_10))
228 ENDFUNC(GLOBAL(ashiftrt_r4_11))
229 ENDFUNC(GLOBAL(ashiftrt_r4_12))
230 ENDFUNC(GLOBAL(ashiftrt_r4_13))
231 ENDFUNC(GLOBAL(ashiftrt_r4_14))
232 ENDFUNC(GLOBAL(ashiftrt_r4_15))
233 ENDFUNC(GLOBAL(ashiftrt_r4_16))
234 ENDFUNC(GLOBAL(ashiftrt_r4_17))
235 ENDFUNC(GLOBAL(ashiftrt_r4_18))
236 ENDFUNC(GLOBAL(ashiftrt_r4_19))
237 ENDFUNC(GLOBAL(ashiftrt_r4_20))
238 ENDFUNC(GLOBAL(ashiftrt_r4_21))
239 ENDFUNC(GLOBAL(ashiftrt_r4_22))
240 ENDFUNC(GLOBAL(ashiftrt_r4_23))
241 ENDFUNC(GLOBAL(ashiftrt_r4_24))
242 ENDFUNC(GLOBAL(ashiftrt_r4_25))
243 ENDFUNC(GLOBAL(ashiftrt_r4_26))
244 ENDFUNC(GLOBAL(ashiftrt_r4_27))
245 ENDFUNC(GLOBAL(ashiftrt_r4_28))
246 ENDFUNC(GLOBAL(ashiftrt_r4_29))
247 ENDFUNC(GLOBAL(ashiftrt_r4_30))
248 ENDFUNC(GLOBAL(ashiftrt_r4_31))
249 ENDFUNC(GLOBAL(ashiftrt_r4_32))
/* ashrsi3: exported entry point, annotated through HIDDEN_FUNC.  Only the
   dispatch-table skeleton of the routine is visible in this excerpt.
   NOTE(review): by libgcc naming this is presumably the 32-bit arithmetic
   right shift helper; the argument registers are not shown here.  */
271 .global GLOBAL(ashrsi3)
272 HIDDEN_FUNC(GLOBAL(ashrsi3))
/* r0 = address of the dispatch table below.  */
277 mova LOCAL(ashrsi3_table),r0
/* Dispatch table: entry N holds the distance in bytes from the start of the
   table to local label ashrsi3_N, for N = 0..31.  Each distance is stored in
   a single byte, so every target label has to stay within byte range of the
   table.
   NOTE(review): the code that loads an entry and branches on it is not
   visible here; check whether the byte is read as signed or unsigned before
   moving any ashrsi3_N label.  */
288 LOCAL(ashrsi3_table):
289 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
305 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
306 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
307 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
308 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
309 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
310 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
311 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
312 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
313 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
314 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
315 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
316 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
317 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
318 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
319 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
320 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
/* End of ashrsi3.  */
400 ENDFUNC(GLOBAL(ashrsi3))
/* ashlsi3: exported entry point, annotated through HIDDEN_FUNC.  Only the
   dispatch-table skeleton of the routine is visible in this excerpt.
   NOTE(review): by libgcc naming this is presumably the 32-bit left shift
   helper; the argument registers are not shown here.  */
421 .global GLOBAL(ashlsi3)
422 HIDDEN_FUNC(GLOBAL(ashlsi3))
/* r0 = address of the dispatch table below.  */
427 mova LOCAL(ashlsi3_table),r0
/* Dispatch table: entry N holds the distance in bytes from the start of the
   table to local label ashlsi3_N, for N = 0..31.  Each distance is stored in
   a single byte, so every target label has to stay within byte range of the
   table.
   NOTE(review): the code that loads an entry and branches on it is not
   visible here; check whether the byte is read as signed or unsigned before
   moving any ashlsi3_N label.  */
438 LOCAL(ashlsi3_table):
439 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
455 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
456 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
457 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
458 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
459 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
460 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
461 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
462 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
463 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
464 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
465 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
466 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
467 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
468 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
469 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
470 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
/* End of ashlsi3.  */
559 ENDFUNC(GLOBAL(ashlsi3))
/* lshrsi3: exported entry point, annotated through HIDDEN_FUNC.  Only the
   dispatch-table skeleton of the routine is visible in this excerpt.
   NOTE(review): by libgcc naming this is presumably the 32-bit logical
   right shift helper; the argument registers are not shown here.  */
580 .global GLOBAL(lshrsi3)
581 HIDDEN_FUNC(GLOBAL(lshrsi3))
/* r0 = address of the dispatch table below.  */
586 mova LOCAL(lshrsi3_table),r0
/* Dispatch table: entry N holds the distance in bytes from the start of the
   table to local label lshrsi3_N, for N = 0..31.  Each distance is stored in
   a single byte, so every target label has to stay within byte range of the
   table.
   NOTE(review): the code that loads an entry and branches on it is not
   visible here; check whether the byte is read as signed or unsigned before
   moving any lshrsi3_N label.  */
597 LOCAL(lshrsi3_table):
598 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
614 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
615 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
616 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
617 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
618 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
619 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
620 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
621 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
622 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
623 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
624 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
625 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
626 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
627 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
628 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
629 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
/* End of lshrsi3.  */
718 ENDFUNC(GLOBAL(lshrsi3))
724 .global GLOBAL(movmem)
725 HIDDEN_FUNC(GLOBAL(movmem))
726 HIDDEN_ALIAS(movstr,movmem)
727 /* This would be a lot simpler if r6 contained the byte count
728 minus 64, and we wouldn't be called here for a byte count of 64. */
732 bsr GLOBAL(movmemSI52+2)
735 LOCAL(movmem_loop): /* Reached with rts */
741 bt LOCAL(movmem_done)
748 bt GLOBAL(movmemSI52)
749 ! done all the large groups, do the remainder
751 mova GLOBAL(movmemSI4)+4,r0
754 LOCAL(movmem_done): ! share slot insn, works out aligned.
761 ! ??? We need aliases movstr* for movmem* for the older libraries. These
762 ! aliases will be removed at some point in the future.
763 .global GLOBAL(movmemSI64)
764 HIDDEN_FUNC(GLOBAL(movmemSI64))
765 HIDDEN_ALIAS(movstrSI64,movmemSI64)
769 .global GLOBAL(movmemSI60)
770 HIDDEN_FUNC(GLOBAL(movmemSI60))
771 HIDDEN_ALIAS(movstrSI60,movmemSI60)
775 .global GLOBAL(movmemSI56)
776 HIDDEN_FUNC(GLOBAL(movmemSI56))
777 HIDDEN_ALIAS(movstrSI56,movmemSI56)
781 .global GLOBAL(movmemSI52)
782 HIDDEN_FUNC(GLOBAL(movmemSI52))
783 HIDDEN_ALIAS(movstrSI52,movmemSI52)
787 .global GLOBAL(movmemSI48)
788 HIDDEN_FUNC(GLOBAL(movmemSI48))
789 HIDDEN_ALIAS(movstrSI48,movmemSI48)
793 .global GLOBAL(movmemSI44)
794 HIDDEN_FUNC(GLOBAL(movmemSI44))
795 HIDDEN_ALIAS(movstrSI44,movmemSI44)
799 .global GLOBAL(movmemSI40)
800 HIDDEN_FUNC(GLOBAL(movmemSI40))
801 HIDDEN_ALIAS(movstrSI40,movmemSI40)
805 .global GLOBAL(movmemSI36)
806 HIDDEN_FUNC(GLOBAL(movmemSI36))
807 HIDDEN_ALIAS(movstrSI36,movmemSI36)
811 .global GLOBAL(movmemSI32)
812 HIDDEN_FUNC(GLOBAL(movmemSI32))
813 HIDDEN_ALIAS(movstrSI32,movmemSI32)
817 .global GLOBAL(movmemSI28)
818 HIDDEN_FUNC(GLOBAL(movmemSI28))
819 HIDDEN_ALIAS(movstrSI28,movmemSI28)
823 .global GLOBAL(movmemSI24)
824 HIDDEN_FUNC(GLOBAL(movmemSI24))
825 HIDDEN_ALIAS(movstrSI24,movmemSI24)
829 .global GLOBAL(movmemSI20)
830 HIDDEN_FUNC(GLOBAL(movmemSI20))
831 HIDDEN_ALIAS(movstrSI20,movmemSI20)
835 .global GLOBAL(movmemSI16)
836 HIDDEN_FUNC(GLOBAL(movmemSI16))
837 HIDDEN_ALIAS(movstrSI16,movmemSI16)
841 .global GLOBAL(movmemSI12)
842 HIDDEN_FUNC(GLOBAL(movmemSI12))
843 HIDDEN_ALIAS(movstrSI12,movmemSI12)
847 .global GLOBAL(movmemSI8)
848 HIDDEN_FUNC(GLOBAL(movmemSI8))
849 HIDDEN_ALIAS(movstrSI8,movmemSI8)
853 .global GLOBAL(movmemSI4)
854 HIDDEN_FUNC(GLOBAL(movmemSI4))
855 HIDDEN_ALIAS(movstrSI4,movmemSI4)
861 ENDFUNC(GLOBAL(movmemSI64))
862 ENDFUNC(GLOBAL(movmemSI60))
863 ENDFUNC(GLOBAL(movmemSI56))
864 ENDFUNC(GLOBAL(movmemSI52))
865 ENDFUNC(GLOBAL(movmemSI48))
866 ENDFUNC(GLOBAL(movmemSI44))
867 ENDFUNC(GLOBAL(movmemSI40))
868 ENDFUNC(GLOBAL(movmemSI36))
869 ENDFUNC(GLOBAL(movmemSI32))
870 ENDFUNC(GLOBAL(movmemSI28))
871 ENDFUNC(GLOBAL(movmemSI24))
872 ENDFUNC(GLOBAL(movmemSI20))
873 ENDFUNC(GLOBAL(movmemSI16))
874 ENDFUNC(GLOBAL(movmemSI12))
875 ENDFUNC(GLOBAL(movmemSI8))
876 ENDFUNC(GLOBAL(movmemSI4))
877 ENDFUNC(GLOBAL(movmem))
882 .global GLOBAL(movmem_i4_even)
883 .global GLOBAL(movmem_i4_odd)
884 .global GLOBAL(movmemSI12_i4)
886 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
887 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
888 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
890 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
891 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
892 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
902 GLOBAL(movmem_i4_even):
904 bra L_movmem_start_even
907 GLOBAL(movmem_i4_odd):
919 bt/s L_movmem_2mod4_end
933 ENDFUNC(GLOBAL(movmem_i4_even))
934 ENDFUNC(GLOBAL(movmem_i4_odd))
937 GLOBAL(movmemSI12_i4):
946 ENDFUNC(GLOBAL(movmemSI12_i4))
952 .global GLOBAL(mulsi3)
953 HIDDEN_FUNC(GLOBAL(mulsi3))
957 ! r0 = aabb*ccdd via partial products
959 ! if aa == 0 and cc == 0
963 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
967 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
968 mov r5,r3 ! r3 = ccdd
969 swap.w r4,r2 ! r2 = bbaa
970 xtrct r2,r3 ! r3 = aacc
971 tst r3,r3 ! msws zero ?
973 rts ! yes - then we have the answer
976 hiset: sts macl,r0 ! r0 = bb*dd
977 mulu.w r2,r5 ! brewing macl = aa*dd
979 mulu.w r3,r4 ! brewing macl = cc*bb
986 ENDFUNC(GLOBAL(mulsi3))
988 #endif /* ! __SH5__ */
991 !! 4 byte integer Divide code for the Renesas SH
993 !! args in r4 and r5, result in fpul, clobber dr0, dr2
995 .global GLOBAL(sdivsi3_i4)
996 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1006 ENDFUNC(GLOBAL(sdivsi3_i4))
1007 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1008 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1010 #if ! __SH5__ || __SH5__ == 32
1014 .global GLOBAL(sdivsi3_i4)
1015 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1030 ENDFUNC(GLOBAL(sdivsi3_i4))
1031 #endif /* ! __SH5__ || __SH5__ == 32 */
1032 #endif /* ! __SH4__ */
1036 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1038 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1040 !! Steve Chamberlain
1045 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1047 .global GLOBAL(sdivsi3)
1050 .section .text..SHmedia32,"ax"
1056 /* The assembly code that follows is a hand-optimized version of the C
1057 code that follows. Note that the registers that are modified are
1058 exactly those listed as clobbered in the patterns divsi3_i1 and
1061 int __sdivsi3 (i, j)
1064 register unsigned long long r18 asm ("r18");
1065 register unsigned long long r19 asm ("r19");
1066 register unsigned long long r0 asm ("r0") = 0;
1067 register unsigned long long r1 asm ("r1") = 1;
1068 register int r2 asm ("r2") = i >> 31;
1069 register int r3 asm ("r3") = j >> 31;
1081 r0 |= r1, r18 -= r19;
1082 while (r19 >>= 1, r1 >>= 1);
1084 return r2 * (int)r0;
1088 pt/l LOCAL(sdivsi3_dontadd), tr2
1089 pt/l LOCAL(sdivsi3_loop), tr1
1102 LOCAL(sdivsi3_loop):
1106 LOCAL(sdivsi3_dontadd):
1115 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1118 // can create absolute value without extra latency,
1119 // but dependent on proper sign extension of inputs:
1122 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1125 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1126 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1134 // If r4 was to be used in-place instead of r21, could use this sequence
1135 // to compute absolute:
1136 // sub r63,r4,r19 // compute absolute value of r4
1137 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1138 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1150 mmacnfx.wl r25,r2,r1
1175 #else /* ! 0 && ! 0 */
1178 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1180 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1182 FUNC(GLOBAL(sdivsi3))
1183 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1184 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1185 // with the SHcompact implementation, which clobbers tr1 / tr2.
1186 .global GLOBAL(sdivsi3_1)
1188 .global GLOBAL(div_table_internal)
1189 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1190 shori GLOBAL(div_table_internal) & 65535, r20
1192 .global GLOBAL(sdivsi3_2)
1194 // clobbered: r1,r18,r19,r21,r25,tr0
1197 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1198 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1199 ldx.ub r20, r21, r19 // u0.8
1200 shari r25, 32, r25 // normalize to s2.30
1202 muls.l r25, r19, r19 // s2.38
1203 ldx.w r20, r21, r21 // s2.14
1205 shari r19, 24, r19 // truncate to s2.14
1206 sub r21, r19, r19 // some 11 bit inverse in s1.14
1207 muls.l r19, r19, r21 // u0.28
1210 muls.l r25, r21, r18 // s2.58
1211 shlli r19, 45, r19 // multiply by two and convert to s2.58
1214 shari r18, 28, r18 // some 22 bit inverse in s1.30
1215 muls.l r18, r25, r0 // s2.60
1216 muls.l r18, r4, r25 // s32.30
1218 shari r0, 16, r19 // s-16.44
1219 muls.l r19, r18, r19 // s-16.74
1221 shari r4, 14, r18 // s19.-14
1222 shari r19, 30, r19 // s-16.44
1223 muls.l r19, r18, r19 // s15.30
1224 xor r21, r0, r21 // You could also use the constant 1 << 27.
1231 ENDFUNC(GLOBAL(sdivsi3))
1233 ENDFUNC(GLOBAL(sdivsi3_2))
1235 #elif defined __SHMEDIA__
1236 /* m5compact-nofpu */
1237 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1239 .section .text..SHmedia32,"ax"
1241 FUNC(GLOBAL(sdivsi3))
1243 pt/l LOCAL(sdivsi3_dontsub), tr0
1244 pt/l LOCAL(sdivsi3_loop), tr1
1256 LOCAL(sdivsi3_loop):
1260 LOCAL(sdivsi3_dontsub):
1266 ENDFUNC(GLOBAL(sdivsi3))
1267 #else /* ! __SHMEDIA__ */
1268 FUNC(GLOBAL(sdivsi3))
1353 ENDFUNC(GLOBAL(sdivsi3))
1354 #endif /* ! __SHMEDIA__ */
1355 #endif /* ! __SH4__ */
1360 !! 4 byte integer Divide code for the Renesas SH
1362 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1365 .global GLOBAL(udivsi3_i4)
1366 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1378 #ifdef __LITTLE_ENDIAN__
1402 .align 3 ! make double below 8 byte aligned.
1407 ENDFUNC(GLOBAL(udivsi3_i4))
1408 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1409 #if ! __SH5__ || __SH5__ == 32
1410 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1412 .global GLOBAL(udivsi3_i4)
1413 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1427 ENDFUNC(GLOBAL(udivsi3_i4))
1428 #endif /* ! __SH5__ || __SH5__ == 32 */
1429 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1430 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1432 .global GLOBAL(udivsi3_i4)
1433 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1447 #ifdef __LITTLE_ENDIAN__
1467 .align 3 ! make double below 8 byte aligned.
1482 ENDFUNC(GLOBAL(udivsi3_i4))
1483 #endif /* ! __SH4__ */
1487 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1489 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1491 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1492 .global GLOBAL(udivsi3)
1493 HIDDEN_FUNC(GLOBAL(udivsi3))
1497 .section .text..SHmedia32,"ax"
1503 /* The assembly code that follows is a hand-optimized version of the C
1504 code that follows. Note that the registers that are modified are
1505 exactly those listed as clobbered in the patterns udivsi3_i1 and
1512 register unsigned long long r0 asm ("r0") = 0;
1513 register unsigned long long r18 asm ("r18") = 1;
1514 register unsigned long long r4 asm ("r4") = i;
1515 register unsigned long long r19 asm ("r19") = j;
1521 r0 |= r18, r4 -= r19;
1522 while (r19 >>= 1, r18 >>= 1);
1528 pt/l LOCAL(udivsi3_dontadd), tr2
1529 pt/l LOCAL(udivsi3_loop), tr1
1537 LOCAL(udivsi3_loop):
1541 LOCAL(udivsi3_dontadd):
1549 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1555 movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
1557 mmulfx.w r21,r21,r19
1558 mshflo.w r21,r63,r21
1560 mmulfx.w r25,r19,r19
1564 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1565 before the msub.w, but we need a different value for
1566 r19 to keep errors under control. */
1568 mmulfx.w r19,r19,r19
1572 mmacnfx.wl r25,r19,r21
1597 #elif defined (__SHMEDIA__)
1598 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1599 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1600 So use a short shmedia loop. */
1601 // clobbered: r20,r21,r25,tr0,tr1,tr2
1603 .section .text..SHmedia32,"ax"
1606 pt/l LOCAL(udivsi3_dontsub), tr0
1607 pt/l LOCAL(udivsi3_loop), tr1
1612 LOCAL(udivsi3_loop):
1616 LOCAL(udivsi3_dontsub):
1621 #else /* ! defined (__SHMEDIA__) */
1625 div1 r5,r4; div1 r5,r4; div1 r5,r4
1626 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1629 div1 r5,r4; rotcl r0
1630 div1 r5,r4; rotcl r0
1631 div1 r5,r4; rotcl r0
1639 bf LOCAL(large_divisor)
1641 bf/s LOCAL(large_divisor)
1663 LOCAL(large_divisor):
1682 ENDFUNC(GLOBAL(udivsi3))
1683 #endif /* ! __SHMEDIA__ */
1684 #endif /* __SH4__ */
1685 #endif /* L_udivsi3 */
1690 .section .text..SHmedia32,"ax"
1692 .global GLOBAL(udivdi3)
1693 FUNC(GLOBAL(udivdi3))
1695 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1700 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1704 sub r63,r22,r20 // r63 == 64 % 64
1706 pta LOCAL(large_divisor),tr0
1712 bgt/u r9,r63,tr0 // large_divisor
1721 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1722 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1723 the case may be, %0000000000000000 000.11111111111, still */
1724 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1729 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1731 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1740 mcmpgt.l r21,r63,r21 // See Note 1
1742 mshfhi.l r63,r21,r21
1746 /* small divisor: need a third divide step */
1756 /* could test r3 here to check for divide by zero. */
1759 LOCAL(large_divisor):
1768 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1769 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1770 the case may be, %0000000000000000 000.11111111111, still */
1771 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1776 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1778 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1782 pta LOCAL(no_lo_adj),tr0
1789 bgtu/u r7,r25,tr0 // no_lo_adj
1795 /* large_divisor: only needs a few adjustments. */
1802 ENDFUNC(GLOBAL(udivdi3))
1803 /* Note 1: To shift the result of the second divide stage so that the result
1804 always fits into 32 bits, yet we still reduce the rest sufficiently
1805 would require a lot of instructions to do the shifts just right. Using
1806 the full 64 bit shift result to multiply with the divisor would require
1807 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1808 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1809 know that the rest after taking this partial result into account will
1810 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1811 upper 32 bits of the partial result are nonzero. */
1812 #endif /* __SHMEDIA__ */
1813 #endif /* L_udivdi3 */
1818 .section .text..SHmedia32,"ax"
1820 .global GLOBAL(divdi3)
1821 FUNC(GLOBAL(divdi3))
1823 pta GLOBAL(udivdi3_internal),tr0
1835 ENDFUNC(GLOBAL(divdi3))
1836 #endif /* __SHMEDIA__ */
1837 #endif /* L_divdi3 */
1842 .section .text..SHmedia32,"ax"
1844 .global GLOBAL(umoddi3)
1845 FUNC(GLOBAL(umoddi3))
1847 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1852 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1856 sub r63,r22,r20 // r63 == 64 % 64
1858 pta LOCAL(large_divisor),tr0
1864 bgt/u r9,r63,tr0 // large_divisor
1873 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1874 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1875 the case may be, %0000000000000000 000.11111111111, still */
1876 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1881 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1883 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1888 /* bubble */ /* could test r3 here to check for divide by zero. */
1891 mcmpgt.l r21,r63,r21 // See Note 1
1893 mshfhi.l r63,r21,r21
1897 /* small divisor: need a third divide step */
1900 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1910 LOCAL(large_divisor):
1919 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1920 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1921 the case may be, %0000000000000000 000.11111111111, still */
1922 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1927 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1929 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1933 pta LOCAL(no_lo_adj),tr0
1940 bgtu/u r7,r25,tr0 // no_lo_adj
1946 /* large_divisor: only needs a few adjustments. */
1955 ENDFUNC(GLOBAL(umoddi3))
1956 /* Note 1: To shift the result of the second divide stage so that the result
1957 always fits into 32 bits, yet we still reduce the rest sufficiently
1958 would require a lot of instructions to do the shifts just right. Using
1959 the full 64 bit shift result to multiply with the divisor would require
1960 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1961 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1962 know that the rest after taking this partial result into account will
1963 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1964 upper 32 bits of the partial result are nonzero. */
1965 #endif /* __SHMEDIA__ */
1966 #endif /* L_umoddi3 */
1971 .section .text..SHmedia32,"ax"
1973 .global GLOBAL(moddi3)
1974 FUNC(GLOBAL(moddi3))
1976 pta GLOBAL(umoddi3_internal),tr0
1988 ENDFUNC(GLOBAL(moddi3))
1989 #endif /* __SHMEDIA__ */
1990 #endif /* L_moddi3 */
1993 #if !defined (__SH2A_NOFPU__)
1994 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1998 .global GLOBAL(set_fpscr)
1999 HIDDEN_FUNC(GLOBAL(set_fpscr))
2004 mova LOCAL(set_fpscr_L0),r0
2005 mov.l LOCAL(set_fpscr_L0),r12
2007 mov.l LOCAL(set_fpscr_L1),r0
2011 mov.l LOCAL(set_fpscr_L1),r1
2018 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2021 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2030 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2034 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2041 LOCAL(set_fpscr_L0):
2042 .long _GLOBAL_OFFSET_TABLE_
2043 LOCAL(set_fpscr_L1):
2044 .long GLOBAL(fpscr_values@GOT)
2046 LOCAL(set_fpscr_L1):
2047 .long GLOBAL(fpscr_values)
2050 ENDFUNC(GLOBAL(set_fpscr))
2051 #ifndef NO_FPSCR_VALUES
2053 .comm GLOBAL(fpscr_values),8,4
2055 .comm GLOBAL(fpscr_values),8
2057 #endif /* NO_FPSCR_VALUES */
2058 #endif /* SH2E / SH3E / SH4 */
2059 #endif /* __SH2A_NOFPU__ */
2060 #endif /* L_set_fpscr */
2061 #ifdef L_ic_invalidate
2064 .section .text..SHmedia32,"ax"
2066 .global GLOBAL(init_trampoline)
2067 HIDDEN_FUNC(GLOBAL(init_trampoline))
2068 GLOBAL(init_trampoline):
2070 #ifdef __LITTLE_ENDIAN__
2076 movi 0xffffffffffffd002,r20
2083 ENDFUNC(GLOBAL(init_trampoline))
2084 .global GLOBAL(ic_invalidate)
2085 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2086 GLOBAL(ic_invalidate):
2093 ENDFUNC(GLOBAL(ic_invalidate))
2094 #elif defined(__SH4A__)
2095 .global GLOBAL(ic_invalidate)
2096 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2097 GLOBAL(ic_invalidate):
2102 ENDFUNC(GLOBAL(ic_invalidate))
2103 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2104 /* For system code, we use ic_invalidate_line_i, but user code
2105 needs a different mechanism. A kernel call is generally not
2106 available, and it would also be slow. Different SH4 variants use
2107 different sizes and associativities of the Icache. We use a small
2108 bit of dispatch code that can be put hidden in every shared object,
2109 which calls the actual processor-specific invalidation code in a
2111 Or if you have operating system support, the OS could mmap the
2112 processor-specific code from a single page, since it is highly
2114 .global GLOBAL(ic_invalidate)
2115 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2116 GLOBAL(ic_invalidate):
2132 0: .long GLOBAL(ic_invalidate_array)
2134 .global GLOBAL(ic_invalidate_array)
2135 /* ??? Why won't the assembler allow to add these two constants? */
2136 0: .long _GLOBAL_OFFSET_TABLE_
2137 1: .long GLOBAL(ic_invalidate_array)@GOT
2138 ENDFUNC(GLOBAL(ic_invalidate))
2139 #endif /* __pic__ */
2141 #endif /* L_ic_invalidate */
2143 #ifdef L_ic_invalidate_array
2144 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2145 .global GLOBAL(ic_invalidate_array)
2146 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2147 .global GLOBAL(ic_invalidate_array)
2148 FUNC(GLOBAL(ic_invalidate_array))
2149 GLOBAL(ic_invalidate_array):
2155 ENDFUNC(GLOBAL(ic_invalidate_array))
2156 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2157 .global GLOBAL(ic_invalidate_array)
2159 FUNC(GLOBAL(ic_invalidate_array))
2160 /* This must be aligned to the beginning of a cache line. */
2161 GLOBAL(ic_invalidate_array):
2164 #define WAY_SIZE 0x4000
2167 .rept WAY_SIZE * WAYS / 32
2175 .rept WAY_SIZE * WAYS / 32
2189 #else /* WAYS > 6 */
2190 /* This variant needs two different pages for mmap-ing. */
2208 ENDFUNC(GLOBAL(ic_invalidate_array))
2210 #endif /* L_ic_invalidate_array */
2212 #if defined (__SH5__) && __SH5__ == 32
2213 #ifdef L_shcompact_call_trampoline
/* Dispatch table used by GCC_shcompact_call_trampoline.
   It holds 33 entries; each is the 16-bit offset of a handler label
   from ct_main_label.  Layout, in order:
     r2..r5: three handlers each (_fp, _ld, _pop);
     r6..r9: four handlers each (_fph, _fpl, _ld, _pop);
     tail:   ct_pop_seq (twice), ct_r9_pop, ct_ret_wide, ct_call_func.
   The trampoline materializes the address (ct_main_table - 31 * 2),
   i.e. a base biased by 31 entries.
   NOTE(review): the code that computes the index into this table is
   not visible here; confirm the index range against the ct_loop body
   before reordering or adding entries.  */
2216 LOCAL(ct_main_table):
2217 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2218 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2219 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2220 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2221 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2222 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2223 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2228 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2230 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2236 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2244 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2248 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2249 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2251 .section .text..SHmedia32, "ax"
2254 /* This function loads 64-bit general-purpose registers from the
2255 stack, from a memory address contained in them or from an FP
2256 register, according to a cookie passed in r1. Its execution
2257 time is linear in the number of registers that actually have
2258 to be copied. See sh.h for details on the actual bit pattern.
2260 The function to be called is passed in r0. If a 32-bit return
2261 value is expected, the actual function will be tail-called,
2262 otherwise the return address will be stored in r10 (that the
2263 caller should expect to be clobbered) and the return value
2264 will be expanded into r2/r3 upon return. */
2266 .global GLOBAL(GCC_shcompact_call_trampoline)
2267 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2268 GLOBAL(GCC_shcompact_call_trampoline):
2269 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2270 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2271 pt/l LOCAL(ct_loop), tr1
2273 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2278 LOCAL(ct_main_label):
2281 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2282 /* It must be dr0, so just do it. */
2288 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2289 /* It is either dr0 or dr2. */
2298 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2299 shlri r1, 23 - 3, r34
2300 andi r34, 3 << 3, r33
2301 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2302 LOCAL(ct_r4_fp_base):
2308 LOCAL(ct_r4_fp_copy):
2315 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2316 shlri r1, 20 - 3, r34
2317 andi r34, 3 << 3, r33
2318 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2319 LOCAL(ct_r5_fp_base):
2325 LOCAL(ct_r5_fp_copy):
2334 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2335 /* It must be dr8. */
2341 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2342 shlri r1, 16 - 3, r34
2343 andi r34, 3 << 3, r33
2344 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2345 LOCAL(ct_r6_fp_base):
2351 LOCAL(ct_r6_fp_copy):
2360 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2361 /* It is either dr8 or dr10. */
2369 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2370 shlri r1, 12 - 3, r34
2371 andi r34, 3 << 3, r33
2372 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2373 LOCAL(ct_r7_fp_base):
2378 LOCAL(ct_r7_fp_copy):
2387 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2388 /* It is either dr8 or dr10. */
2390 andi r1, 1 << 8, r32
2396 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2397 shlri r1, 8 - 3, r34
2398 andi r34, 3 << 3, r33
2399 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2400 LOCAL(ct_r8_fp_base):
2405 LOCAL(ct_r8_fp_copy):
2414 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2415 /* It is either dr8 or dr10. */
2417 andi r1, 1 << 4, r32
2423 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2424 shlri r1, 4 - 3, r34
2425 andi r34, 3 << 3, r33
2426 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2427 LOCAL(ct_r9_fp_base):
2432 LOCAL(ct_r9_fp_copy):
2441 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2442 pt/l LOCAL(ct_r2_load), tr2
2451 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2452 pt/l LOCAL(ct_r3_load), tr2
2460 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2461 pt/l LOCAL(ct_r4_load), tr2
2469 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2470 pt/l LOCAL(ct_r5_load), tr2
2478 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2479 pt/l LOCAL(ct_r6_load), tr2
2486 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2487 pt/l LOCAL(ct_r7_load), tr2
2494 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2495 pt/l LOCAL(ct_r8_load), tr2
2502 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2503 pt/l LOCAL(ct_check_tramp), tr2
2527 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2534 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2541 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2548 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2555 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2562 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2568 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2574 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2575 andi r1, 7 << 1, r30
2576 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2578 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2582 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2595 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2598 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2599 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2600 pt/u LOCAL(ct_ret_wide), tr2
2603 LOCAL(ct_call_func): /* Just branch to the function. */
2605 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2606 64-bit return value. */
2610 #if __LITTLE_ENDIAN__
2619 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2620 #endif /* L_shcompact_call_trampoline */
2622 #ifdef L_shcompact_return_trampoline
2623 /* This function does the converse of the code in `ret_wide'
2624 above. It is tail-called by SHcompact functions returning
2625 64-bit non-floating-point values, to pack the 32-bit values in
2626 r2 and r3 into r2. */
2629 .section .text..SHmedia32, "ax"
2631 .global GLOBAL(GCC_shcompact_return_trampoline)
2632 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2633 GLOBAL(GCC_shcompact_return_trampoline):
2635 #if __LITTLE_ENDIAN__
2645 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2646 #endif /* L_shcompact_return_trampoline */
2648 #ifdef L_shcompact_incoming_args
/* Dispatch table used by GCC_shcompact_incoming_args.
   It holds 33 entries; each is either the 16-bit offset of a handler
   label from ia_main_label, or the literal 1 for a slot that the
   per-entry comments mark as invalid ("just loop").  Layout, in order:
     r2..r5: three slots each (invalid, _ld, _push);
     r6..r9: four slots each (invalid, invalid, _ld, _push);
     tail:   ia_push_seq (twice), ia_r9_push, ia_return (twice).
   The function materializes the address (ia_main_table - 31 * 2),
   i.e. a base biased by 31 entries.
   NOTE(review): the code that indexes this table and interprets the
   value 1 is not visible here; confirm against the ia_loop body
   before changing any entry.  */
2651 LOCAL(ia_main_table):
2652 .word 1 /* Invalid, just loop */
2653 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2654 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2655 .word 1 /* Invalid, just loop */
2656 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2657 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2658 .word 1 /* Invalid, just loop */
2659 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2660 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2661 .word 1 /* Invalid, just loop */
2662 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2663 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2664 .word 1 /* Invalid, just loop */
2665 .word 1 /* Invalid, just loop */
2666 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2667 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2668 .word 1 /* Invalid, just loop */
2669 .word 1 /* Invalid, just loop */
2670 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2671 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2672 .word 1 /* Invalid, just loop */
2673 .word 1 /* Invalid, just loop */
2674 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2675 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2676 .word 1 /* Invalid, just loop */
2677 .word 1 /* Invalid, just loop */
2678 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2679 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2680 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2681 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2682 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2683 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2684 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2686 .section .text..SHmedia32, "ax"
2689 /* This function stores 64-bit general-purpose registers back in
2690 the stack, and loads the address in which each register
2691 was stored into itself. The lower 32 bits of r17 hold the address
2692 to begin storing, and the upper 32 bits of r17 hold the cookie.
2693 Its execution time is linear in the
2694 number of registers that actually have to be copied, and it is
2695 optimized for structures larger than 64 bits, as opposed to
2696 individual `long long' arguments. See sh.h for details on the
2697 actual bit pattern. */
2699 .global GLOBAL(GCC_shcompact_incoming_args)
2700 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2701 GLOBAL(GCC_shcompact_incoming_args):
2702 ptabs/l r18, tr0 /* Prepare to return. */
2703 shlri r17, 32, r0 /* Load the cookie. */
2704 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2705 pt/l LOCAL(ia_loop), tr1
2707 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2712 LOCAL(ia_main_label):
2715 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2724 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2733 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2742 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2751 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2760 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2768 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2776 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2780 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2787 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2794 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2801 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2808 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2815 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2821 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2827 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2828 andi r0, 7 << 1, r38
2829 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2831 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2835 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2848 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2850 LOCAL(ia_return): /* Return. */
2852 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2853 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2854 #endif /* L_shcompact_incoming_args */
2857 #ifdef L_nested_trampoline
2859 .section .text..SHmedia32,"ax"
2863 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2864 .global GLOBAL(GCC_nested_trampoline)
2865 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2866 GLOBAL(GCC_nested_trampoline):
2883 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2884 #endif /* L_nested_trampoline */
2885 #endif /* __SH5__ */
/* GCC_push_shmedia_regs / GCC_push_shmedia_regs_nofpu.
   When __SH4_NOFPU__ is not defined, the entry point is
   GCC_push_shmedia_regs: it lowers r15 by 14*8 bytes and stores the
   fourteen even-numbered double registers dr36..dr62 at offsets
   0*8..13*8 from the new r15.  When __SH4_NOFPU__ is defined, only the
   GCC_push_shmedia_regs_nofpu label is emitted and that FP save is
   left out.  Both variants then continue with the common code that
   lowers r15 by a further 27*8 bytes, so relative to the final r15 the
   FP values sit at offsets 27*8..40*8.
   NOTE(review): the stores that fill the 27-slot area are not visible
   here; check them before changing either frame size.  */
2887 #ifdef L_push_pop_shmedia_regs
2888 .section .text..SHmedia32,"ax"
2891 #ifndef __SH4_NOFPU__
2892 .global GLOBAL(GCC_push_shmedia_regs)
2893 FUNC(GLOBAL(GCC_push_shmedia_regs))
2894 GLOBAL(GCC_push_shmedia_regs):
2895 addi.l r15, -14*8, r15 /* Reserve 14 eight-byte slots for dr36..dr62.  */
2896 fst.d r15, 13*8, dr62
2897 fst.d r15, 12*8, dr60
2898 fst.d r15, 11*8, dr58
2899 fst.d r15, 10*8, dr56
2900 fst.d r15, 9*8, dr54
2901 fst.d r15, 8*8, dr52
2902 fst.d r15, 7*8, dr50
2903 fst.d r15, 6*8, dr48
2904 fst.d r15, 5*8, dr46
2905 fst.d r15, 4*8, dr44
2906 fst.d r15, 3*8, dr42
2907 fst.d r15, 2*8, dr40
2908 fst.d r15, 1*8, dr38
2909 fst.d r15, 0*8, dr36
2910 #else /* ! __SH4_NOFPU__ */
2911 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2912 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2913 GLOBAL(GCC_push_shmedia_regs_nofpu):
2914 #endif /* ! __SH4_NOFPU__ */
/* Common to both entry points from here on.  */
2916 addi.l r15, -27*8, r15 /* Reserve 27 more eight-byte slots.  */
2948 #ifndef __SH4_NOFPU__
2949 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2951 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
/* GCC_pop_shmedia_regs / GCC_pop_shmedia_regs_nofpu.
   When __SH4_NOFPU__ is not defined, the entry point is
   GCC_pop_shmedia_regs: it reloads the fourteen even-numbered double
   registers dr36..dr62 from offsets 27*8..40*8 relative to r15.  Those
   are the offsets at which GCC_push_shmedia_regs leaves them (14 FP
   slots stored first, then r15 lowered by another 27*8 bytes).  When
   __SH4_NOFPU__ is defined, only the GCC_pop_shmedia_regs_nofpu label
   is emitted and the FP reload is left out.
   NOTE(review): the code that restores the lower 27 slots and
   readjusts r15 is not visible here.  */
2953 #ifndef __SH4_NOFPU__
2954 .global GLOBAL(GCC_pop_shmedia_regs)
2955 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2956 GLOBAL(GCC_pop_shmedia_regs):
2959 fld.d r15, 40*8, dr62
2960 fld.d r15, 39*8, dr60
2961 fld.d r15, 38*8, dr58
2962 fld.d r15, 37*8, dr56
2963 fld.d r15, 36*8, dr54
2964 fld.d r15, 35*8, dr52
2965 fld.d r15, 34*8, dr50
2966 fld.d r15, 33*8, dr48
2967 fld.d r15, 32*8, dr46
2968 fld.d r15, 31*8, dr44
2969 fld.d r15, 30*8, dr42
2970 fld.d r15, 29*8, dr40
2971 fld.d r15, 28*8, dr38
2972 fld.d r15, 27*8, dr36
2974 #else /* ! __SH4_NOFPU__ */
2975 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2976 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2977 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2978 #endif /* ! __SH4_NOFPU__ */
3015 #ifndef __SH4_NOFPU__
3016 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3018 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3020 #endif /* __SH5__ == 32 */
3021 #endif /* L_push_pop_shmedia_regs */
3025 #if defined(__pic__) && defined(__SHMEDIA__)
3026 .global GLOBAL(sdivsi3)
3027 FUNC(GLOBAL(sdivsi3))
3029 .section .text..SHmedia32,"ax"
3034 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3035 in a text section does not work (at least for shared libraries):
3036 the linker sets the LSB of the address as if this was SHmedia code. */
3037 #define TEXT_DATA_BUG
3041 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3043 .global GLOBAL(sdivsi3)
3045 #ifdef TEXT_DATA_BUG
3046 ptb datalabel Local_div_table,tr0
3048 ptb GLOBAL(div_table_internal),tr0
3051 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3052 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3055 ldx.ub r20, r21, r19 // u0.8
3056 shari r25, 32, r25 // normalize to s2.30
3058 muls.l r25, r19, r19 // s2.38
3059 ldx.w r20, r21, r21 // s2.14
3061 shari r19, 24, r19 // truncate to s2.14
3062 sub r21, r19, r19 // some 11 bit inverse in s1.14
3063 muls.l r19, r19, r21 // u0.28
3066 muls.l r25, r21, r18 // s2.58
3067 shlli r19, 45, r19 // multiply by two and convert to s2.58
3070 shari r18, 28, r18 // some 22 bit inverse in s1.30
3071 muls.l r18, r25, r0 // s2.60
3072 muls.l r18, r4, r25 // s32.30
3074 shari r0, 16, r19 // s-16.44
3075 muls.l r19, r18, r19 // s-16.74
3077 shari r4, 14, r18 // s19.-14
3078 shari r19, 30, r19 // s-16.44
3079 muls.l r19, r18, r19 // s15.30
3080 xor r21, r0, r21 // You could also use the constant 1 << 27.
3086 ENDFUNC(GLOBAL(sdivsi3))
3087 /* This table has been generated by divtab.c .
3088 Defects for bias -330:
3089 Max defect: 6.081536e-07 at -1.000000e+00
3090 Min defect: 2.849516e-08 at 1.030651e+00
3091 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3092 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3093 Defect at 1: 1.238659e-07
3094 Defect at -2: 1.061708e-07 */
3095 #else /* ! __pic__ || ! __SHMEDIA__ */
3097 #endif /* __pic__ */
3098 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3100 .type Local_div_table,@object
3101 .size Local_div_table,128
3102 /* negative division constants */
3119 /* negative division factors */
3139 /* positive division factors */
3156 /* positive division constants */
3174 #endif /* TEXT_DATA_BUG */
3176 .type GLOBAL(div_table),@object
3177 .size GLOBAL(div_table),128
3178 /* negative division constants */
3195 /* negative division factors */
3213 .global GLOBAL(div_table)
3215 HIDDEN_ALIAS(div_table_internal,div_table)
3217 /* positive division factors */
3234 /* positive division constants */
3252 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3253 /* This code uses shld, thus is not suitable for SH1 / SH2. */
3255 /* Signed / unsigned division without use of FPU, optimized for SH4.
3256 Uses a lookup table for divisors in the range -128 .. +128, and
3257 div1 with case distinction for larger divisors in three more ranges.
3258 The code is lumped together with the table to allow the use of mova. */
3259 #ifdef __LITTLE_ENDIAN__
3270 .global GLOBAL(udivsi3_i4i)
3271 FUNC(GLOBAL(udivsi3_i4i))
3272 GLOBAL(udivsi3_i4i):
3273 mov.w LOCAL(c128_w), r1
3279 bf LOCAL(udiv_le128)
3281 bf LOCAL(udiv_ge64k)
3294 mova LOCAL(div_table_ix),r0
3295 bra LOCAL(div_le128_2)
3299 mova LOCAL(div_table_ix),r0
3303 mova LOCAL(div_table_inv),r0
3307 mova LOCAL(div_table_clz),r0
3310 bt/s LOCAL(div_by_1)
3321 LOCAL(div_by_1_neg):
3332 bra LOCAL(div_ge64k_2)
3344 mov.l LOCAL(zero_l),r1
3350 mov.w LOCAL(m256_w),r1
3352 mov.b r0,@(L_LSWMSB,r15)
3355 bra LOCAL(div_ge64k_end)
3377 rotcl r0; div1 r5,r1
3386 ENDFUNC(GLOBAL(udivsi3_i4i))
3388 .global GLOBAL(sdivsi3_i4i)
3389 FUNC(GLOBAL(sdivsi3_i4i))
3390 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3391 but we effectively clobber only r1. */
3392 GLOBAL(sdivsi3_i4i):
3395 mov.w LOCAL(c128_w), r1
3396 bt/s LOCAL(pos_divisor)
3400 bt/s LOCAL(neg_result)
3409 bf/s LOCAL(div_ge64k)
3417 mov.l LOCAL(zero_l),r1
3424 mov.b r0,@(L_MSWLSB,r15)
3430 mov.b r0,@(L_LSWMSB,r15)
3431 LOCAL(div_ge64k_end):
3435 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3443 LOCAL(div_le128_neg):
3445 mova LOCAL(div_table_ix),r0
3447 mova LOCAL(div_table_inv),r0
3448 bt/s LOCAL(div_by_1_neg)
3450 mova LOCAL(div_table_clz),r0
3465 bt/s LOCAL(pos_result)
3470 bf LOCAL(div_le128_neg)
3474 bf/s LOCAL(div_ge64k_neg)
3477 mov.l LOCAL(zero_l),r1
3484 mov.b r0,@(L_MSWLSB,r15)
3490 mov.b r0,@(L_LSWMSB,r15)
3491 LOCAL(div_ge64k_neg_end):
3495 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3499 LOCAL(div_r8_neg_end):
3505 LOCAL(div_ge64k_neg):
3506 bt/s LOCAL(div_r8_neg)
3509 mov.l LOCAL(zero_l),r1
3515 mov.w LOCAL(m256_w),r1
3517 mov.b r0,@(L_LSWMSB,r15)
3520 bra LOCAL(div_ge64k_neg_end)
3533 rotcl r1; div1 r5,r0
3537 bra LOCAL(div_r8_neg_end)
3542 /* This table has been generated by divtab-sh4.c. */
3544 LOCAL(div_table_clz):
3673 /* Lookup table translating positive divisor to index into table of
3674 normalized inverse. N.B. the '0' entry is also the last entry of the
3675 previous table, and causes an unaligned access for division by zero. */
3676 LOCAL(div_table_ix):
3806 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3841 LOCAL(div_table_inv):
3874 /* maximum error: 0.987342 scaled: 0.921875*/
3876 ENDFUNC(GLOBAL(sdivsi3_i4i))
3877 #endif /* SH3 / SH4 */
3879 #endif /* L_div_table */