1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
9 In addition to the permissions in the GNU General Public License, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of this file into combinations with other programs,
12 and to distribute those combinations without any restriction coming
13 from the use of this file. (The General Public License restrictions
14 do apply in other respects; for example, they cover modification of
15 the file, and distribution when not linked into a combine
18 This file is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; see the file COPYING. If not, write to
25 the Free Software Foundation, 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA. */
28 !! libgcc routines for the Renesas / SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33 !! recoded in assembly by Toshiyasu Morita
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
/* Label/symbol helper macros.  LOCAL is defined twice below (".L_" and "L_"
   prefixes); NOTE(review): presumably each definition sits in a different
   preprocessor branch - confirm against the surrounding conditionals. */
/* LOCAL(X): file-local label name, X with a ".L_" prefix. */
41 #define LOCAL(X) .L_##X
/* FUNC(X): mark symbol X as a function via ".type X,@function". */
42 #define FUNC(X) .type X,@function
/* ENDFUNC0(X): define an end label ".Lfe_X" and set the size of X to the
   distance from X to that label. */
43 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
/* ENDFUNC(X): indirection through ENDFUNC0 so that a macro argument such as
   GLOBAL(name) is expanded before it is pasted into the ".Lfe_" label. */
44 #define ENDFUNC(X) ENDFUNC0(X)
/* LOCAL(X): alternative definition using a plain "L_" prefix. */
46 #define LOCAL(X) L_##X
/* CONCAT(A,B): token-paste A and B. */
51 #define CONCAT(A,B) A##B
/* GLOBAL0(U,X): build the symbol made of prefix U, then "__", then X. */
52 #define GLOBAL0(U,X) CONCAT(U,__##X)
/* GLOBAL(X): global symbol for X, led by the compiler-provided
   __USER_LABEL_PREFIX__. */
53 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
55 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
/* Export the 33 entry points ashiftrt_r4_0 .. ashiftrt_r4_32, one symbol per
   count N.  NOTE(review): the names suggest an arithmetic right shift of r4
   by N - confirm against the routine bodies. */
61 .global GLOBAL(ashiftrt_r4_0)
62 .global GLOBAL(ashiftrt_r4_1)
63 .global GLOBAL(ashiftrt_r4_2)
64 .global GLOBAL(ashiftrt_r4_3)
65 .global GLOBAL(ashiftrt_r4_4)
66 .global GLOBAL(ashiftrt_r4_5)
67 .global GLOBAL(ashiftrt_r4_6)
68 .global GLOBAL(ashiftrt_r4_7)
69 .global GLOBAL(ashiftrt_r4_8)
70 .global GLOBAL(ashiftrt_r4_9)
71 .global GLOBAL(ashiftrt_r4_10)
72 .global GLOBAL(ashiftrt_r4_11)
73 .global GLOBAL(ashiftrt_r4_12)
74 .global GLOBAL(ashiftrt_r4_13)
75 .global GLOBAL(ashiftrt_r4_14)
76 .global GLOBAL(ashiftrt_r4_15)
77 .global GLOBAL(ashiftrt_r4_16)
78 .global GLOBAL(ashiftrt_r4_17)
79 .global GLOBAL(ashiftrt_r4_18)
80 .global GLOBAL(ashiftrt_r4_19)
81 .global GLOBAL(ashiftrt_r4_20)
82 .global GLOBAL(ashiftrt_r4_21)
83 .global GLOBAL(ashiftrt_r4_22)
84 .global GLOBAL(ashiftrt_r4_23)
85 .global GLOBAL(ashiftrt_r4_24)
86 .global GLOBAL(ashiftrt_r4_25)
87 .global GLOBAL(ashiftrt_r4_26)
88 .global GLOBAL(ashiftrt_r4_27)
89 .global GLOBAL(ashiftrt_r4_28)
90 .global GLOBAL(ashiftrt_r4_29)
91 .global GLOBAL(ashiftrt_r4_30)
92 .global GLOBAL(ashiftrt_r4_31)
93 .global GLOBAL(ashiftrt_r4_32)
/* Apply FUNC to each of ashiftrt_r4_0 .. ashiftrt_r4_32 so that every entry
   point carries function symbol-type information where FUNC provides it. */
95 FUNC(GLOBAL(ashiftrt_r4_0))
96 FUNC(GLOBAL(ashiftrt_r4_1))
97 FUNC(GLOBAL(ashiftrt_r4_2))
98 FUNC(GLOBAL(ashiftrt_r4_3))
99 FUNC(GLOBAL(ashiftrt_r4_4))
100 FUNC(GLOBAL(ashiftrt_r4_5))
101 FUNC(GLOBAL(ashiftrt_r4_6))
102 FUNC(GLOBAL(ashiftrt_r4_7))
103 FUNC(GLOBAL(ashiftrt_r4_8))
104 FUNC(GLOBAL(ashiftrt_r4_9))
105 FUNC(GLOBAL(ashiftrt_r4_10))
106 FUNC(GLOBAL(ashiftrt_r4_11))
107 FUNC(GLOBAL(ashiftrt_r4_12))
108 FUNC(GLOBAL(ashiftrt_r4_13))
109 FUNC(GLOBAL(ashiftrt_r4_14))
110 FUNC(GLOBAL(ashiftrt_r4_15))
111 FUNC(GLOBAL(ashiftrt_r4_16))
112 FUNC(GLOBAL(ashiftrt_r4_17))
113 FUNC(GLOBAL(ashiftrt_r4_18))
114 FUNC(GLOBAL(ashiftrt_r4_19))
115 FUNC(GLOBAL(ashiftrt_r4_20))
116 FUNC(GLOBAL(ashiftrt_r4_21))
117 FUNC(GLOBAL(ashiftrt_r4_22))
118 FUNC(GLOBAL(ashiftrt_r4_23))
119 FUNC(GLOBAL(ashiftrt_r4_24))
120 FUNC(GLOBAL(ashiftrt_r4_25))
121 FUNC(GLOBAL(ashiftrt_r4_26))
122 FUNC(GLOBAL(ashiftrt_r4_27))
123 FUNC(GLOBAL(ashiftrt_r4_28))
124 FUNC(GLOBAL(ashiftrt_r4_29))
125 FUNC(GLOBAL(ashiftrt_r4_30))
126 FUNC(GLOBAL(ashiftrt_r4_31))
127 FUNC(GLOBAL(ashiftrt_r4_32))
/* Entry labels for ashiftrt_r4_N, laid out in descending order of N: the
   label for 32 comes first and the label for 0 comes last.  ashiftrt_r4_32
   and ashiftrt_r4_31 are defined back to back, so both names resolve to the
   same address. */
130 GLOBAL(ashiftrt_r4_32):
131 GLOBAL(ashiftrt_r4_31):
136 GLOBAL(ashiftrt_r4_30):
138 GLOBAL(ashiftrt_r4_29):
140 GLOBAL(ashiftrt_r4_28):
142 GLOBAL(ashiftrt_r4_27):
144 GLOBAL(ashiftrt_r4_26):
146 GLOBAL(ashiftrt_r4_25):
148 GLOBAL(ashiftrt_r4_24):
154 GLOBAL(ashiftrt_r4_23):
156 GLOBAL(ashiftrt_r4_22):
158 GLOBAL(ashiftrt_r4_21):
160 GLOBAL(ashiftrt_r4_20):
162 GLOBAL(ashiftrt_r4_19):
164 GLOBAL(ashiftrt_r4_18):
166 GLOBAL(ashiftrt_r4_17):
168 GLOBAL(ashiftrt_r4_16):
173 GLOBAL(ashiftrt_r4_15):
175 GLOBAL(ashiftrt_r4_14):
177 GLOBAL(ashiftrt_r4_13):
179 GLOBAL(ashiftrt_r4_12):
181 GLOBAL(ashiftrt_r4_11):
183 GLOBAL(ashiftrt_r4_10):
185 GLOBAL(ashiftrt_r4_9):
187 GLOBAL(ashiftrt_r4_8):
189 GLOBAL(ashiftrt_r4_7):
191 GLOBAL(ashiftrt_r4_6):
193 GLOBAL(ashiftrt_r4_5):
195 GLOBAL(ashiftrt_r4_4):
197 GLOBAL(ashiftrt_r4_3):
199 GLOBAL(ashiftrt_r4_2):
201 GLOBAL(ashiftrt_r4_1):
205 GLOBAL(ashiftrt_r4_0):
/* Close every ashiftrt_r4_N symbol with ENDFUNC.  All 33 invocations sit
   together at this one point, so where ENDFUNC emits a size, each symbol is
   measured from its own entry label up to here. */
209 ENDFUNC(GLOBAL(ashiftrt_r4_0))
210 ENDFUNC(GLOBAL(ashiftrt_r4_1))
211 ENDFUNC(GLOBAL(ashiftrt_r4_2))
212 ENDFUNC(GLOBAL(ashiftrt_r4_3))
213 ENDFUNC(GLOBAL(ashiftrt_r4_4))
214 ENDFUNC(GLOBAL(ashiftrt_r4_5))
215 ENDFUNC(GLOBAL(ashiftrt_r4_6))
216 ENDFUNC(GLOBAL(ashiftrt_r4_7))
217 ENDFUNC(GLOBAL(ashiftrt_r4_8))
218 ENDFUNC(GLOBAL(ashiftrt_r4_9))
219 ENDFUNC(GLOBAL(ashiftrt_r4_10))
220 ENDFUNC(GLOBAL(ashiftrt_r4_11))
221 ENDFUNC(GLOBAL(ashiftrt_r4_12))
222 ENDFUNC(GLOBAL(ashiftrt_r4_13))
223 ENDFUNC(GLOBAL(ashiftrt_r4_14))
224 ENDFUNC(GLOBAL(ashiftrt_r4_15))
225 ENDFUNC(GLOBAL(ashiftrt_r4_16))
226 ENDFUNC(GLOBAL(ashiftrt_r4_17))
227 ENDFUNC(GLOBAL(ashiftrt_r4_18))
228 ENDFUNC(GLOBAL(ashiftrt_r4_19))
229 ENDFUNC(GLOBAL(ashiftrt_r4_20))
230 ENDFUNC(GLOBAL(ashiftrt_r4_21))
231 ENDFUNC(GLOBAL(ashiftrt_r4_22))
232 ENDFUNC(GLOBAL(ashiftrt_r4_23))
233 ENDFUNC(GLOBAL(ashiftrt_r4_24))
234 ENDFUNC(GLOBAL(ashiftrt_r4_25))
235 ENDFUNC(GLOBAL(ashiftrt_r4_26))
236 ENDFUNC(GLOBAL(ashiftrt_r4_27))
237 ENDFUNC(GLOBAL(ashiftrt_r4_28))
238 ENDFUNC(GLOBAL(ashiftrt_r4_29))
239 ENDFUNC(GLOBAL(ashiftrt_r4_30))
240 ENDFUNC(GLOBAL(ashiftrt_r4_31))
241 ENDFUNC(GLOBAL(ashiftrt_r4_32))
/* ashrsi3: export the symbol and apply FUNC to it. */
263 .global GLOBAL(ashrsi3)
264 FUNC(GLOBAL(ashrsi3))
/* Load the address of the offset table into r0 (mova always writes r0). */
269 mova LOCAL(ashrsi3_table),r0
/* Offset table with one byte per entry, for labels ashrsi3_0 .. ashrsi3_31.
   Each entry is the distance from the start of the table to the matching
   label, so every target has to stay within one byte's range of the table. */
280 LOCAL(ashrsi3_table):
281 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
305 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
306 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
307 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
308 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
309 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
310 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
311 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
312 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
/* Close the ashrsi3 symbol. */
392 ENDFUNC(GLOBAL(ashrsi3))
/* ashlsi3: export the symbol and apply FUNC to it. */
413 .global GLOBAL(ashlsi3)
414 FUNC(GLOBAL(ashlsi3))
/* Load the address of the offset table into r0 (mova always writes r0). */
419 mova LOCAL(ashlsi3_table),r0
/* Offset table with one byte per entry, for labels ashlsi3_0 .. ashlsi3_31.
   Each entry is the distance from the start of the table to the matching
   label, so every target has to stay within one byte's range of the table. */
430 LOCAL(ashlsi3_table):
431 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
432 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
433 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
434 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
435 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
436 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
455 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
456 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
457 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
458 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
459 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
460 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
461 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
462 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
/* Close the ashlsi3 symbol. */
551 ENDFUNC(GLOBAL(ashlsi3))
/* lshrsi3: export the symbol and apply FUNC to it. */
572 .global GLOBAL(lshrsi3)
573 FUNC(GLOBAL(lshrsi3))
/* Load the address of the offset table into r0 (mova always writes r0). */
578 mova LOCAL(lshrsi3_table),r0
/* Offset table with one byte per entry, for labels lshrsi3_0 .. lshrsi3_31.
   Each entry is the distance from the start of the table to the matching
   label, so every target has to stay within one byte's range of the table. */
589 LOCAL(lshrsi3_table):
590 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
591 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
592 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
593 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
594 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
595 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
614 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
615 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
616 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
617 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
618 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
619 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
620 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
621 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
/* Close the lshrsi3 symbol. */
710 ENDFUNC(GLOBAL(lshrsi3))
715 ! done all the large groups, do the remainder
720 mova GLOBAL(movmemSI0),r0
/* Entry points movmemSI64 .. movmemSI0: one symbol per multiple of 4 from 64
   down to 0, each exported and passed to FUNC.  NOTE(review): the names
   suggest fixed-size block moves of N bytes - confirm against the bodies. */
726 .global GLOBAL(movmemSI64)
727 FUNC(GLOBAL(movmemSI64))
731 .global GLOBAL(movmemSI60)
732 FUNC(GLOBAL(movmemSI60))
736 .global GLOBAL(movmemSI56)
737 FUNC(GLOBAL(movmemSI56))
741 .global GLOBAL(movmemSI52)
742 FUNC(GLOBAL(movmemSI52))
746 .global GLOBAL(movmemSI48)
747 FUNC(GLOBAL(movmemSI48))
751 .global GLOBAL(movmemSI44)
752 FUNC(GLOBAL(movmemSI44))
756 .global GLOBAL(movmemSI40)
757 FUNC(GLOBAL(movmemSI40))
761 .global GLOBAL(movmemSI36)
762 FUNC(GLOBAL(movmemSI36))
766 .global GLOBAL(movmemSI32)
767 FUNC(GLOBAL(movmemSI32))
771 .global GLOBAL(movmemSI28)
772 FUNC(GLOBAL(movmemSI28))
776 .global GLOBAL(movmemSI24)
777 FUNC(GLOBAL(movmemSI24))
781 .global GLOBAL(movmemSI20)
782 FUNC(GLOBAL(movmemSI20))
786 .global GLOBAL(movmemSI16)
787 FUNC(GLOBAL(movmemSI16))
791 .global GLOBAL(movmemSI12)
792 FUNC(GLOBAL(movmemSI12))
796 .global GLOBAL(movmemSI8)
797 FUNC(GLOBAL(movmemSI8))
801 .global GLOBAL(movmemSI4)
802 FUNC(GLOBAL(movmemSI4))
806 .global GLOBAL(movmemSI0)
807 FUNC(GLOBAL(movmemSI0))
/* Close all seventeen movmemSI symbols at this single point, in the same
   64-to-0 order used for the declarations. */
812 ENDFUNC(GLOBAL(movmemSI64))
813 ENDFUNC(GLOBAL(movmemSI60))
814 ENDFUNC(GLOBAL(movmemSI56))
815 ENDFUNC(GLOBAL(movmemSI52))
816 ENDFUNC(GLOBAL(movmemSI48))
817 ENDFUNC(GLOBAL(movmemSI44))
818 ENDFUNC(GLOBAL(movmemSI40))
819 ENDFUNC(GLOBAL(movmemSI36))
820 ENDFUNC(GLOBAL(movmemSI32))
821 ENDFUNC(GLOBAL(movmemSI28))
822 ENDFUNC(GLOBAL(movmemSI24))
823 ENDFUNC(GLOBAL(movmemSI20))
824 ENDFUNC(GLOBAL(movmemSI16))
825 ENDFUNC(GLOBAL(movmemSI12))
826 ENDFUNC(GLOBAL(movmemSI8))
827 ENDFUNC(GLOBAL(movmemSI4))
828 ENDFUNC(GLOBAL(movmemSI0))
832 .global GLOBAL(movmem)
/* Export the three movmem "i4" entry points and apply FUNC to each. */
896 .global GLOBAL(movmem_i4_even)
897 .global GLOBAL(movmem_i4_odd)
898 .global GLOBAL(movmemSI12_i4)
900 FUNC(GLOBAL(movmem_i4_even))
901 FUNC(GLOBAL(movmem_i4_odd))
902 FUNC(GLOBAL(movmemSI12_i4))
/* Entry for movmem_i4_even; it branches to L_movmem_start_even.
   NOTE(review): this target and L_movmem_2mod4_end below are spelled with a
   raw "L_" prefix instead of going through LOCAL(), unlike the local labels
   of the shift routines - confirm whether that is intentional. */
912 GLOBAL(movmem_i4_even):
914 bra L_movmem_start_even
/* Entry for movmem_i4_odd. */
917 GLOBAL(movmem_i4_odd):
/* Delayed branch (bt/s) to L_movmem_2mod4_end, taken when the T bit is set. */
929 bt/s L_movmem_2mod4_end
/* Close the even and odd entry points together. */
943 ENDFUNC(GLOBAL(movmem_i4_even))
944 ENDFUNC(GLOBAL(movmem_i4_odd))
/* Separate entry point movmemSI12_i4, closed by its own ENDFUNC. */
947 GLOBAL(movmemSI12_i4):
956 ENDFUNC(GLOBAL(movmemSI12_i4))
962 .global GLOBAL(mulsi3)
967 ! r0 = aabb*ccdd via partial products
969 ! if aa == 0 and cc == 0
973 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
/* 32-bit multiply of r4 (aabb) by r5 (ccdd) built from 16x16 mulu.w partial
   products.  The low halves are multiplied first; r3 is then formed from the
   two high halves so that a single tst tells whether both are zero, in which
   case bb*dd is already the whole answer.  Otherwise the code at hiset goes
   on to the cross products aa*dd and cc*bb. */
977 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
978 mov r5,r3 ! r3 = ccdd
979 swap.w r4,r2 ! r2 = bbaa
980 xtrct r2,r3 ! r3 = aacc
981 tst r3,r3 ! msws zero ?
983 rts ! yes - then we have the answer
986 hiset: sts macl,r0 ! r0 = bb*dd
987 mulu.w r2,r5 ! brewing macl = aa*dd
989 mulu.w r3,r4 ! brewing macl = cc*bb
998 #endif /* ! __SH5__ */
1001 !! 4 byte integer Divide code for the Renesas SH
1003 !! args in r4 and r5, result in fpul, clobber dr0, dr2
1005 .global GLOBAL(sdivsi3_i4)
1006 FUNC(GLOBAL(sdivsi3_i4))
1016 ENDFUNC(GLOBAL(sdivsi3_i4))
1017 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1018 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1020 #if ! __SH5__ || __SH5__ == 32
1024 .global GLOBAL(sdivsi3_i4)
1025 FUNC(GLOBAL(sdivsi3_i4))
1040 ENDFUNC(GLOBAL(sdivsi3_i4))
1041 #endif /* ! __SH5__ || __SH5__ == 32 */
1042 #endif /* ! __SH4__ */
1046 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1048 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1050 !! Steve Chamberlain
1055 !! args in r4 and r5, result in r0, clobber r1, r2, r3, and t bit
1057 .global GLOBAL(sdivsi3)
1058 FUNC(GLOBAL(sdivsi3))
1061 .section .text..SHmedia32,"ax"
1067 /* The assembly code that follows is a hand-optimized version of the C
1068 code that follows. Note that the registers that are modified are
1069 exactly those listed as clobbered in the patterns divsi3_i1 and
1072 int __sdivsi3 (i, j)
1075 register unsigned long long r18 asm ("r18");
1076 register unsigned long long r19 asm ("r19");
1077 register unsigned long long r0 asm ("r0") = 0;
1078 register unsigned long long r1 asm ("r1") = 1;
1079 register int r2 asm ("r2") = i >> 31;
1080 register int r3 asm ("r3") = j >> 31;
1092 r0 |= r1, r18 -= r19;
1093 while (r19 >>= 1, r1 >>= 1);
1095 return r2 * (int)r0;
1099 pt/l LOCAL(sdivsi3_dontadd), tr2
1100 pt/l LOCAL(sdivsi3_loop), tr1
1113 LOCAL(sdivsi3_loop):
1117 LOCAL(sdivsi3_dontadd):
1126 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1129 // can create absolute value without extra latency,
1130 // but dependent on proper sign extension of inputs:
1133 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1136 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1137 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1145 // If r4 was to be used in-place instead of r21, could use this sequence
1146 // to compute absolute:
1147 // sub r63,r4,r19 // compute absolute value of r4
1148 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1149 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1161 mmacnfx.wl r25,r2,r1
1187 #elif defined __SHMEDIA__
1188 /* m5compact-nofpu */
1189 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1191 .section .text..SHmedia32,"ax"
1194 pt/l LOCAL(sdivsi3_dontsub), tr0
1195 pt/l LOCAL(sdivsi3_loop), tr1
1207 LOCAL(sdivsi3_loop):
1211 LOCAL(sdivsi3_dontsub):
1217 #else /* ! __SHMEDIA__ */
1302 ENDFUNC(GLOBAL(sdivsi3))
1303 #endif /* ! __SHMEDIA__ */
1304 #endif /* ! __SH4__ */
1309 !! 4 byte integer Divide code for the Renesas SH
1311 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1314 .global GLOBAL(udivsi3_i4)
1315 FUNC(GLOBAL(udivsi3_i4))
1327 #ifdef __LITTLE_ENDIAN__
1351 .align 3 ! make double below 8 byte aligned.
1356 ENDFUNC(GLOBAL(udivsi3_i4))
1357 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1358 #if ! __SH5__ || __SH5__ == 32
1359 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1361 .global GLOBAL(udivsi3_i4)
1362 FUNC(GLOBAL(udivsi3_i4))
1376 ENDFUNC(GLOBAL(udivsi3_i4))
1377 #endif /* ! __SH5__ || __SH5__ == 32 */
1378 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1379 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1381 .global GLOBAL(udivsi3_i4)
1395 #ifdef __LITTLE_ENDIAN__
1415 .align 3 ! make double below 8 byte aligned.
1430 ENDFUNC(GLOBAL(udivsi3_i4))
1431 #endif /* ! __SH4__ */
1435 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1437 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1439 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1440 .global GLOBAL(udivsi3)
1441 FUNC(GLOBAL(udivsi3))
1445 .section .text..SHmedia32,"ax"
1451 /* The assembly code that follows is a hand-optimized version of the C
1452 code that follows. Note that the registers that are modified are
1453 exactly those listed as clobbered in the patterns udivsi3_i1 and
1460 register unsigned long long r0 asm ("r0") = 0;
1461 register unsigned long long r18 asm ("r18") = 1;
1462 register unsigned long long r4 asm ("r4") = i;
1463 register unsigned long long r19 asm ("r19") = j;
1469 r0 |= r18, r4 -= r19;
1470 while (r19 >>= 1, r18 >>= 1);
1476 pt/l LOCAL(udivsi3_dontadd), tr2
1477 pt/l LOCAL(udivsi3_loop), tr1
1485 LOCAL(udivsi3_loop):
1489 LOCAL(udivsi3_dontadd):
1497 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1503 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1505 mmulfx.w r21,r21,r19
1506 mshflo.w r21,r63,r21
1508 mmulfx.w r25,r19,r19
1512 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1513 before the msub.w, but we need a different value for
1514 r19 to keep errors under control. */
1516 mmulfx.w r19,r19,r19
1520 mmacnfx.wl r25,r19,r21
1545 #elif defined (__SHMEDIA__)
1546 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1547 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1548 So use a short shmedia loop. */
1549 // clobbered: r20,r21,r25,tr0,tr1,tr2
1551 .section .text..SHmedia32,"ax"
1554 pt/l LOCAL(udivsi3_dontsub), tr0
1555 pt/l LOCAL(udivsi3_loop), tr1
1560 LOCAL(udivsi3_loop):
1564 LOCAL(udivsi3_dontsub):
1569 #else /* ! defined (__SHMEDIA__) */
1573 div1 r5,r4; div1 r5,r4; div1 r5,r4
1574 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1577 div1 r5,r4; rotcl r0
1578 div1 r5,r4; rotcl r0
1579 div1 r5,r4; rotcl r0
1587 bf LOCAL(large_divisor)
1589 bf/s LOCAL(large_divisor)
1611 LOCAL(large_divisor):
1630 ENDFUNC(GLOBAL(udivsi3))
1631 #endif /* ! __SHMEDIA__ */
1632 #endif /* __SH4__ */
1633 #endif /* L_udivsi3 */
1638 .section .text..SHmedia32,"ax"
1640 .global GLOBAL(udivdi3)
1641 FUNC(GLOBAL(udivdi3))
1647 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1651 sub r63,r22,r20 // r63 == 64 % 64
1653 pta LOCAL(large_divisor),tr0
1659 bgt/u r9,r63,tr0 // large_divisor
1668 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1669 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1670 the case may be, %0000000000000000 000.11111111111, still */
1671 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1676 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1678 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1687 mcmpgt.l r21,r63,r21 // See Note 1
1689 mshfhi.l r63,r21,r21
1693 /* small divisor: need a third divide step */
1703 /* could test r3 here to check for divide by zero. */
1706 LOCAL(large_divisor):
1715 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1716 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1717 the case may be, %0000000000000000 000.11111111111, still */
1718 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1723 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1725 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1729 pta LOCAL(no_lo_adj),tr0
1736 bgtu/u r7,r25,tr0 // no_lo_adj
1742 /* large_divisor: only needs a few adjustments. */
1749 ENDFUNC(GLOBAL(udivdi3))
1750 /* Note 1: To shift the result of the second divide stage so that the result
1751 always fits into 32 bits, yet we still reduce the rest sufficiently
1752 would require a lot of instructions to do the shifts just right. Using
1753 the full 64 bit shift result to multiply with the divisor would require
1754 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1755 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1756 know that the rest after taking this partial result into account will
1757 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1758 upper 32 bits of the partial result are nonzero. */
1759 #endif /* __SHMEDIA__ */
1760 #endif /* L_udivdi3 */
1765 .section .text..SHmedia32,"ax"
1767 .global GLOBAL(divdi3)
1768 FUNC(GLOBAL(divdi3))
1770 pta GLOBAL(udivdi3),tr0
1782 ENDFUNC(GLOBAL(divdi3))
1783 #endif /* __SHMEDIA__ */
1784 #endif /* L_divdi3 */
1789 .section .text..SHmedia32,"ax"
1791 .global GLOBAL(umoddi3)
1792 FUNC(GLOBAL(umoddi3))
1798 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1802 sub r63,r22,r20 // r63 == 64 % 64
1804 pta LOCAL(large_divisor),tr0
1810 bgt/u r9,r63,tr0 // large_divisor
1819 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1820 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1821 the case may be, %0000000000000000 000.11111111111, still */
1822 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1827 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1829 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1834 /* bubble */ /* could test r3 here to check for divide by zero. */
1837 mcmpgt.l r21,r63,r21 // See Note 1
1839 mshfhi.l r63,r21,r21
1843 /* small divisor: need a third divide step */
1846 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1856 LOCAL(large_divisor):
1865 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1866 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1867 the case may be, %0000000000000000 000.11111111111, still */
1868 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1873 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1875 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1879 pta LOCAL(no_lo_adj),tr0
1886 bgtu/u r7,r25,tr0 // no_lo_adj
1892 /* large_divisor: only needs a few adjustments. */
1901 ENDFUNC(GLOBAL(umoddi3))
1902 /* Note 1: To shift the result of the second divide stage so that the result
1903 always fits into 32 bits, yet we still reduce the rest sufficiently
1904 would require a lot of instructions to do the shifts just right. Using
1905 the full 64 bit shift result to multiply with the divisor would require
1906 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1907 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1908 know that the rest after taking this partial result into account will
1909 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1910 upper 32 bits of the partial result are nonzero. */
1911 #endif /* __SHMEDIA__ */
1912 #endif /* L_umoddi3 */
1917 .section .text..SHmedia32,"ax"
1919 .global GLOBAL(moddi3)
1920 FUNC(GLOBAL(moddi3))
1922 pta GLOBAL(umoddi3),tr0
1934 ENDFUNC(GLOBAL(moddi3))
1935 #endif /* __SHMEDIA__ */
1936 #endif /* L_moddi3 */
1939 #if defined (__SH2E__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1943 .global GLOBAL(set_fpscr)
1944 FUNC(GLOBAL(set_fpscr))
1949 mova LOCAL(set_fpscr_L0),r0
1950 mov.l LOCAL(set_fpscr_L0),r12
1952 mov.l LOCAL(set_fpscr_L1),r0
1956 mov.l LOCAL(set_fpscr_L1),r1
1963 #if defined(__SH4__)
1966 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1975 #if defined(__SH4__)
1979 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1986 LOCAL(set_fpscr_L0):
1987 .long _GLOBAL_OFFSET_TABLE_
1988 LOCAL(set_fpscr_L1):
1989 .long GLOBAL(fpscr_values@GOT)
1991 LOCAL(set_fpscr_L1):
1992 .long GLOBAL(fpscr_values)
1995 ENDFUNC(GLOBAL(set_fpscr))
1996 #ifndef NO_FPSCR_VALUES
1998 .comm GLOBAL(fpscr_values),8,4
2000 .comm GLOBAL(fpscr_values),8
2002 #endif /* NO_FPSCR_VALUES */
2003 #endif /* SH2E / SH3E / SH4 */
2004 #endif /* L_set_fpscr */
2005 #ifdef L_ic_invalidate
2008 .section .text..SHmedia32,"ax"
2010 .global GLOBAL(init_trampoline)
2011 FUNC(GLOBAL(init_trampoline))
2012 GLOBAL(init_trampoline):
2014 #ifdef __LITTLE_ENDIAN__
2020 movi 0xffffffffffffd002,r20
2027 .global GLOBAL(ic_invalidate)
2028 FUNC(GLOBAL(ic_invalidate))
2029 GLOBAL(ic_invalidate):
2037 ENDFUNC(GLOBAL(ic_invalidate))
2038 ENDFUNC(GLOBAL(init_trampoline))
2039 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
2040 .global GLOBAL(ic_invalidate)
2041 FUNC(GLOBAL(ic_invalidate))
2042 GLOBAL(ic_invalidate):
2046 /* Compute how many cache lines 0f is away from r4. */
2049 /* Prepare to branch to 0f plus the cache-line offset. */
2056 /* This must be aligned to the beginning of a cache line. */
2058 .rept 256 /* There are 256 cache lines of 32 bytes. */
2065 ENDFUNC(GLOBAL(ic_invalidate))
2067 #endif /* L_ic_invalidate */
2069 #if defined (__SH5__) && __SH5__ == 32
2070 #ifdef L_shcompact_call_trampoline
/* Dispatch table of 16-bit entries.  Each entry is the offset of one handler
   label from ct_main_label.  The handlers cover r2..r9: r2-r5 have "fp",
   "ld" and "pop" variants, while r6-r9 have "fph", "fpl", "ld" and "pop".
   The table ends with ct_pop_seq (twice), a second ct_r9_pop entry,
   ct_ret_wide and ct_call_func. */
2073 LOCAL(ct_main_table):
2074 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2075 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2076 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2077 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2078 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2079 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2080 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2081 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2082 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2083 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2084 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2085 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2086 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2087 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2088 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2089 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2090 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2091 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2092 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2093 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2094 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2095 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2096 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2097 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2098 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2099 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2100 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2101 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2102 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2103 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2104 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2105 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2106 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2108 .section .text..SHmedia32, "ax"
2111 /* This function loads 64-bit general-purpose registers from the
2112 stack, from a memory address contained in them or from an FP
2113 register, according to a cookie passed in r1. Its execution
2114 time is linear in the number of registers that actually have
2115 to be copied. See sh.h for details on the actual bit pattern.
2117 The function to be called is passed in r0. If a 32-bit return
2118 value is expected, the actual function will be tail-called,
2119 otherwise the return address will be stored in r10 (that the
2120 caller should expect to be clobbered) and the return value
2121 will be expanded into r2/r3 upon return. */
2123 .global GLOBAL(GCC_shcompact_call_trampoline)
2124 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2125 GLOBAL(GCC_shcompact_call_trampoline):
2126 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2127 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2128 pt/l LOCAL(ct_loop), tr1
2130 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2135 LOCAL(ct_main_label):
2138 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2139 /* It must be dr0, so just do it. */
2145 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2146 /* It is either dr0 or dr2. */
2155 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2156 shlri r1, 23 - 3, r34
2157 andi r34, 3 << 3, r33
2158 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2159 LOCAL(ct_r4_fp_base):
2165 LOCAL(ct_r4_fp_copy):
2172 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2173 shlri r1, 20 - 3, r34
2174 andi r34, 3 << 3, r33
2175 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2176 LOCAL(ct_r5_fp_base):
2182 LOCAL(ct_r5_fp_copy):
2191 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2192 /* It must be dr8. */
2198 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2199 shlri r1, 16 - 3, r34
2200 andi r34, 3 << 3, r33
2201 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2202 LOCAL(ct_r6_fp_base):
2208 LOCAL(ct_r6_fp_copy):
2217 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2218 /* It is either dr8 or dr10. */
2226 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2227 shlri r1, 12 - 3, r34
2228 andi r34, 3 << 3, r33
2229 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2230 LOCAL(ct_r7_fp_base):
2235 LOCAL(ct_r7_fp_copy):
2244 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2245 /* It is either dr8 or dr10. */
2247 andi r1, 1 << 8, r32
2253 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2254 shlri r1, 8 - 3, r34
2255 andi r34, 3 << 3, r33
2256 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2257 LOCAL(ct_r8_fp_base):
2262 LOCAL(ct_r8_fp_copy):
2271 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2272 /* It is either dr8 or dr10. */
2274 andi r1, 1 << 4, r32
2280 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2281 shlri r1, 4 - 3, r34
2282 andi r34, 3 << 3, r33
2283 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2284 LOCAL(ct_r9_fp_base):
2289 LOCAL(ct_r9_fp_copy):
2298 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2299 pt/l LOCAL(ct_r2_load), tr2
2308 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2309 pt/l LOCAL(ct_r3_load), tr2
2317 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2318 pt/l LOCAL(ct_r4_load), tr2
2326 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2327 pt/l LOCAL(ct_r5_load), tr2
2335 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2336 pt/l LOCAL(ct_r6_load), tr2
2343 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2344 pt/l LOCAL(ct_r7_load), tr2
2351 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2352 pt/l LOCAL(ct_r8_load), tr2
2359 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2360 pt/l LOCAL(ct_check_tramp), tr2
2384 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2391 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2398 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2405 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2412 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2419 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2425 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2431 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2432 andi r1, 7 << 1, r30
2433 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2435 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2439 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2452 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2455 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2456 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2457 pt/u LOCAL(ct_ret_wide), tr2
2460 LOCAL(ct_call_func): /* Just branch to the function. */
2462 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2463 64-bit return value. */
2467 #if __LITTLE_ENDIAN__
2476 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2477 #endif /* L_shcompact_call_trampoline */
2479 #ifdef L_shcompact_return_trampoline
2480 /* This function does the converse of the code in `ret_wide'
2481 above. It is tail-called by SHcompact functions returning
2482 64-bit non-floating-point values, to pack the 32-bit values in
2483 r2 and r3 into r2. */
/* The routine lives in the .text..SHmedia32 section; the "ax" flags
   mark that section as allocatable and executable.  */
2486 .section .text..SHmedia32, "ax"
/* Export the entry point.  FUNC and ENDFUNC are macros from the top of
   this file; the definitions visible there expand to
   `.type X,@function' and to an end label plus a `.size' computed from
   it.  */
2488 .global GLOBAL(GCC_shcompact_return_trampoline)
2489 FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2490 GLOBAL(GCC_shcompact_return_trampoline):
/* The code that follows the entry label is conditional on
   __LITTLE_ENDIAN__.  */
2492 #if __LITTLE_ENDIAN__
2502 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2503 #endif /* L_shcompact_return_trampoline */
2505 #ifdef L_shcompact_incoming_args
/* Dispatch table used by GCC_shcompact_incoming_args.  Every entry is
   a .word holding either the distance from ia_main_label to one of the
   handlers defined in that function, or the constant 1 for a slot that
   is marked invalid.  The function loads the address of this table,
   biased by -31 * 2, into r43.

   Layout, in order:
     - r2..r5: one invalid slot, then the `ld' and `push' handlers;
     - r6..r9: two invalid slots, then the `ld' and `push' handlers;
     - five trailing entries: ia_push_seq (twice), ia_r9_push, and
       ia_return (twice).

   NOTE(review): how the cookie is converted into an index into this
   table is not expressed by the table itself -- confirm against the
   dispatch loop before reordering or inserting entries.  */
2508 LOCAL(ia_main_table):
/* r2 */
2509 .word 1 /* Invalid, just loop */
2510 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2511 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
/* r3 */
2512 .word 1 /* Invalid, just loop */
2513 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2514 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
/* r4 */
2515 .word 1 /* Invalid, just loop */
2516 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2517 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
/* r5 */
2518 .word 1 /* Invalid, just loop */
2519 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2520 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
/* r6 (from here on each register has two invalid slots) */
2521 .word 1 /* Invalid, just loop */
2522 .word 1 /* Invalid, just loop */
2523 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2524 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
/* r7 */
2525 .word 1 /* Invalid, just loop */
2526 .word 1 /* Invalid, just loop */
2527 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2528 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
/* r8 */
2529 .word 1 /* Invalid, just loop */
2530 .word 1 /* Invalid, just loop */
2531 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2532 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
/* r9 */
2533 .word 1 /* Invalid, just loop */
2534 .word 1 /* Invalid, just loop */
2535 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2536 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Trailing entries: sequence push, single r9 push, and return.  */
2537 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2538 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2539 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2540 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2541 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2543 .section .text..SHmedia32, "ax"
2546 /* This function stores 64-bit general-purpose registers back in
2547 the stack, and loads the address in which each register
2548 was stored into itself. The lower 32 bits of r17 hold the address
2549 to begin storing, and the upper 32 bits of r17 hold the cookie.
2550 Its execution time is linear in the
2551 number of registers that actually have to be copied, and it is
2552 optimized for structures larger than 64 bits, as opposed to
2553 individual `long long' arguments. See sh.h for details on the
2554 actual bit pattern. */
/* Registers set up by the prologue below:
     tr0 - return target, prepared from r18
     r0  - the cookie (r17 shifted right by 32)
     r43 - address of ia_main_table biased by -31 * 2, assembled from
           two 16-bit halves by the movi/shori pair
     tr1 - ia_loop  */
2556 .global GLOBAL(GCC_shcompact_incoming_args)
2557 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2558 GLOBAL(GCC_shcompact_incoming_args):
2559 ptabs/l r18, tr0 /* Prepare to return. */
2560 shlri r17, 32, r0 /* Load the cookie. */
/* Upper 16 bits of the biased table address; the shori further down
   supplies the lower 16 bits.  */
2561 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2562 pt/l LOCAL(ia_loop), tr1
2564 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Reference point for the handler offsets stored in ia_main_table.  */
2569 LOCAL(ia_main_label):
/* `ld' handlers: one per register r2..r9.  */
2572 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2581 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2590 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2599 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2608 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2617 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2625 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2633 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
/* `push' handlers: one per register r2..r8 (r9 is handled at the end of
   the push sequence below).  */
2637 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2644 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2651 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2658 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2665 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2672 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2678 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2684 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r38 = cookie & (7 << 1), i.e. the 3-bit field at bits 1..3 of r0.  */
2685 andi r0, 7 << 1, r38
/* r40 = address of ia_end_of_push_seq, again built as upper half (movi)
   followed by lower half (shori).  */
2686 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2688 shori LOCAL(ia_end_of_push_seq) & 65535, r40
/* NOTE(review): the matching label in the call trampoline is spelled
   ct_start_of_pop_seq, so `stack' here looks like a typo for `start'.
   Check for references elsewhere in the file before renaming.  */
2692 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2705 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2707 LOCAL(ia_return): /* Return. */
2709 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2710 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2711 #endif /* L_shcompact_incoming_args */
2714 #ifdef L_nested_trampoline
/* GCC_nested_trampoline: code that, per the alignment comment below, is
   copied in 8-byte units in SHmedia mode.  It is placed in the
   .text..SHmedia32 section and exported as a function symbol.  */
2716 .section .text..SHmedia32,"ax"
/* .align 3 requests the 8-byte alignment that the copy granularity
   mentioned in the comment relies on.  */
2720 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2721 .global GLOBAL(GCC_nested_trampoline)
2722 FUNC(GLOBAL(GCC_nested_trampoline))
2723 GLOBAL(GCC_nested_trampoline):
2740 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2741 #endif /* L_nested_trampoline */
2742 #endif /* __SH5__ */
2744 #ifdef L_push_pop_shmedia_regs
2745 .section .text..SHmedia32,"ax"
/* GCC_push_shmedia_regs: FPU-aware entry point, assembled only when
   __SH4_NOFPU__ is not defined.  It lowers r15 by 14 eight-byte slots
   and stores the 14 even-numbered double registers dr36..dr62 there,
   dr36 in slot 0 up to dr62 in slot 13.
   NOTE(review): no return is visible between the last store and the
   _nofpu entry point, and both ENDFUNCs sit together at the end, so
   this entry presumably falls through into
   GCC_push_shmedia_regs_nofpu -- confirm.  */
2748 #ifndef __SH4_NOFPU__
2749 .global GLOBAL(GCC_push_shmedia_regs)
2750 FUNC(GLOBAL(GCC_push_shmedia_regs))
2751 GLOBAL(GCC_push_shmedia_regs):
2752 addi.l r15, -14*8, r15
2753 fst.d r15, 13*8, dr62
2754 fst.d r15, 12*8, dr60
2755 fst.d r15, 11*8, dr58
2756 fst.d r15, 10*8, dr56
2757 fst.d r15, 9*8, dr54
2758 fst.d r15, 8*8, dr52
2759 fst.d r15, 7*8, dr50
2760 fst.d r15, 6*8, dr48
2761 fst.d r15, 5*8, dr46
2762 fst.d r15, 4*8, dr44
2763 fst.d r15, 3*8, dr42
2764 fst.d r15, 2*8, dr40
2765 fst.d r15, 1*8, dr38
2766 fst.d r15, 0*8, dr36
/* GCC_push_shmedia_regs_nofpu: entry point that does not touch the FPU
   registers.  It lowers r15 by a further 27 eight-byte slots.  */
2768 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2769 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2770 GLOBAL(GCC_push_shmedia_regs_nofpu):
2772 addi.l r15, -27*8, r15
/* The size of the FPU-aware symbol is only emitted when that entry
   point was assembled above.  */
2805 #ifndef __SH4_NOFPU__
2806 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2808 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
/* GCC_pop_shmedia_regs: FPU-aware counterpart of GCC_push_shmedia_regs,
   assembled only when __SH4_NOFPU__ is not defined.  It reloads
   dr36..dr62 from slots 27..40 relative to r15.  That is where the push
   side leaves them: slots 0..13 of the FPU area, with the nofpu push
   then lowering r15 by another 27 slots.
   NOTE(review): this correspondence assumes r15 is not adjusted before
   the loads -- confirm against the full instruction sequence.  */
2809 #ifndef __SH4_NOFPU__
2810 .global GLOBAL(GCC_pop_shmedia_regs)
2811 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2812 GLOBAL(GCC_pop_shmedia_regs):
2815 fld.d r15, 40*8, dr62
2816 fld.d r15, 39*8, dr60
2817 fld.d r15, 38*8, dr58
2818 fld.d r15, 37*8, dr56
2819 fld.d r15, 36*8, dr54
2820 fld.d r15, 35*8, dr52
2821 fld.d r15, 34*8, dr50
2822 fld.d r15, 33*8, dr48
2823 fld.d r15, 32*8, dr46
2824 fld.d r15, 31*8, dr44
2825 fld.d r15, 30*8, dr42
2826 fld.d r15, 29*8, dr40
2827 fld.d r15, 28*8, dr38
2828 fld.d r15, 27*8, dr36
/* GCC_pop_shmedia_regs_nofpu: entry point that leaves the FPU registers
   alone.  */
2831 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2832 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2833 GLOBAL(GCC_pop_shmedia_regs_nofpu):
/* As on the push side, the FPU-aware symbol's size is emitted only when
   that entry point exists.  */
2870 #ifndef __SH4_NOFPU__
2871 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2873 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2874 #endif /* __SH5__ == 32 */
2875 #endif /* L_push_pop_shmedia_regs */