1 ;; libgcc routines for the Hitachi H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
5 /* Copyright (C) 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
7 This file is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combine
21 This file is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; see the file COPYING. If not, write to
28 the Free Software Foundation, 59 Temple Place - Suite 330,
29 Boston, MA 02111-1307, USA. */
31 /* Assembler register definitions. */
62 #define MOVP mov.w /* pointers are 16 bits */
77 #if defined (__H8300H__) || defined (__H8300S__)
78 #define MOVP mov.l /* pointers are 32 bits */
130 #endif /* L_cmpsi2 */
156 #endif /* L_ucmpsi2 */
160 ;; HImode divides for the H8/300.
161 ;; We bunch all of this into one object file since there are several
162 ;; "supporting routines".
164 ; general purpose normalize routine
168 ; turns both into +ve numbers, and leaves what the answer sign
176 or A0H,A0H ; is divisor > 0
178 not A0H ; no - then make it +ve
181 xor #0x1,A2L ; and remember that in A2L
182 _lab1: or A1H,A1H ; look at dividend
184 not A1H ; it is -ve, make it positive
187 xor #0x1,A2L; and toggle sign of result
189 ;; Basically the same, except that the sign of the divisor determines
193 or A0H,A0H ; is divisor > 0
195 not A0H ; no - then make it +ve
198 xor #0x1,A2L ; and remember that in A2L
199 _lab7: or A1H,A1H ; look at dividend
201 not A1H ; it is -ve, make it positive
212 negans: or A2L,A2L ; should answer be negative ?
214 not A0H ; yes, so make it so
239 ; D high 8 bits of denom
240 ; d low 8 bits of denom
241 ; N high 8 bits of num
242 ; n low 8 bits of num
243 ; M high 8 bits of mod
244 ; m low 8 bits of mod
245 ; Q high 8 bits of quot
246 ; q low 8 bits of quot
249 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
250 ; see how to partition up the expression.
256 sub.w A3,A3 ; Nn Dd xP 00
262 ; we know that D == 0 and N is != 0
263 mov.b A0H,A3L ; Nn Dd xP 0N
267 _lab6: mov.b A0L,A3L ; n
271 mov.b #0x0,A3H ; Qq 0m
274 ; D != 0 - which means the denominator is too big for the 16/8 bit divide, so
275 ; loop around to get the result.
278 mov.b A0H,A3L ; Nn Dd xP 0N
279 mov.b #0x0,A0H ; high byte of answer has to be zero
281 div8: add.b A0L,A0L ; n*=2
282 rotxl A3L ; Make remainder bigger
285 bhs setbit ; set a bit ?
286 add.w A1,A3 ; no : too far , Q+=N
292 setbit: inc A0L ; do insert bit
297 #endif /* __H8300__ */
298 #endif /* L_divhi3 */
302 ;; 4 byte integer divides for the H8/300.
304 ;; We have one routine which does all the work and lots of
305 ;; little ones which prepare the args and massage the sign.
306 ;; We bunch all of this into one object file since there are several
307 ;; "supporting routines".
312 ; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result in r6l (1 = negative).
313 ; This function is here to keep branch displacements small.
318 mov.b #0,S2L ; keep the sign in S2
319 mov.b A0H,A0H ; is the numerator -ve
333 mov.b #1,S2L ; the sign will be -ve
335 mov.b A2H,A2H ; is the denominator -ve
345 xor #1,S2L ; toggle result sign
349 ;; Basically the same, except that the sign of the divisor determines
352 mov.b #0,S2L ; keep the sign in S2
353 mov.b A0H,A0H ; is the numerator -ve
367 mov.b #1,S2L ; the sign will be -ve
369 mov.b A2H,A2H ; is the denominator -ve
382 #else /* __H8300H__ */
385 mov.b #0,S2L ; keep the sign in S2
386 mov.l A0P,A0P ; is the numerator -ve
389 neg.l A0P ; negate arg
390 mov.b #1,S2L ; the sign will be -ve
393 mov.l A1P,A1P ; is the denominator -ve
396 neg.l A1P ; negate arg
397 xor.b #1,S2L ; toggle result sign
402 ;; Basically the same, except that the sign of the divisor determines
405 mov.b #0,S2L ; keep the sign in S2
406 mov.l A0P,A0P ; is the numerator -ve
409 neg.l A0P ; negate arg
410 mov.b #1,S2L ; the sign will be -ve
413 mov.l A1P,A1P ; is the denominator -ve
416 neg.l A1P ; negate arg
424 ; denominator in A2/A3
446 mov.b #0,S2L ; keep sign low
455 mov.b #0,S2L ; keep sign low
473 ; examine what the sign should be
492 #else /* __H8300H__ */
500 ; takes A0/A1 numerator (A0P for H8/300H)
501 ; A2/A3 denominator (A1P for H8/300H)
502 ; returns A0/A1 quotient (A0P for H8/300H)
503 ; S0/S1 remainder (S0P for H8/300H)
509 sub.w S0,S0 ; zero play area
543 ; have to do the divide by shift and test
551 mov.b #24,S2H ; only do 24 iterations
554 add.w A1,A1 ; double the answer guess
558 rotxl S1L ; double remainder
562 sub.w A3,S1 ; does it all fit
567 add.w A3,S1 ; no, restore mistake
581 #else /* __H8300H__ */
584 sub.l S0P,S0P ; zero play area
585 mov.w A1E,A1E ; denominator top word 0?
588 ; do it the easy way, see page 107 in manual
607 mov.b #24,S2H ; only do 24 iterations
610 shll.l A0P ; double the answer guess
611 rotxl.l S0P ; double remainder
612 sub.l A1P,S0P ; does it all fit?
615 add.l A1P,S0P ; no, restore mistake
627 #endif /* L_divsi3 */
632 ; The H8/300 only has an 8*8->16 multiply.
633 ; The answer is the same as:
635 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
636 ; (we can ignore A1.h * A0.h because that will all fall off the top)
646 mov.b A1L,A2L ; A2l gets srcb.l
647 mulxu A0L,A2 ; A2 gets first sub product
649 mov.b A0H,A3L ; prepare for
650 mulxu A1L,A3 ; second sub product
652 add.b A3L,A2H ; sum first two terms
654 mov.b A1H,A3L ; third sub product
657 add.b A3L,A2H ; almost there
658 mov.w A2,A0 ; that is
662 #endif /* L_mulhi3 */
668 ;; I think that shift and add may be sufficient for this. Using the
669 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
670 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
671 ;; quickly on small args.
731 #else /* __H8300H__ */
734 ; mulsi3 for H8/300H - based on Hitachi SH implementation
736 ; by Toshiyasu Morita
740 ; 16b * 16b = 372 states (worst case)
741 ; 32b * 32b = 724 states (worst case)
745 ; 16b * 16b = 48 states
746 ; 16b * 32b = 72 states
747 ; 32b * 32b = 92 states
752 mov.w r1,r2 ; ( 2 states) b * d
753 mulxu r0,er2 ; (22 states)
755 mov.w e0,r3 ; ( 2 states) a * d
756 beq L_skip1 ; ( 4 states)
757 mulxu r1,er3 ; (22 states)
758 add.w r3,e2 ; ( 2 states)
761 mov.w e1,r3 ; ( 2 states) c * b
762 beq L_skip2 ; ( 4 states)
763 mulxu r0,er3 ; (22 states)
764 add.w r3,e2 ; ( 2 states)
767 mov.l er2,er0 ; ( 2 states)
771 #endif /* L_mulsi3 */
772 #ifdef L_fixunssfsi_asm
773 /* For the h8300 we use asm to save some bytes, to
774 allow more programs to fit into the tiny address
775 space. For the H8/300H and H8S, the C version is good enough. */
777 /* We still treat NaNs differently from libgcc2.c, but then, the
778 behavior is undefined anyway. */
779 .global ___fixunssfsi
799 #endif /* L_fixunssfsi_asm */