1 ;; libgcc routines for the Renesas H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
5 /* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004
6 Free Software Foundation, Inc.
8 This file is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 In addition to the permissions in the GNU General Public License, the
14 Free Software Foundation gives you unlimited permission to link the
15 compiled version of this file into combinations with other programs,
16 and to distribute those combinations without any restriction coming
17 from the use of this file. (The General Public License restrictions
18 do apply in other respects; for example, they cover modification of
19 the file, and distribution when not linked into a combine
22 This file is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 General Public License for more details.
27 You should have received a copy of the GNU General Public License
28 along with this program; see the file COPYING. If not, write to
29 the Free Software Foundation, 59 Temple Place - Suite 330,
30 Boston, MA 02111-1307, USA. */
32 /* Assembler register definitions. */
75 #if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
94 #ifdef __NORMAL_MODE__
102 #ifdef __NORMAL_MODE__
109 #ifdef __NORMAL_MODE__
140 #endif /* L_cmpsi2 */
166 #endif /* L_ucmpsi2 */
170 ;; HImode divides for the H8/300.
171 ;; We bunch all of this into one object file since there are several
172 ;; "supporting routines".
174 ; general purpose normalize routine
178 ; turns both into +ve numbers, and leaves what the answer sign
185 or A0H,A0H ; is divisor > 0
188 not A0H ; no - then make it +ve
191 _lab1: or A1H,A1H ; look at dividend
193 not A1H ; it is -ve, make it positive
196 xor #0x8,A2L; and toggle sign of result
198 ;; Basically the same, except that the sign of the divisor determines
201 or A0H,A0H ; is divisor > 0
204 not A0H ; no - then make it +ve
207 _lab7: or A1H,A1H ; look at dividend
209 not A1H ; it is -ve, make it positive
220 negans: btst #3,A2L ; should answer be negative ?
222 not A0H ; yes, so make it so
247 ; D high 8 bits of denom
248 ; d low 8 bits of denom
249 ; N high 8 bits of num
250 ; n low 8 bits of num
251 ; M high 8 bits of mod
252 ; m low 8 bits of mod
253 ; Q high 8 bits of quot
254 ; q low 8 bits of quot
257 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
258 ; see how to partition up the expression.
264 sub.w A3,A3 ; Nn Dd xP 00
270 ; we know that D == 0 and N is != 0
271 mov.b A0H,A3L ; Nn Dd xP 0N
275 _lab6: mov.b A0L,A3L ; n
279 mov.b #0x0,A3H ; Qq 0m
282 ; D != 0 - which means the denominator is wider than 8 bits, so the
283 ; 16/8 bit divide cannot be used directly: loop around to get the result.
286 mov.b A0H,A3L ; Nn Dd xP 0N
287 mov.b #0x0,A0H ; high byte of answer has to be zero
289 div8: add.b A0L,A0L ; n*=2
290 rotxl A3L ; Make remainder bigger
293 bhs setbit ; set a bit ?
294 add.w A1,A3 ; no : too far , Q+=N
300 setbit: inc A0L ; do insert bit
305 #endif /* __H8300__ */
306 #endif /* L_divhi3 */
310 ;; 4 byte integer divides for the H8/300.
312 ;; We have one routine which does all the work and lots of
313 ;; little ones which prepare the args and massage the sign.
314 ;; We bunch all of this into one object file since there are several
315 ;; "supporting routines".
320 ; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result in r6l.
321 ; This function is here to keep branch displacements small.
326 mov.b A0H,A0H ; is the numerator -ve
327 stc ccr,S2L ; keep the sign in bit 3 of S2L
341 mov.b A2H,A2H ; is the denominator -ve
351 xor.b #0x08,S2L ; toggle the result sign
355 ;; Basically the same, except that the sign of the divisor determines
358 mov.b A0H,A0H ; is the numerator -ve
359 stc ccr,S2L ; keep the sign in bit 3 of S2L
373 mov.b A2H,A2H ; is the denominator -ve
386 #else /* __H8300H__ */
389 mov.l A0P,A0P ; is the numerator -ve
390 stc ccr,S2L ; keep the sign in bit 3 of S2L
393 neg.l A0P ; negate arg
396 mov.l A1P,A1P ; is the denominator -ve
399 neg.l A1P ; negate arg
400 xor.b #0x08,S2L ; toggle the result sign
405 ;; Basically the same, except that the sign of the divisor determines
408 mov.l A0P,A0P ; is the numerator -ve
409 stc ccr,S2L ; keep the sign in bit 3 of S2L
412 neg.l A0P ; negate arg
415 mov.l A1P,A1P ; is the denominator -ve
418 neg.l A1P ; negate arg
426 ; denominator in A2/A3
446 ;; H8/300H and H8S version of ___udivsi3 is defined later in
488 ; examine what the sign should be
504 #else /* __H8300H__ */
516 ; takes A0/A1 numerator (A0P for H8/300H)
517 ; A2/A3 denominator (A1P for H8/300H)
518 ; returns A0/A1 quotient (A0P for H8/300H)
519 ; S0/S1 remainder (S0P for H8/300H)
525 sub.w S0,S0 ; zero play area
559 ; have to do the divide by shift and test
567 mov.b #24,S2H ; only do 24 iterations
570 add.w A1,A1 ; double the answer guess
574 rotxl S1L ; double remainder
578 sub.w A3,S1 ; does it all fit
583 add.w A3,S1 ; no, restore mistake
597 #else /* __H8300H__ */
599 ;; This function also computes the remainder and stores it in er3.
602 mov.w A1E,A1E ; denominator top word 0?
605 ; do it the easy way, see page 107 in manual
619 ; expects er1 >= 2^16
626 shlr.l er2 ; make divisor < 2^16
630 shlr.l #2,er2 ; make divisor < 2^16
636 shlr.l #2,er2 ; make divisor < 2^16
650 ;; er0 contains shifted dividend
651 ;; er1 contains divisor
652 ;; er2 contains shifted divisor
653 ;; er3 contains dividend, later remainder
654 divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
657 subs #1,er0 ; er0 = AQ - 1
659 mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
660 sub.w r2,e3 ; dividend - 65536 * er2
662 mulxu.w r0,er2 ; compute er3 = remainder (tentative)
663 sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
665 cmp.l er1,er3 ; is divisor < remainder?
668 sub.l er1,er3 ; correct the remainder
673 #endif /* L_divsi3 */
678 ; The H8/300 only has an 8*8->16 multiply.
679 ; The answer is the same as:
681 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
682 ; (we can ignore A1.h * A0.h because that will all fall off the top)
692 mov.b A1L,A2L ; A2L = srcb.l
693 mulxu A0L,A2 ; first sub product: A2 = srca.l * srcb.l
695 mov.b A0H,A3L ; A3L = srca.h, ready for the
696 mulxu A1L,A3 ; second sub product: A3 = srca.h * srcb.l
698 add.b A3L,A2H ; add its low byte to the high byte of the first (the "* 256" term)
700 mov.b A1H,A3L ; A3L = srcb.h, for the third sub product
703 add.b A3L,A2H ; fold the third term into the high byte as well
704 mov.w A2,A0 ; copy the finished 16-bit product from A2 to A0
708 #endif /* L_mulhi3 */
714 ;; I think that shift and add may be sufficient for this. Using the
715 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
716 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
717 ;; quickly on small args.
775 #else /* __H8300H__ */
778 ; mulsi3 for H8/300H - based on Renesas SH implementation
780 ; by Toshiyasu Morita
784 ; 16b * 16b = 372 states (worst case)
785 ; 32b * 32b = 724 states (worst case)
789 ; 16b * 16b = 48 states
790 ; 16b * 32b = 72 states
791 ; 32b * 32b = 92 states
796 mov.w r1,r2 ; ( 2 states) b * d: r2 = d (r1)
797 mulxu r0,er2 ; (22 states) er2 = b * d (r0 * r1), full 32-bit product
799 mov.w e0,r3 ; ( 2 states) a * d: r3 = a (e0); the move also sets Z if a == 0
800 beq L_skip1 ; ( 4 states) a == 0: the term is zero, skip the multiply
801 mulxu r1,er3 ; (22 states) er3 = a * d
802 add.w r3,e2 ; ( 2 states) add the low 16 bits of a * d to the high word of er2
805 mov.w e1,r3 ; ( 2 states) c * b: r3 = c (e1); the move also sets Z if c == 0
806 beq L_skip2 ; ( 4 states) c == 0: the term is zero, skip the multiply
807 mulxu r0,er3 ; (22 states) er3 = c * b
808 add.w r3,e2 ; ( 2 states) add the low 16 bits of c * b to the high word of er2
811 mov.l er2,er0 ; ( 2 states) move the 32-bit product from er2 to er0
815 #endif /* L_mulsi3 */
816 #ifdef L_fixunssfsi_asm
817 /* For the h8300 we use asm to save some bytes, to
818 allow more programs to fit into the tiny address
819 space. For the H8/300H and H8S, the C version is good enough. */
821 /* We still treat NaNs differently from libgcc2.c, but then, the
822 behavior is undefined anyway. */
823 .global ___fixunssfsi
843 #endif /* L_fixunssfsi_asm */