* config/h8300/h8300.md (*addsi3_upper): New.

[pf3gnuchains/gcc-fork.git] / gcc / config / h8300 / lib1funcs.asm
diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm

index 370ba75..2f62572 100644 (file)
--- a/gcc/config/h8300/lib1funcs.asm
+++ b/gcc/config/h8300/lib1funcs.asm
@@ -1,7 +1,32 @@
-;; libgcc1 routines for the Hitachi h8/300 cpu.
-;; Contributed by Steve Chamberlain.
-;; sac@cygnus.com
-;; This file is in the public domain.
+;; libgcc routines for the Hitachi H8/300 CPU.
+;; Contributed by Steve Chamberlain <sac@cygnus.com>
+;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
+
+/* Copyright (C) 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
  
  /* Assembler register definitions.  */
  
@@ -49,7 +74,7 @@
  #define S2P    r6
  #endif
  
-#ifdef __H8300H__
+#if defined (__H8300H__) || defined (__H8300S__)
  #define MOVP   mov.l   /* pointers are 32 bits */
  #define ADDP   add.l
  #define CMPP   cmp.l
@@ -70,30 +95,35 @@
  #define A3E    e3
  #endif
  
+#ifdef __H8300H__
+       .h8300h
+#endif
+
+#ifdef __H8300S__
+       .h8300s
+#endif
+
  #ifdef L_cmpsi2
  #ifdef __H8300__
         .section .text
         .align 2
         .global ___cmpsi2
  ___cmpsi2:
-       cmp.w   A2,A0
-       bne     .L2
-       cmp.w   A3,A1
+       cmp.w   A0,A2           ; high words first (flags = A2 - A0)
         bne     .L2
+       cmp.w   A1,A3           ; high words equal: compare the low words
+       bne     .L4             ; low words differ: decide at .L4
         mov.w   #1,A0
         rts
  .L2:
-       cmp.w   A0,A2
-       bgt     .L4
-       bne     .L3
-       cmp.w   A1,A3
-       bls     .L3
-.L4:
-       sub.w   A0,A0
-       rts
+       bgt     .L5             ; signed: A2 > A0, first arg is smaller
  .L3:
         mov.w   #2,A0
+       rts                     ; 2: first arg is the larger one
+.L4:
+       bls     .L3             ; low words are unsigned: A3 < A1 gives 2
  .L5:
+       sub.w   A0,A0           ; 0: first arg is the smaller one
         rts
         .end
  #endif
@@ -105,24 +135,21 @@ ___cmpsi2:
         .align 2
         .global ___ucmpsi2
  ___ucmpsi2:
-       cmp.w   A2,A0
-       bne     .L2
-       cmp.w   A3,A1
+       cmp.w   A0,A2           ; high words first (flags = A2 - A0)
         bne     .L2
+       cmp.w   A1,A3           ; high words equal: compare the low words
+       bne     .L4             ; low words differ: decide at .L4
         mov.w   #1,A0
         rts
  .L2:
-       cmp.w   A0,A2
-       bhi     .L4
-       bne     .L3
-       cmp.w   A1,A3
-       bls     .L3
-.L4:
-       sub.w   A0,A0
-       rts
+       bhi     .L5             ; unsigned: A2 > A0, first arg is smaller
  .L3:
         mov.w   #2,A0
+       rts                     ; 2: first arg is the larger one
+.L4:
+       bls     .L3             ; unsigned: A3 < A1 gives 2
  .L5:
+       sub.w   A0,A0           ; 0: first arg is the smaller one
         rts
         .end
  #endif
@@ -135,7 +162,7 @@ ___ucmpsi2:
  ;; "supporting routines".
  
  ; general purpose normalize routine
-; 
+;
  ; divisor in A0
  ; dividend in A1
  ; turns both into +ve numbers, and leaves what the answer sign
@@ -147,18 +174,34 @@ ___ucmpsi2:
  divnorm:
         mov.b   #0x0,A2L
         or      A0H,A0H         ; is divisor > 0
-       bge     _lab1                   
+       bge     _lab1
         not     A0H             ; no - then make it +ve
         not     A0L
-       adds    #1,A0                   
+       adds    #1,A0
         xor     #0x1,A2L        ; and remember that in A2L
  _lab1: or      A1H,A1H ; look at dividend
-       bge     _lab2           
+       bge     _lab2
         not     A1H             ; it is -ve, make it positive
         not     A1L
         adds    #1,A1
         xor     #0x1,A2L; and toggle sign of result
  _lab2: rts
+;; Basically the same as divnorm, except that only the sign of the
+;; dividend (A0) is recorded in A2L, so it alone determines the sign.
+modnorm:
+       mov.b   #0x0,A2L        ; A2L = 0: result is +ve so far
+       or      A0H,A0H         ; is dividend (A0) >= 0
+       bge     _lab7
+       not     A0H             ; no - then make it +ve
+       not     A0L
+       adds    #1,A0
+       xor     #0x1,A2L        ; and remember that in A2L
+_lab7: or      A1H,A1H ; look at divisor (A1)
+       bge     _lab8
+       not     A1H             ; it is -ve, make it positive
+       not     A1L
+       adds    #1,A1           ; A2L is deliberately left alone here
+_lab8: rts
  
  ; A0=A0/A1 signed
  
@@ -171,13 +214,13 @@ negans:   or      A2L,A2L ; should answer be negative ?
         not     A0H     ; yes, so make it so
         not     A0L
         adds    #1,A0
-_lab4: rts     
+_lab4: rts
  
  ; A0=A0%A1 signed
  
         .global ___modhi3
  ___modhi3:
-       bsr     divnorm
+       bsr     modnorm
         bsr     ___udivhi3
         mov     A3,A0
         bra     negans
@@ -203,18 +246,18 @@ ___umodhi3:
  ; q low 8 bits of quot
  ; P preserve
  
-; The h8 only has a 16/8 bit divide, so we look at the incoming and
+; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
  ; see how to partition up the expression.
  
         .global ___udivhi3
  ___udivhi3:
-                               ; A0 A1 A2 A3 
+                               ; A0 A1 A2 A3
                                 ; Nn Dd       P
-       sub.w   A3,A3           ; Nn Dd xP 00 
-       or      A1H,A1H          
+       sub.w   A3,A3           ; Nn Dd xP 00
+       or      A1H,A1H
         bne     divlongway
-       or      A0H,A0H         
-       beq     _lab6           
+       or      A0H,A0H
+       beq     _lab6
  
  ; we know that D == 0 and N is != 0
         mov.b   A0H,A3L         ; Nn Dd xP 0N
@@ -226,7 +269,7 @@ _lab6:      mov.b   A0L,A3L         ;           n
         mov.b   A3L,A0L         ; Qq
         mov.b   A3H,A3L         ;           m
         mov.b   #0x0,A3H        ; Qq       0m
-       rts     
+       rts
  
  ; D != 0 - which means the denominator is
  ;          loop around to get the result.
@@ -237,19 +280,19 @@ divlongway:
         mov.b   #0x8,A2H        ;       8
  div8:  add.b   A0L,A0L         ; n*=2
         rotxl   A3L             ; Make remainder bigger
-       rotxl   A3H             
+       rotxl   A3H
         sub.w   A1,A3           ; Q-=N
         bhs     setbit          ; set a bit ?
         add.w   A1,A3           ;  no : too far , Q+=N
  
-       dec     A2H             
-       bne     div8            ; next bit      
-       rts     
+       dec     A2H
+       bne     div8            ; next bit
+       rts
  
  setbit:        inc     A0L             ; do insert bit
-       dec     A2H             
-       bne     div8            ; next bit      
-       rts     
+       dec     A2H
+       bne     div8            ; next bit
+       rts
  
  #endif /* __H8300__ */
  #endif /* L_divhi3 */
@@ -258,15 +301,11 @@ setbit:   inc     A0L             ; do insert bit
  
  ;; 4 byte integer divides for the H8/300.
  ;;
-;; We have one routine which does all the work and lots of 
+;; We have one routine which does all the work and lots of
  ;; little ones which prepare the args and massage the sign.
  ;; We bunch all of this into one object file since there are several
  ;; "supporting routines".
  
-#ifdef __H8300H__
-       .h8300h
-#endif
-
         .section .text
         .align 2
  
@@ -288,18 +327,18 @@ divnorm:
  
         add     #1,A1L
         addx    #0,A1H
-       addx    #0,A0H
         addx    #0,A0L
+       addx    #0,A0H
  
         mov.b   #1,S2L          ; the sign will be -ve
  postive:
         mov.b   A2H,A2H         ; is the denominator -ve
         bge     postive2
-       not     A2L             
+       not     A2L
         not     A2H
         not     A3L
         not     A3H
-       add.b   #1,A3L  
+       add.b   #1,A3L
         addx    #0,A3H
         addx    #0,A2L
         addx    #0,A2H
@@ -307,6 +346,39 @@ postive:
  postive2:
         rts
  
+;; Basically the same as divnorm, except that only the sign of the
+;; numerator is recorded in S2L, so it alone determines the sign.
+modnorm:
+       mov.b   #0,S2L          ; keep the sign in S2
+       mov.b   A0H,A0H         ; is the numerator -ve
+       bge     mpostive
+
+       ; negate the numerator A0/A1: complement every byte ...
+       not     A0H
+       not     A1H
+       not     A0L
+       not     A1L
+
+       add     #1,A1L          ; ... then add 1 to the lowest byte
+       addx    #0,A1H          ; and ripple the carry upwards
+       addx    #0,A0L
+       addx    #0,A0H
+
+       mov.b   #1,S2L          ; the sign will be -ve
+mpostive:
+       mov.b   A2H,A2H         ; is the denominator -ve
+       bge     mpostive2
+       not     A2L             ; yes - negate A2/A3, S2L is left alone
+       not     A2H
+       not     A3L
+       not     A3H
+       add.b   #1,A3L          ; add 1 to the lowest byte
+       addx    #0,A3H          ; and ripple the carry upwards
+       addx    #0,A2L
+       addx    #0,A2H
+mpostive2:
+       rts
+
  #else /* __H8300H__ */
  
  divnorm:
@@ -327,17 +399,36 @@ postive:
  postive2:
         rts
  
+;; Basically the same as divnorm, except that only the sign of the
+;; numerator is recorded in S2L, so it alone determines the sign.
+modnorm:
+       mov.b   #0,S2L          ; keep the sign in S2
+       mov.l   A0P,A0P         ; is the numerator -ve
+       bge     mpostive
+
+       neg.l   A0P             ; negate the numerator
+       mov.b   #1,S2L          ; the sign will be -ve
+
+mpostive:
+       mov.l   A1P,A1P         ; is the denominator -ve
+       bge     mpostive2
+
+       neg.l   A1P             ; negate the denominator, S2L is left alone
+
+mpostive2:
+       rts
+
  #endif
  
  ; numerator in A0/A1
  ; denominator in A2/A3
         .global ___modsi3
  ___modsi3:
-       PUSHP   S2P             
+       PUSHP   S2P
         PUSHP   S0P
         PUSHP   S1P
  
-       bsr     divnorm
+       bsr     modnorm
         bsr     divmodsi4
  #ifdef __H8300__
         mov     S0,A0
@@ -370,7 +461,7 @@ ___umodsi3:
         mov.l   S0P,A0P
  #endif
         bra     exitdiv
-       
+
         .global ___divsi3
  ___divsi3:
         PUSHP   S2P
@@ -386,7 +477,7 @@ exitdiv:
  
         or      S2L,S2L
         beq     reti
-       
+
         ; should be -ve
  #ifdef __H8300__
         not     A0H
@@ -396,20 +487,20 @@ exitdiv:
  
         add     #1,A1L
         addx    #0,A1H
-       addx    #0,A0H
         addx    #0,A0L
+       addx    #0,A0H
  #else /* __H8300H__ */
         neg.l   A0P
  #endif
  
  reti:
         POPP    S2P
-       rts     
+       rts
  
-       ; takes A0/A1 numerator (A0P for 300h)
-       ; A2/A3 denominator (A1P for 300h)
-       ; returns A0/A1 quotient (A0P for 300h)
-       ; S0/S1 remainder (S0P for 300h)
+       ; takes A0/A1 numerator (A0P for H8/300H)
+       ; A2/A3 denominator (A1P for H8/300H)
+       ; returns A0/A1 quotient (A0P for H8/300H)
+       ; S0/S1 remainder (S0P for H8/300H)
         ; trashes S2
  
  #ifdef __H8300__
@@ -447,7 +538,7 @@ NumByte3Zero:
  
          mov.b  S1H,S1L
          mov.b  #0x0,S1H
-        rts    
+        rts
  
  ; have to do the divide by shift and test
  DenHighZero:
@@ -471,7 +562,7 @@ nextbit:
          sub.w  A3,S1   ; does it all fit
          subx   A2L,S0L
          subx   A2H,S0H
-        bhs    setone   
+        bhs    setone
  
          add.w  A3,S1   ; no, restore mistake
          addx   A2L,S0L
@@ -479,13 +570,13 @@ nextbit:
  
          dec    S2H
          bne    nextbit
-        rts    
-       
+        rts
+
  setone:
         inc     A1L
          dec    S2H
          bne    nextbit
-        rts    
+        rts
  
  #else /* __H8300H__ */
  
@@ -538,13 +629,13 @@ setone:
  #ifdef L_mulhi3
  
  ;; HImode multiply.
-; The h8 only has an 8*8->16 multiply.
+; The H8/300 only has an 8*8->16 multiply.
  ; The answer is the same as:
-; 
+;
  ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
  ; (we can ignore A1.h * A0.h cause that will all off the top)
  ; A0 in
-; A1 in 
+; A1 in
  ; A0 answer
  
  #ifdef __H8300__
@@ -553,7 +644,7 @@ setone:
         .global ___mulhi3
  ___mulhi3:
         mov.b   A1L,A2L         ; A2l gets srcb.l
-       mulxu   A0L,A2          ; A2 gets first sub product 
+       mulxu   A0L,A2          ; A2 gets first sub product
  
         mov.b   A0H,A3L         ; prepare for
         mulxu   A1L,A3          ; second sub product
@@ -561,7 +652,7 @@ ___mulhi3:
         add.b   A3L,A2H         ; sum first two terms
  
         mov.b   A1H,A3L         ; third sub product
-       mulxu   A0L,A3          
+       mulxu   A0L,A3
  
         add.b   A3L,A2H         ; almost there
         mov.w   A2,A0           ; that is
@@ -573,7 +664,7 @@ ___mulhi3:
  #ifdef L_mulsi3
  
  ;; SImode multiply.
-;; 
+;;
  ;; I think that shift and add may be sufficient for this.  Using the
  ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
  ;; the inner loop uses maybe 20 cycles + overhead, but terminates
@@ -582,7 +673,7 @@ ___mulhi3:
  ;; A0/A1 src_a
  ;; A2/A3 src_b
  ;;
-;;  while (a) 
+;;  while (a)
  ;;    {
  ;;      if (a & 1)
  ;;        r += b;
@@ -600,10 +691,10 @@ ___mulsi3:
         PUSHP   S0P
         PUSHP   S1P
         PUSHP   S2P
-       
+
         sub.w   S0,S0
         sub.w   S1,S1
-       
+
         ; while (a)
  _top:  mov.w   A0,A0
         bne     _more
@@ -622,7 +713,7 @@ _nobit:
         rotxr   A0L
         rotxr   A1H
         rotxr   A1L
-       
+
         ; b <<= 1
         add.w   A3,A3
         addx    A2L,A2L
@@ -630,7 +721,7 @@ _nobit:
         bra     _top
  
  _done:
-       mov.w   S0,A0   
+       mov.w   S0,A0
         mov.w   S1,A1
         POPP    S2P
         POPP    S1P
@@ -639,34 +730,70 @@ _done:
  
  #else /* __H8300H__ */
  
-       .h8300h
+;
+; mulsi3 for H8/300H - based on Hitachi SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b =  48 states
+; 16b * 32b =  72 states
+; 32b * 32b =  92 states
+; Below: er0 = a:b (e0:r0), er1 = c:d (e1:r1); product returned in er0
  
         .global ___mulsi3
  ___mulsi3:
-       sub.l   A2P,A2P
+       mov.w   r1,r2   ; ( 2 states) b * d
+       mulxu   r0,er2  ; (22 states) er2 = b * d, the base of the result
  
-       ; while (a)
-_top:  mov.l   A0P,A0P
-       beq     _done
-
-       ; if (a & 1)
-       bld     #0,A0L
-       bcc     _nobit
+       mov.w   e0,r3   ; ( 2 states) a * d
+       beq     L_skip1 ; ( 4 states) a == 0: this term is zero
+       mulxu   r1,er3  ; (22 states)
+       add.w   r3,e2   ; ( 2 states) low word goes into the upper half
  
-       ; r += b
-       add.l   A1P,A2P
-
-_nobit:
-       ; a >>= 1
-       shlr.l  A0P
+L_skip1:
+       mov.w   e1,r3   ; ( 2 states) c * b
+       beq     L_skip2 ; ( 4 states) c == 0: this term is zero
+       mulxu   r0,er3  ; (22 states)
+       add.w   r3,e2   ; ( 2 states) low word goes into the upper half
  
-       ; b <<= 1
-       shll.l  A1P
-       bra     _top
+L_skip2:
+       mov.l   er2,er0 ; ( 2 states) move the product to er0
+       rts             ; (10 states)
  
-_done:
-       mov.l   A2P,A0P
+#endif
+#endif /* L_mulsi3 */
+#ifdef L_fixunssfsi_asm
+/* For the h8300 we use asm to save some bytes, to
+   allow more programs to fit into the tiny address
+   space.  For the H8/300H and H8S, the C version is good enough.  */
+#ifdef __H8300__
+/* NaNs are still treated differently from libgcc2.c, but then, the
+   behavior is undefined anyway.  */
+       .global ___fixunssfsi
+___fixunssfsi:
+       cmp.b #0x4f,r0h         ; 0x4f is the top byte of 2^31 (0x4f000000)
+       bge Large_num
+       jmp     @___fixsfsi     ; negative or < 2^31: signed routine will do
+Large_num:
+       bhi L_huge_num          ; top byte > 0x4f: cannot fit in 32 bits
+       xor.b #0x80,A0L         ; low exponent bit becomes the implicit 1
+       bmi L_shift8            ; bit was 0: 2^31 <= value < 2^32
+L_huge_num:
+       mov.w #65535,A0         ; saturate to 0xffffffff
+       mov.w A0,A1
+       rts
+L_shift8:
+       mov.b A0L,A0H           ; result = 24-bit significand << 8
+       mov.b A1H,A0L
+       mov.b A1L,A1H
+       mov.b #0,A1L
         rts
-
  #endif
-#endif /* L_mulsi3 */
-\ No newline at end of file
+#endif /* L_fixunssfsi_asm */