* config/h8300/h8300.md (*addsi3_upper): New.

[pf3gnuchains/gcc-fork.git] / gcc / config / h8300 / lib1funcs.asm
diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm

index c52bcff..2f62572 100644 (file)
--- a/gcc/config/h8300/lib1funcs.asm
+++ b/gcc/config/h8300/lib1funcs.asm
@@ -1,8 +1,8 @@
-;; libgcc1 routines for the Hitachi h8/300 cpu.
-;; Contributed by Steve Chamberlain.
-;; sac@cygnus.com
+;; libgcc routines for the Hitachi H8/300 CPU.
+;; Contributed by Steve Chamberlain <sac@cygnus.com>
+;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
  
-/* Copyright (C) 1994 Free Software Foundation, Inc.
+/* Copyright (C) 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
  
  This file is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
@@ -11,11 +11,12 @@ later version.
  
  In addition to the permissions in the GNU General Public License, the
  Free Software Foundation gives you unlimited permission to link the
-compiled version of this file with other programs, and to distribute
-those programs without any restriction coming from the use of this
-file.  (The General Public License restrictions do apply in other
-respects; for example, they cover modification of the file, and
-distribution when not linked into another program.)
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
  
  This file is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -27,13 +28,6 @@ along with this program; see the file COPYING.  If not, write to
  the Free Software Foundation, 59 Temple Place - Suite 330,
  Boston, MA 02111-1307, USA.  */
  
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.  */
-
  /* Assembler register definitions.  */
  
  #define A0 r0
@@ -80,7 +74,7 @@ Boston, MA 02111-1307, USA.  */
  #define S2P    r6
  #endif
  
-#ifdef __H8300H__
+#if defined (__H8300H__) || defined (__H8300S__)
  #define MOVP   mov.l   /* pointers are 32 bits */
  #define ADDP   add.l
  #define CMPP   cmp.l
@@ -101,30 +95,35 @@ Boston, MA 02111-1307, USA.  */
  #define A3E    e3
  #endif
  
+#ifdef __H8300H__
+       .h8300h
+#endif
+
+#ifdef __H8300S__
+       .h8300s
+#endif
+
  #ifdef L_cmpsi2
  #ifdef __H8300__
         .section .text
         .align 2
         .global ___cmpsi2
  ___cmpsi2:                     ; compare A0:A1 with A2:A3; result code (0, 1 or 2) in A0
-       cmp.w   A2,A0
-       bne     .L2
-       cmp.w   A3,A1
+       cmp.w   A0,A2           ; high words: flags from A2 - A0
         bne     .L2             ; high words differ, so they decide
+       cmp.w   A1,A3           ; high words equal: flags from A3 - A1
+       bne     .L4             ; low words differ, so they decide
         mov.w   #1,A0           ; both words equal: result 1
         rts
  .L2:
-       cmp.w   A0,A2
-       bgt     .L4
-       bne     .L3
-       cmp.w   A1,A3
-       bls     .L3
-.L4:
-       sub.w   A0,A0
-       rts
+       bgt     .L5             ; A2 > A0 as signed words: result 0
  .L3:
         mov.w   #2,A0           ; A0:A1 is the larger value: result 2
+       rts
+.L4:
+       bls     .L3             ; A3 <= A1 as unsigned words: result 2
  .L5:
+       sub.w   A0,A0           ; A2:A3 is the larger value: result 0
         rts
         .end
  #endif
@@ -136,24 +135,21 @@ ___cmpsi2:
         .align 2
         .global ___ucmpsi2
  ___ucmpsi2:                    ; unsigned compare of A0:A1 with A2:A3; result code (0, 1 or 2) in A0
-       cmp.w   A2,A0
-       bne     .L2
-       cmp.w   A3,A1
+       cmp.w   A0,A2           ; high words: flags from A2 - A0
         bne     .L2             ; high words differ, so they decide
+       cmp.w   A1,A3           ; high words equal: flags from A3 - A1
+       bne     .L4             ; low words differ, so they decide
         mov.w   #1,A0           ; both words equal: result 1
         rts
  .L2:
-       cmp.w   A0,A2
-       bhi     .L4
-       bne     .L3
-       cmp.w   A1,A3
-       bls     .L3
-.L4:
-       sub.w   A0,A0
-       rts
+       bhi     .L5             ; A2 > A0 as unsigned words: result 0
  .L3:
         mov.w   #2,A0           ; A0:A1 is the larger value: result 2
+       rts
+.L4:
+       bls     .L3             ; A3 <= A1 as unsigned words: result 2
  .L5:
+       sub.w   A0,A0           ; A2:A3 is the larger value: result 0
         rts
         .end
  #endif
@@ -166,7 +162,7 @@ ___ucmpsi2:
  ;; "supporting routines".
  
  ; general purpose normalize routine
-; 
+;
  ; divisor in A0
  ; dividend in A1
  ; turns both into +ve numbers, and leaves what the answer sign
@@ -178,13 +174,13 @@ ___ucmpsi2:
  divnorm:
         mov.b   #0x0,A2L
         or      A0H,A0H         ; is divisor > 0
-       bge     _lab1                   
+       bge     _lab1
         not     A0H             ; no - then make it +ve
         not     A0L
-       adds    #1,A0                   
+       adds    #1,A0
         xor     #0x1,A2L        ; and remember that in A2L
  _lab1: or      A1H,A1H ; look at dividend
-       bge     _lab2           
+       bge     _lab2
         not     A1H             ; it is -ve, make it positive
         not     A1L
         adds    #1,A1
@@ -195,13 +191,13 @@ _lab2:    rts
  modnorm:
         mov.b   #0x0,A2L
         or      A0H,A0H         ; is divisor > 0
-       bge     _lab7                   
+       bge     _lab7
         not     A0H             ; no - then make it +ve
         not     A0L
-       adds    #1,A0                   
+       adds    #1,A0
         xor     #0x1,A2L        ; and remember that in A2L
  _lab7: or      A1H,A1H ; look at dividend
-       bge     _lab8           
+       bge     _lab8
         not     A1H             ; it is -ve, make it positive
         not     A1L
         adds    #1,A1
@@ -218,7 +214,7 @@ negans:     or      A2L,A2L ; should answer be negative ?
         not     A0H     ; yes, so make it so
         not     A0L
         adds    #1,A0
-_lab4: rts     
+_lab4: rts
  
  ; A0=A0%A1 signed
  
@@ -250,18 +246,18 @@ ___umodhi3:
  ; q low 8 bits of quot
  ; P preserve
  
-; The h8 only has a 16/8 bit divide, so we look at the incoming and
+; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
  ; see how to partition up the expression.
  
         .global ___udivhi3
  ___udivhi3:
-                               ; A0 A1 A2 A3 
+                               ; A0 A1 A2 A3
                                 ; Nn Dd       P
-       sub.w   A3,A3           ; Nn Dd xP 00 
-       or      A1H,A1H          
+       sub.w   A3,A3           ; Nn Dd xP 00
+       or      A1H,A1H
         bne     divlongway
-       or      A0H,A0H         
-       beq     _lab6           
+       or      A0H,A0H
+       beq     _lab6
  
  ; we know that D == 0 and N is != 0
         mov.b   A0H,A3L         ; Nn Dd xP 0N
@@ -273,7 +269,7 @@ _lab6:      mov.b   A0L,A3L         ;           n
         mov.b   A3L,A0L         ; Qq
         mov.b   A3H,A3L         ;           m
         mov.b   #0x0,A3H        ; Qq       0m
-       rts     
+       rts
  
  ; D != 0 - which means the denominator does not fit in 8 bits, so we
  ;          loop around to get the result.
@@ -284,19 +280,19 @@ divlongway:
         mov.b   #0x8,A2H        ;       8
  div8:  add.b   A0L,A0L         ; n*=2
         rotxl   A3L             ; Make remainder bigger
-       rotxl   A3H             
+       rotxl   A3H
         sub.w   A1,A3           ; Q-=N
         bhs     setbit          ; set a bit ?
         add.w   A1,A3           ;  no : too far , Q+=N
  
-       dec     A2H             
-       bne     div8            ; next bit      
-       rts     
+       dec     A2H
+       bne     div8            ; next bit
+       rts
  
  setbit:        inc     A0L             ; do insert bit
-       dec     A2H             
-       bne     div8            ; next bit      
-       rts     
+       dec     A2H
+       bne     div8            ; next bit
+       rts
  
  #endif /* __H8300__ */
  #endif /* L_divhi3 */
@@ -305,15 +301,11 @@ setbit:   inc     A0L             ; do insert bit
  
  ;; 4 byte integer divides for the H8/300.
  ;;
-;; We have one routine which does all the work and lots of 
+;; We have one routine which does all the work and lots of
  ;; little ones which prepare the args and massage the sign.
  ;; We bunch all of this into one object file since there are several
  ;; "supporting routines".
  
-#ifdef __H8300H__
-       .h8300h
-#endif
-
         .section .text
         .align 2
  
@@ -335,18 +327,18 @@ divnorm:
  
         add     #1,A1L
         addx    #0,A1H
-       addx    #0,A0H
         addx    #0,A0L
+       addx    #0,A0H
  
         mov.b   #1,S2L          ; the sign will be -ve
  postive:
         mov.b   A2H,A2H         ; is the denominator -ve
         bge     postive2
-       not     A2L             
+       not     A2L
         not     A2H
         not     A3L
         not     A3H
-       add.b   #1,A3L  
+       add.b   #1,A3L
         addx    #0,A3H
         addx    #0,A2L
         addx    #0,A2H
@@ -369,18 +361,18 @@ modnorm:
  
         add     #1,A1L
         addx    #0,A1H
-       addx    #0,A0H
         addx    #0,A0L
+       addx    #0,A0H
  
         mov.b   #1,S2L          ; the sign will be -ve
  mpostive:
         mov.b   A2H,A2H         ; is the denominator -ve
         bge     mpostive2
-       not     A2L             
+       not     A2L
         not     A2H
         not     A3L
         not     A3H
-       add.b   #1,A3L  
+       add.b   #1,A3L
         addx    #0,A3H
         addx    #0,A2L
         addx    #0,A2H
@@ -432,7 +424,7 @@ mpostive2:
  ; denominator in A2/A3
         .global ___modsi3
  ___modsi3:
-       PUSHP   S2P             
+       PUSHP   S2P
         PUSHP   S0P
         PUSHP   S1P
  
@@ -469,7 +461,7 @@ ___umodsi3:
         mov.l   S0P,A0P
  #endif
         bra     exitdiv
-       
+
         .global ___divsi3
  ___divsi3:
         PUSHP   S2P
@@ -485,7 +477,7 @@ exitdiv:
  
         or      S2L,S2L
         beq     reti
-       
+
         ; should be -ve
  #ifdef __H8300__
         not     A0H
@@ -495,20 +487,20 @@ exitdiv:
  
         add     #1,A1L
         addx    #0,A1H
-       addx    #0,A0H
         addx    #0,A0L
+       addx    #0,A0H
  #else /* __H8300H__ */
         neg.l   A0P
  #endif
  
  reti:
         POPP    S2P
-       rts     
+       rts
  
-       ; takes A0/A1 numerator (A0P for 300h)
-       ; A2/A3 denominator (A1P for 300h)
-       ; returns A0/A1 quotient (A0P for 300h)
-       ; S0/S1 remainder (S0P for 300h)
+       ; takes A0/A1 numerator (A0P for H8/300H)
+       ; A2/A3 denominator (A1P for H8/300H)
+       ; returns A0/A1 quotient (A0P for H8/300H)
+       ; S0/S1 remainder (S0P for H8/300H)
         ; trashes S2
  
  #ifdef __H8300__
@@ -546,7 +538,7 @@ NumByte3Zero:
  
          mov.b  S1H,S1L
          mov.b  #0x0,S1H
-        rts    
+        rts
  
  ; have to do the divide by shift and test
  DenHighZero:
@@ -570,7 +562,7 @@ nextbit:
          sub.w  A3,S1   ; does it all fit
          subx   A2L,S0L
          subx   A2H,S0H
-        bhs    setone   
+        bhs    setone
  
          add.w  A3,S1   ; no, restore mistake
          addx   A2L,S0L
@@ -578,13 +570,13 @@ nextbit:
  
          dec    S2H
          bne    nextbit
-        rts    
-       
+        rts
+
  setone:
         inc     A1L
          dec    S2H
          bne    nextbit
-        rts    
+        rts
  
  #else /* __H8300H__ */
  
@@ -637,13 +629,13 @@ setone:
  #ifdef L_mulhi3
  
  ;; HImode multiply.
-; The h8 only has an 8*8->16 multiply.
+; The H8/300 only has an 8*8->16 multiply.
  ; The answer is the same as:
-; 
+;
  ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
  ; (we can ignore A1.h * A0.h because that product falls entirely off the top)
  ; A0 in
-; A1 in 
+; A1 in
  ; A0 answer
  
  #ifdef __H8300__
@@ -652,7 +644,7 @@ setone:
         .global ___mulhi3
  ___mulhi3:
         mov.b   A1L,A2L         ; A2l gets srcb.l
-       mulxu   A0L,A2          ; A2 gets first sub product 
+       mulxu   A0L,A2          ; A2 gets first sub product
  
         mov.b   A0H,A3L         ; prepare for
         mulxu   A1L,A3          ; second sub product
@@ -660,7 +652,7 @@ ___mulhi3:
         add.b   A3L,A2H         ; sum first two terms
  
         mov.b   A1H,A3L         ; third sub product
-       mulxu   A0L,A3          
+       mulxu   A0L,A3
  
         add.b   A3L,A2H         ; almost there
         mov.w   A2,A0           ; that is
@@ -672,7 +664,7 @@ ___mulhi3:
  #ifdef L_mulsi3
  
  ;; SImode multiply.
-;; 
+;;
  ;; I think that shift and add may be sufficient for this.  Using the
  ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
  ;; the inner loop uses maybe 20 cycles + overhead, but terminates
@@ -681,7 +673,7 @@ ___mulhi3:
  ;; A0/A1 src_a
  ;; A2/A3 src_b
  ;;
-;;  while (a) 
+;;  while (a)
  ;;    {
  ;;      if (a & 1)
  ;;        r += b;
@@ -699,10 +691,10 @@ ___mulsi3:
         PUSHP   S0P
         PUSHP   S1P
         PUSHP   S2P
-       
+
         sub.w   S0,S0
         sub.w   S1,S1
-       
+
         ; while (a)
  _top:  mov.w   A0,A0
         bne     _more
@@ -721,7 +713,7 @@ _nobit:
         rotxr   A0L
         rotxr   A1H
         rotxr   A1L
-       
+
         ; b <<= 1
         add.w   A3,A3
         addx    A2L,A2L
@@ -729,7 +721,7 @@ _nobit:
         bra     _top
  
  _done:
-       mov.w   S0,A0   
+       mov.w   S0,A0
         mov.w   S1,A1
         POPP    S2P
         POPP    S1P
@@ -738,34 +730,70 @@ _done:
  
  #else /* __H8300H__ */
  
-       .h8300h
+;
+; mulsi3 for H8/300H - based on Hitachi SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b =  48 states
+; 16b * 32b =  72 states
+; 32b * 32b =  92 states
+;
  
         .global ___mulsi3
  ___mulsi3:                     ; er0 = low 32 bits of er0 * er1, with a:b = e0:r0 and c:d = e1:r1
-       sub.l   A2P,A2P
-
-       ; while (a)
-_top:  mov.l   A0P,A0P
-       beq     _done
-
-       ; if (a & 1)
-       bld     #0,A0L
-       bcc     _nobit
-
-       ; r += b
-       add.l   A1P,A2P
+       mov.w   r1,r2   ; ( 2 states) b * d: copy d (r1) into r2
+       mulxu   r0,er2  ; (22 states) er2 = b * d, full 32-bit product
  
-_nobit:
-       ; a >>= 1
-       shlr.l  A0P
+       mov.w   e0,r3   ; ( 2 states) a * d: copy a (e0) into r3, sets Z if a == 0
+       beq     L_skip1 ; ( 4 states) a == 0: this cross product is zero
+       mulxu   r1,er3  ; (22 states) er3 = a * d
+       add.w   r3,e2   ; ( 2 states) only its low word reaches the 32-bit result
  
-       ; b <<= 1
-       shll.l  A1P
-       bra     _top
+L_skip1:
+       mov.w   e1,r3   ; ( 2 states) c * b: copy c (e1) into r3, sets Z if c == 0
+       beq     L_skip2 ; ( 4 states) c == 0: this cross product is zero
+       mulxu   r0,er3  ; (22 states) er3 = c * b
+       add.w   r3,e2   ; ( 2 states) only its low word reaches the 32-bit result
  
-_done:
-       mov.l   A2P,A0P
-       rts
+L_skip2:
+       mov.l   er2,er0 ; ( 2 states) move the product into er0
+       rts             ; (10 states)
  
  #endif
  #endif /* L_mulsi3 */
+#ifdef L_fixunssfsi_asm
+/* For the H8/300 we use asm to save some bytes, to
+   allow more programs to fit into the tiny address
+   space.  For the H8/300H and H8S, the C version is good enough.  */
+#ifdef __H8300__
+/* We still treat NaNs differently from libgcc2.c, but then, the
+   behavior is undefined anyway.  */
+       .global ___fixunssfsi
+___fixunssfsi:                  ; expects an IEEE single, high word in A0, low word in A1
+       cmp.b #0x4f,r0h         ; 0x4f = sign 0 + exponent bits 7..1 of 158, i.e. 2^31
+       bge Large_num           ; signed test, so negative inputs fall through
+       jmp     @___fixsfsi     ; below 2^31: the signed conversion is sufficient
+Large_num:
+       bhi L_huge_num          ; top byte above 0x4f: 2^33 or more, Inf or NaN
+       xor.b #0x80,A0L         ; flip exponent bit 0; if it was clear it becomes the hidden 1
+       bmi L_shift8            ; exponent was 158: 2^31 <= x < 2^32
+L_huge_num:
+       mov.w #65535,A0         ; out of range: saturate to 0xffffffff
+       mov.w A0,A1
+       rts
+L_shift8:
+       mov.b A0L,A0H           ; result = 24-bit significand shifted left by 8
+       mov.b A1H,A0L
+       mov.b A1L,A1H
+       mov.b #0,A1L
+       rts
+#endif
+#endif /* L_fixunssfsi_asm */