gcc/config/score/mul-div.S

   1 /* Copyright (C) 2005 Free Software Foundation, Inc.
   2    Contributed by Sunnorth
   3
   4    This file is part of GCC.
   5
   6    GCC is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published
   8    by the Free Software Foundation; either version 2, or (at your
   9    option) any later version.
  10
  11    GCC is distributed in the hope that it will be useful, but WITHOUT
  12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GCC; see the file COPYING.  If not, write to
  18    the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  19    Boston, MA 02110-1301, USA.  */
  20
  21 #define ra r3                 /* return address: the routines below return with "br ra" */
  22 #define a0 r4                 /* first argument; results are also written to r4 */
  23 #define a1 r5                 /* second argument */
  24 #define a2 r6
  25 #define a3 r7
  26 #define v0 r23                /* NOTE(review): comments name v0 as the result, but the visible code returns in r4 */
  27 /* Scratch registers used by the multiply/divide routines.  */
  28 #define t0 r8
  29 #define t1 r9
  30 #define t2 r10
  31 #define t3 r11
  32
  33 #define t4 r22
  34
  35 #if defined(__scorebe__)      /* big-endian: high halves in a0/a2/v0, low halves in a1/a3/v1 */
  36 #define LIBGCC1_BIG_ENDIAN
  37 #define out_H        v0
  38 #define out_L        v1       /* NOTE(review): v1 has no #define in the visible part of this file */
  39 #define in0_H        a0
  40 #define in0_L        a1
  41 #define in1_H        a2
  42 #define in1_L        a3
  43 #elif defined(__scorele__)    /* little-endian: the high/low halves swap registers */
  44 #define out_H        v1
  45 #define out_L        v0
  46 #define in0_H        a1
  47 #define in0_L        a0
  48 #define in1_H        a3
  49 #define in1_L        a2
  50 #else                         /* neither endian macro is defined: stop the build with a real #error */
  51 #error "must specify S+core endian!"
  52 #endif
  53
  54 #if !defined(L_mulsi3) && !defined(L_divsi3)   /* built only when neither L_ part is selected */
  55                .text
  56                .global _flush_cache
  57 _flush_cache:                                   # in: r4 = start address, r5 = length; clobbers r8, r9, r10, sr0
  58                 srli    r9, r5, 4                     # r9 = r5 / 16, one step per 16 bytes (NOTE(review): confirm bcnz with a zero count)
  59                 mv      r8, r4                        # r8 = running address
  60                 mtsr    r9, sr0                       # sr0 = step count (presumably the counter bcnz tests)
  61 1:
  62                 cache   0xe, [r8, 0]                  # write back invalid dcache
  63                 addi    r8, 16
  64                 bcnz    1b
  65                 mfcr    r8, cr4
  66                 ldi     r10, 0                        # r10 = 0 up front: both cache ops below use it, on either path
  67                 bittst! r8, 0x3                       # if LDM is enabled, write back LDM
  68                 beq!    6f
  69                 cache   0xc, [r10, 0]
  70 6:
  71                 bittst! r8, 0x2                       # if LIM is enabled, refill it
  72                 beq!    7f
  73                 cache   0x4, [r10, 0]
  74 7:
  75                 #nop!
  76                 #nop!
  77                 #nop!
  78                 #nop!
  79                 #nop!
  80                 mv      r8, r4                        # second pass over the same range
  81                 mtsr    r9, sr0
  82 2:
  83                 cache   0x2, [r8, 0]                  # invalid unlock icache
  84                 #nop!
  85                 #nop!
  86                 #nop!
  87                 #nop!
  88                 #nop!
  89                 addi    r8, 16
  90                 bcnz    2b
  91                 br      r3                            # return (r3 is the return-address register)
  92 #endif
  93
  94 /* FUNCTION
  95    (U) INT32 r4 = __mulsi3 ((U) INT32 a0, (U) INT32 a1);
  96    REGISTERS:
  97         use             t0, t1    (t1 accumulates the product)
  98         modify          a0        -> holds the product on return (a0 is r4)
  99                         a1        -> becomes 0
 100    NOTE:
 101          it seems to give better performance to just shift and add.  */
 102
 103 #ifdef L_mulsi3
 104                 .text
 105                 .global __umulsi3
 106                 .global __mulsi3
 107                 /* 32x32 -> 32 multiply; the signed and unsigned entry points share one body  */
 108                 .ent    __mulsi3
 109 __umulsi3:
 110 __mulsi3:
 111                 li      t1, 0                     /* product accumulator = 0              */
 112 1:
 113                 andri.c t0, a1, 1                 /* test the low bit of the multiplier   */
 114                 srli    a1, a1, 1                 /* consume that bit                     */
 115                 beq     2f                        /* bit was 0: nothing to add            */
 116                 add     t1, t1, a0                /* product += shifted multiplicand      */
 117 2:
 118                 slli    a0, a0, 1                 /* multiplicand <<= 1                   */
 119                 cmpi.c  a1, 0
 120                 bne     1b                        /* loop while multiplier bits remain    */
 121                 mv      a0, t1                    /* return the product in a0 (r4)        */
 122                 br      ra
 123                 .end    __mulsi3
 124 #endif /* L_mulsi3 */
 125
 126
 127 /* FUNCTION
 128    UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
 129    INT32 (v0) = __divsi3 (INT32 (a0),  INT32 (a1));
 130    UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
 131    INT32 (v0) = __modsi3 (INT32 (a0),  INT32 (a1));
 132    DESCRIPTION
 133         performs 32-bit division/modulo; the code returns its result in r4 (a0).
 134    REGISTERS
 135         used        t0        quotient bit mask
 136                     t4        quotient (t2 = sign source, t3 = saved ra in the wrappers)
 137         modify      a0, a1    a0 holds the running remainder; a1 receives it on exit  */
 138 #ifdef L_divsi3
 139                 .text
 140                 .global __udivsi3
 141                 .global __umodsi3
 142                 .global __divsi3
 143                 .global __modsi3
 144
 145                 /* unsigned division: on return r4 = quotient, a1 = remainder (shift-and-subtract)  */
 146                 .ent    __udivsi3
 147 __udivsi3:
 148                 li      t4, 0                     /* t4 = quotient accumulator */
 149                 cmpi.c  a1, 0
 150                 beq     __uds_exit                /* divisor 0: return quotient 0, remainder = a0 */
 151                 li      t0, 1                     /* t0 = quotient bit mask; sits between cmpi.c and blt, so blt presumably still sees the cmpi.c flags */
 152                 blt     __uds_ok                  /* divisor already has its top bit set: skip normalization */
 153 __uds_normalize:
 154                 cmp.c   a0, a1
 155                 bcc     __uds_ok                  /* presumably carry clear means a0 < a1 (unsigned): divisor is large enough */
 156                 slli    a1, a1, 1                 /* divisor <<= 1 ... */
 157                 slli    t0, t0, 1                 /* ... and the bit mask moves with it */
 158                 cmpi.c  a1, 0
 159                 bge     __uds_normalize           /* keep shifting while the divisor top bit is clear */
 160 __uds_ok:
 161 __uds_loop2:
 162                 cmp.c   a0, a1
 163                 bcc     __uds_loop3               /* divisor does not fit: leave this quotient bit 0 */
 164                 sub     a0, a0, a1                /* a0 = running remainder */
 165                 or      t4, t4, t0                /* set this quotient bit */
 166 __uds_loop3:
 167                 srli    t0, t0, 1
 168                 srli    a1, a1, 1
 169                 cmpi.c  t0, 0
 170                 bne     __uds_loop2               /* done once the mask has shifted out */
 171 __uds_exit:
 172                 mv      a1, a0                    /* remainder -> a1 (read by the modulus entry points) */
 173                 mv      r4, t4                    /* quotient  -> r4 */
 174                 br      ra
 175                 .end    __udivsi3
 176
 177                 /* unsigned modulus: calls __udivsi3 and returns the remainder it leaves in a1  */
 178                 .ent    __umodsi3
 179 __umodsi3:
 180                 mv      t3, ra                    /* keep our return address in t3 across the call (jl presumably rewrites ra) */
 181                 jl      __udivsi3
 182                 mv      r4, a1                    /* result = remainder */
 183                 br      t3
 184                 .end    __umodsi3
 185
 186                 /* abs and div: negate a0 and/or a1 when negative, then continue in __udivsi3  */
 187                 .ent    __orgsi3
 188 __orgsi3:
 189                 cmpi.c  a0, 0
 190                 bge     __orgsi3_a0p
 191                 neg     a0, a0
 192 __orgsi3_a0p:
 193                 cmpi.c  a1, 0
 194                 bge     __udivsi3                /* a1 is non-negative already: divide as is */
 195                 neg     a1, a1
 196                 b       __udivsi3                /* goto udivsi3; its "br ra" returns to the caller of __orgsi3  */
 197                 .end    __orgsi3
 198
 199                 /* signed division: divide the magnitudes, negate the quotient when the operand signs differ  */
 200                 .ent    __divsi3
 201 __divsi3:
 202                 mv      t3, ra                    /* return address kept in t3 across the call */
 203                 xor     t2, a0, a1                /* t2 is negative exactly when the operand signs differ */
 204                 jl      __orgsi3
 205 __divsi3_adjust:                                  /* also entered from __modsi3, with t2 = original dividend */
 206                 cmpi.c  t2, 0
 207                 bge     __divsi3_exit
 208                 neg     r4, r4
 209 __divsi3_exit:
 210                 br      t3
 211                 .end    __divsi3
 212
 213                 /* signed modulus: remainder of the magnitudes, negated when the dividend was negative  */
 214                 .ent    __modsi3
 215 __modsi3:
 216                 mv      t3, ra                    /* return address kept in t3; __divsi3_adjust returns through it */
 217                 mv      t2, a0                    /* t2 = original dividend: its sign decides the result sign */
 218                 jl      __orgsi3
 219                 mv      r4, a1                    /* result = remainder left in a1 by __udivsi3 */
 220                 b       __divsi3_adjust
 221                 .end    __modsi3
 222
 223 #endif /* L_divsi3 */
 224