gcc/config/xtensa/lib1funcs.asm

   1 /* Assembly functions for the Xtensa version of libgcc1.
   2    Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 In addition to the permissions in the GNU General Public License, the
  13 Free Software Foundation gives you unlimited permission to link the
  14 compiled version of this file into combinations with other programs,
  15 and to distribute those combinations without any restriction coming
  16 from the use of this file.  (The General Public License restrictions
  17 do apply in other respects; for example, they cover modification of
  18 the file, and distribution when not linked into a combine
  19 executable.)
  20
  21 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  23 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  24 for more details.
  25
  26 You should have received a copy of the GNU General Public License
  27 along with GCC; see the file COPYING.  If not, write to the Free
  28 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  29 02111-1307, USA.  */
  30
  31 #include "xtensa/xtensa-config.h"
  32
  33         # Define macros for the ABS and ADDX* instructions to handle cases
  34         # where they are not included in the Xtensa processor configuration.
  35
  36         .macro  do_abs dst, src, tmp
             /* dst = absolute value of src.  With the ABS option a single
                instruction is emitted and tmp is left untouched.  Without
                it the value is formed in tmp first, so dst may be the same
                register as src, but tmp must be a different register
                from src.  */
  37 #if !XCHAL_HAVE_ABS
  38         neg     \tmp, \src              /* tmp = -src */
  39         movgez  \tmp, \src, \src        /* src >= 0: take src instead */
  40         mov     \dst, \tmp
  41 #else
  42         abs     \dst, \src
  43 #endif /* !XCHAL_HAVE_ABS */
  44         .endm
  45
  46         .macro  do_addx2 dst, as, at, tmp
             /* dst = (as << 1) + at.  tmp is written only when the ADDX
                option is absent; it may be the same register as dst but
                must not be the same register as at.  */
  47 #if !XCHAL_HAVE_ADDX
  48         slli    \tmp, \as, 1            /* tmp = as * 2 */
  49         add     \dst, \tmp, \at
  50 #else
  51         addx2   \dst, \as, \at
  52 #endif /* !XCHAL_HAVE_ADDX */
  53         .endm
  54
  55         .macro  do_addx4 dst, as, at, tmp
             /* dst = (as << 2) + at.  tmp is written only when the ADDX
                option is absent; it may be the same register as dst but
                must not be the same register as at.  */
  56 #if !XCHAL_HAVE_ADDX
  57         slli    \tmp, \as, 2            /* tmp = as * 4 */
  58         add     \dst, \tmp, \at
  59 #else
  60         addx4   \dst, \as, \at
  61 #endif /* !XCHAL_HAVE_ADDX */
  62         .endm
  63
  64         .macro  do_addx8 dst, as, at, tmp
             /* dst = (as << 3) + at.  tmp is written only when the ADDX
                option is absent; it may be the same register as dst but
                must not be the same register as at.  */
  65 #if !XCHAL_HAVE_ADDX
  66         slli    \tmp, \as, 3            /* tmp = as * 8 */
  67         add     \dst, \tmp, \at
  68 #else
  69         addx8   \dst, \as, \at
  70 #endif /* !XCHAL_HAVE_ADDX */
  71         .endm
  72
  73 #ifdef L_mulsi3
     /* __mulsi3: 32-bit multiply.
          In:  a2, a3 = the two factors.
          Out: a2 = low 32 bits of the product.
        Exactly one of three bodies is assembled, selected by the
        processor configuration: XCHAL_HAVE_MUL16 (16x16 partial
        products), XCHAL_HAVE_MAC16 (partial products through the
        accumulator) or, with neither option, an unrolled shift-and-add
        loop.  Registers a4-a7 are used as scratch.  */
  74         .align  4
  75         .global __mulsi3
  76         .type   __mulsi3,@function
  77 __mulsi3:
  78         entry   sp, 16
  79
  80 #if XCHAL_HAVE_MUL16
             /* Fast path: when neither factor has a bit set above bit 15,
                one 16x16 multiply produces the whole result.  */
  81         or      a4, a2, a3
  82         srai    a4, a4, 16      # nonzero iff either high half is nonzero
  83         bnez    a4, .LMUL16
  84         mul16u  a2, a2, a3
  85         retw
  86 .LMUL16:
             /* General case, with hi() and lo() the 16-bit halves:
                  result = lo(a2)*lo(a3)
                           + ((hi(a2)*lo(a3) + hi(a3)*lo(a2)) << 16)
                The hi*hi term lies entirely above bit 31 and is dropped.  */
  87         srai    a4, a2, 16      # low 16 bits of a4 = hi(a2)
  88         srai    a5, a3, 16      # low 16 bits of a5 = hi(a3)
  89         mul16u  a7, a4, a3      # a7 = hi(a2) * lo(a3)
  90         mul16u  a6, a5, a2      # a6 = hi(a3) * lo(a2)
  91         mul16u  a4, a2, a3      # a4 = lo(a2) * lo(a3)
  92         add     a7, a7, a6      # sum of the two cross products
  93         slli    a7, a7, 16
  94         add     a2, a7, a4
  95
  96 #elif XCHAL_HAVE_MAC16
             /* Cross products are summed in the accumulator and read back
                through special register 16 (ACCLO); the low-by-low product
                is then computed separately and the two are combined.  */
  97         mul.aa.hl a2, a3        # ACC  = hi(a2) * lo(a3)
  98         mula.aa.lh a2, a3       # ACC += lo(a2) * hi(a3)
  99         rsr     a5, 16 # ACCLO: a5 = low word of the cross-product sum
 100         umul.aa.ll a2, a3       # ACC  = lo(a2) * lo(a3), unsigned
 101         rsr     a4, 16 # ACCLO: a4 = low-by-low product
 102         slli    a5, a5, 16
 103         add     a2, a4, a5
 104
 105 #else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 106
 107         # Multiply one bit at a time, but unroll the loop 4x to better
 108         # exploit the addx instructions and avoid overhead.
 109         # Peel the first iteration to save a cycle on init.
             /* Register roles in this body:
                  a4 = larger magnitude, shifted left 4 bits per loop pass
                  a3 = smaller magnitude, consumed 4 bits per loop pass
                  a2 = running product
                  a5 = sign indicator for the final result
                  a6, a7 = temporaries  */
 110
 111         # Avoid negative numbers.
 112         xor     a5, a2, a3  # top bit is 1 iff exactly one input is negative
 113         do_abs  a3, a3, a6
 114         do_abs  a2, a2, a6
 115
 116         # Swap so the second argument is smaller.
 117         sub     a7, a2, a3
 118         mov     a4, a3
 119         movgez  a4, a2, a7  # a4 = max(a2, a3)
 120         movltz  a3, a2, a7  # a3 = min(a2, a3)
 121
             /* Peeled first group: bits 0-3 of a3.  Each step forms a
                candidate sum in a7 and keeps it only if the bit is set.  */
 122         movi    a2, 0
 123         extui   a6, a3, 0, 1
 124         movnez  a2, a4, a6          # bit 0 set: product = a4
 125
 126         do_addx2 a7, a4, a2, a7     # a7 = product + 2 * a4
 127         extui   a6, a3, 1, 1
 128         movnez  a2, a7, a6
 129
 130         do_addx4 a7, a4, a2, a7     # a7 = product + 4 * a4
 131         extui   a6, a3, 2, 1
 132         movnez  a2, a7, a6
 133
 134         do_addx8 a7, a4, a2, a7     # a7 = product + 8 * a4
 135         extui   a6, a3, 3, 1
 136         movnez  a2, a7, a6
 137
 138         bgeui   a3, 16, .Lmult_main_loop    # more multiplier bits remain
 139         neg     a3, a2
 140         movltz  a2, a3, a5          # negate when the signs differed
 141         retw
 142
 143         .align  4
 144 .Lmult_main_loop:
 145         srli    a3, a3, 4           # expose the next 4 multiplier bits
 146         slli    a4, a4, 4           # and scale the multiplicand to match
 147
 148         add     a7, a4, a2
 149         extui   a6, a3, 0, 1
 150         movnez  a2, a7, a6
 151
 152         do_addx2 a7, a4, a2, a7
 153         extui   a6, a3, 1, 1
 154         movnez  a2, a7, a6
 155
 156         do_addx4 a7, a4, a2, a7
 157         extui   a6, a3, 2, 1
 158         movnez  a2, a7, a6
 159
 160         do_addx8 a7, a4, a2, a7
 161         extui   a6, a3, 3, 1
 162         movnez  a2, a7, a6
 163
 164         bgeui   a3, 16, .Lmult_main_loop
 165
 166         neg     a3, a2
 167         movltz  a2, a3, a5          # negate when the signs differed
 168
 169 #endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 170
 171         retw
 172         .size   __mulsi3,.-__mulsi3
 173
 174 #endif /* L_mulsi3 */
 175
 176
 177         # Define a macro for the NSAU (unsigned normalize shift amount)
 178         # instruction, which computes the number of leading zero bits,
 179         # to handle cases where it is not included in the Xtensa processor
 180         # configuration.
 181
 182         .macro  do_nsau cnt, val, tmp, a
             /* cnt = number of leading zero bits in val.
                With the NSA option this is one NSAU instruction and neither
                tmp nor a is touched.  The fallback copies val into a,
                narrows it to its topmost nonzero byte in two steps (adding
                16 and/or 8 to cnt), then adds the per-byte count looked up
                in __nsau_data.  val itself is not modified; tmp and a are
                clobbered.  */
 183 #if !XCHAL_HAVE_NSA
 184         mov     \a, \val
 185         movi    \cnt, 0
 186         extui   \tmp, \a, 16, 16        /* upper halfword */
 187         bnez    \tmp, 0f
 188         movi    \cnt, 16                /* upper half empty: skip it */
 189         slli    \a, \a, 16
 190 0:
 191         extui   \tmp, \a, 24, 8         /* top byte */
 192         bnez    \tmp, 1f
 193         addi    \cnt, \cnt, 8           /* top byte empty: skip it */
 194         slli    \a, \a, 8
 195 1:
 196         movi    \tmp, __nsau_data       /* table base */
 197         extui   \a, \a, 24, 8           /* table index = top byte */
 198         add     \tmp, \tmp, \a
 199         l8ui    \tmp, \tmp, 0
 200         add     \cnt, \cnt, \tmp
 201 #else
 202         nsau    \cnt, \val
 203 #endif /* !XCHAL_HAVE_NSA */
 204         .endm
 205
 206 #ifdef L_nsau
     /* __nsau_data: 256-entry byte table read by the do_nsau fallback.
        Entry i is the number of leading zero bits in the 8-bit value i
        (entry 0 is 8, entries 128-255 are 0).  When the NSA option is
        present the table body is omitted, so the symbol is still defined
        but has size zero.  */
 207         .section .rodata
 208         .align  4
 209         .global __nsau_data
 210         .type   __nsau_data,@object
 211 __nsau_data:
 212 #if !XCHAL_HAVE_NSA
 213         .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4  # 0x00-0x0f
 214         .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3  # 0x10-0x1f
 215         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2  # 0x20-0x2f
 216         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2  # 0x30-0x3f
 217         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  # 0x40-0x4f
 218         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  # 0x50-0x5f
 219         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  # 0x60-0x6f
 220         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  # 0x70-0x7f
 221         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0x80-0x8f
 222         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0x90-0x9f
 223         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0xa0-0xaf
 224         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0xb0-0xbf
 225         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0xc0-0xcf
 226         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0xd0-0xdf
 227         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0xe0-0xef
 228         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  # 0xf0-0xff
 229 #endif /* !XCHAL_HAVE_NSA */
 230         .size   __nsau_data,.-__nsau_data
 231         .hidden __nsau_data     # global for linking, but with hidden visibility
 232 #endif /* L_nsau */
 233
 234
 235 #ifdef L_udivsi3
     /* __udivsi3: unsigned 32-bit division.
          In:  a2 = dividend, a3 = divisor.
          Out: a2 = quotient.
        A zero divisor yields 0; a divisor of 1 returns the dividend
        unchanged.  Otherwise the divisor is shifted left until its
        leading one bit lines up with the dividend's, and the quotient is
        produced one bit per step by compare, subtract and shift.
        Scratch registers: a4-a7.  */
 236         .align  4
 237         .global __udivsi3
 238         .type   __udivsi3,@function
 239 __udivsi3:
 240         entry   sp, 16
 241         bltui   a3, 2, .Ludiv_small     /* divisor is 0 or 1 */
 242
 243         mov     a6, a2                  /* a6 = running remainder */
 244         do_nsau a5, a6, a2, a7          /* a5 = leading zeros of dividend */
 245         do_nsau a4, a3, a2, a7          /* a4 = leading zeros of divisor */
 246         bgeu    a5, a4, .Ludiv_onestep  /* quotient can only be 0 or 1 */
 247
 248         sub     a4, a4, a5              /* a4 = alignment shift = step count */
 249         ssl     a4
 250         sll     a3, a3                  /* line the divisor up */
 251         movi    a2, 0                   /* quotient starts at zero */
 252
 253         /* a4 steps, one quotient bit each.  The loopnez instruction drives
             the iteration when the LOOPS option exists; otherwise a4 is
             counted down explicitly.  */
 254 #if XCHAL_HAVE_LOOPS
 255         loopnez a4, .Ludiv_loopend
 256 #endif /* XCHAL_HAVE_LOOPS */
 257 .Ludiv_loop:
 258         bltu    a6, a3, .Ludiv_zerobit  /* does not fit: bit stays 0 */
 259         sub     a6, a6, a3
 260         addi    a2, a2, 1               /* fits: set the current bit */
 261 .Ludiv_zerobit:
 262         slli    a2, a2, 1               /* make room for the next bit */
 263         srli    a3, a3, 1               /* step the divisor back down */
 264 #if !XCHAL_HAVE_LOOPS
 265         addi    a4, a4, -1
 266         bnez    a4, .Ludiv_loop
 267 #endif /* !XCHAL_HAVE_LOOPS */
 268 .Ludiv_loopend:
 269
 270         bltu    a6, a3, .Ludiv_done
 271         addi    a2, a2, 1               /* final bit, divisor back in place */
 272 .Ludiv_done:
 273         retw
 274
 275 .Ludiv_onestep:
 276         /* Result is 1 when dividend >= divisor, else 0.  */
 277         movi    a2, 0
 278         bltu    a6, a3, .Ludiv_onestep_done
 279         movi    a2, 1
 280 .Ludiv_onestep_done:
 281         retw
 282
 283 .Ludiv_small:
 284         beqz    a3, .Ludiv_by_zero      /* divisor == 1: a2 is already the answer */
 285         retw
 286 .Ludiv_by_zero:
 287         movi    a2, 0                   /* no exception is raised; result is 0 */
 288         retw
 289         .size   __udivsi3,.-__udivsi3
 290
 291 #endif /* L_udivsi3 */
 292
 293
 294 #ifdef L_divsi3
     /* __divsi3: signed 32-bit division.
          In:  a2 = dividend, a3 = divisor.
          Out: a2 = quotient.
        The operand magnitudes are divided by shift, compare and subtract,
        one quotient bit per step, and the quotient is negated when the
        operand signs differ.  A zero divisor yields 0.
        Scratch registers: a4-a8.  */
 295         .align  4
 296         .global __divsi3
 297         .type   __divsi3,@function
 298 __divsi3:
 299         entry   sp, 16
 300         xor     a7, a2, a3              /* a7 < 0 iff the signs differ */
 301         do_abs  a6, a2, a4              /* a6 = dividend magnitude */
 302         do_abs  a3, a3, a4              /* a3 = divisor magnitude */
 303         bltui   a3, 2, .Lsdiv_small     /* divisor magnitude is 0 or 1 */
 304         do_nsau a5, a6, a2, a8          /* a5 = leading zeros of a6 */
 305         do_nsau a4, a3, a2, a8          /* a4 = leading zeros of a3 */
 306         bgeu    a5, a4, .Lsdiv_onestep  /* magnitude quotient is 0 or 1 */
 307
 308         sub     a4, a4, a5              /* a4 = alignment shift = step count */
 309         ssl     a4
 310         sll     a3, a3                  /* line the divisor up */
 311         movi    a2, 0                   /* quotient starts at zero */
 312
 313         /* a4 steps, one quotient bit each.  The loopnez instruction drives
             the iteration when the LOOPS option exists; otherwise a4 is
             counted down explicitly.  */
 314 #if XCHAL_HAVE_LOOPS
 315         loopnez a4, .Lsdiv_loopend
 316 #endif /* XCHAL_HAVE_LOOPS */
 317 .Lsdiv_loop:
 318         bltu    a6, a3, .Lsdiv_zerobit  /* does not fit: bit stays 0 */
 319         sub     a6, a6, a3
 320         addi    a2, a2, 1               /* fits: set the current bit */
 321 .Lsdiv_zerobit:
 322         slli    a2, a2, 1               /* make room for the next bit */
 323         srli    a3, a3, 1               /* step the divisor back down */
 324 #if !XCHAL_HAVE_LOOPS
 325         addi    a4, a4, -1
 326         bnez    a4, .Lsdiv_loop
 327 #endif /* !XCHAL_HAVE_LOOPS */
 328 .Lsdiv_loopend:
 329
 330         bltu    a6, a3, .Lsdiv_fix_sign
 331         addi    a2, a2, 1               /* final bit, divisor back in place */
 332 .Lsdiv_fix_sign:
 333         neg     a5, a2
 334         movltz  a2, a5, a7              /* signs differed: return -quotient */
 335         retw
 336
 337 .Lsdiv_onestep:
 338         movi    a2, 0
 339         bltu    a6, a3, .Lsdiv_onestep_done     /* smaller magnitude: 0 */
 340         movi    a2, 1
 341         movi    a4, -1
 342         movltz  a2, a4, a7              /* otherwise +1, or -1 if signs differed */
 343 .Lsdiv_onestep_done:
 344         retw
 345
 346 .Lsdiv_small:
 347         beqz    a3, .Lsdiv_by_zero
 348         neg     a2, a6                  /* divisor magnitude 1: result is the */
 349         movgez  a2, a6, a7              /* dividend magnitude, negated if signs differed */
 350         retw
 351 .Lsdiv_by_zero:
 352         movi    a2, 0                   /* no exception is raised; result is 0 */
 353         retw
 354         .size   __divsi3,.-__divsi3
 355
 356 #endif /* L_divsi3 */
 357
 358
 359 #ifdef L_umodsi3
     /* __umodsi3: unsigned 32-bit remainder.
          In:  a2 = dividend, a3 = divisor.
          Out: a2 = dividend modulo divisor.
        A divisor of 0 or 1 yields 0.  Otherwise the divisor is shifted
        left until its leading one bit lines up with the dividend's and is
        then subtracted wherever it fits while being stepped back down;
        the remainder is reduced in place in a2.
        Scratch registers: a4-a7.  */
 360         .align  4
 361         .global __umodsi3
 362         .type   __umodsi3,@function
 363 __umodsi3:
 364         entry   sp, 16
 365         bltui   a3, 2, .Lumod_small     /* divisor is 0 or 1 */
 366
 367         do_nsau a5, a2, a6, a7          /* a5 = leading zeros of dividend */
 368         do_nsau a4, a3, a6, a7          /* a4 = leading zeros of divisor */
 369         bgeu    a5, a4, .Lumod_onestep  /* at most one subtraction needed */
 370
 371         sub     a4, a4, a5              /* a4 = alignment shift = step count */
 372         ssl     a4
 373         sll     a3, a3                  /* line the divisor up */
 374
 375         /* a4 steps of compare, subtract and shift.  The loopnez instruction
             drives the iteration when the LOOPS option exists; otherwise a4
             is counted down explicitly.  */
 376 #if XCHAL_HAVE_LOOPS
 377         loopnez a4, .Lumod_loopend
 378 #endif /* XCHAL_HAVE_LOOPS */
 379 .Lumod_loop:
 380         bltu    a2, a3, .Lumod_skip     /* does not fit at this position */
 381         sub     a2, a2, a3
 382 .Lumod_skip:
 383         srli    a3, a3, 1               /* step the divisor back down */
 384 #if !XCHAL_HAVE_LOOPS
 385         addi    a4, a4, -1
 386         bnez    a4, .Lumod_loop
 387 #endif /* !XCHAL_HAVE_LOOPS */
 388 .Lumod_loopend:
 389
 390         bltu    a2, a3, .Lumod_done
 391         sub     a2, a2, a3              /* last step, divisor back in place */
 392 .Lumod_done:
 393         retw
 394
 395 .Lumod_onestep:
 396         bltu    a2, a3, .Lumod_onestep_done
 397         sub     a2, a2, a3              /* dividend >= divisor: subtract once */
 398 .Lumod_onestep_done:
 399         retw
 400
 401 .Lumod_small:
 402         /* Divisor is 0 or 1; either way the result is 0.  A zero divisor
 403            is not reported in any other way.  */
 404         movi    a2, 0
 405         retw
 406         .size   __umodsi3,.-__umodsi3
 407
 408 #endif /* L_umodsi3 */
 409
 410
 411 #ifdef L_modsi3
     /* __modsi3: signed 32-bit remainder.
          In:  a2 = dividend, a3 = divisor.
          Out: a2 = remainder, carrying the sign of the dividend.
        The remainder of the operand magnitudes is computed by shift,
        compare and subtract, then negated when the original dividend was
        negative.  A divisor of 0, 1 or -1 yields 0.
        Scratch registers: a4-a8.  */
 412         .align  4
 413         .global __modsi3
 414         .type   __modsi3,@function
 415 __modsi3:
 416         entry   sp, 16
 417         mov     a7, a2                  /* a7 = dividend as passed in (keeps its sign) */
 418         do_abs  a2, a2, a4              /* a2 = dividend magnitude */
 419         do_abs  a3, a3, a4              /* a3 = divisor magnitude */
 420         bltui   a3, 2, .Lsmod_small     /* divisor magnitude is 0 or 1 */
 421         do_nsau a5, a2, a6, a8          /* a5 = leading zeros of a2 */
 422         do_nsau a4, a3, a6, a8          /* a4 = leading zeros of a3 */
 423         bgeu    a5, a4, .Lsmod_onestep  /* at most one subtraction needed */
 424
 425         sub     a4, a4, a5              /* a4 = alignment shift = step count */
 426         ssl     a4
 427         sll     a3, a3                  /* line the divisor up */
 428
 429         /* a4 steps of compare, subtract and shift.  The loopnez instruction
             drives the iteration when the LOOPS option exists; otherwise a4
             is counted down explicitly.  */
 430 #if XCHAL_HAVE_LOOPS
 431         loopnez a4, .Lsmod_loopend
 432 #endif /* XCHAL_HAVE_LOOPS */
 433 .Lsmod_loop:
 434         bltu    a2, a3, .Lsmod_skip     /* does not fit at this position */
 435         sub     a2, a2, a3
 436 .Lsmod_skip:
 437         srli    a3, a3, 1               /* step the divisor back down */
 438 #if !XCHAL_HAVE_LOOPS
 439         addi    a4, a4, -1
 440         bnez    a4, .Lsmod_loop
 441 #endif /* !XCHAL_HAVE_LOOPS */
 442 .Lsmod_loopend:
 443
 444         bltu    a2, a3, .Lsmod_fix_sign
 445         sub     a2, a2, a3              /* last step, divisor back in place */
 446 .Lsmod_fix_sign:
 447         bgez    a7, .Lsmod_done
 448         neg     a2, a2                  /* negative dividend: negate the remainder */
 449 .Lsmod_done:
 450         retw
 451
 452 .Lsmod_onestep:
 453         bltu    a2, a3, .Lsmod_onestep_sign
 454         sub     a2, a2, a3              /* magnitude >= divisor magnitude: subtract once */
 455 .Lsmod_onestep_sign:
 456         bgez    a7, .Lsmod_onestep_done
 457         neg     a2, a2                  /* negative dividend: negate the remainder */
 458 .Lsmod_onestep_done:
 459         retw
 460
 461 .Lsmod_small:
 462         /* Divisor magnitude is 0 or 1; either way the result is 0.  A zero
 463            divisor is not reported in any other way.  */
 464         movi    a2, 0
 465         retw
 466         .size   __modsi3,.-__modsi3
 467
 468 #endif /* L_modsi3 */