gcc/config/xtensa/lib1funcs.asm

   1 /* Assembly functions for the Xtensa version of libgcc1.
   2    Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 In addition to the permissions in the GNU General Public License, the
  13 Free Software Foundation gives you unlimited permission to link the
  14 compiled version of this file into combinations with other programs,
  15 and to distribute those combinations without any restriction coming
  16 from the use of this file.  (The General Public License restrictions
  17 do apply in other respects; for example, they cover modification of
  18 the file, and distribution when not linked into a combine
  19 executable.)
  20
  21 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  23 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  24 for more details.
  25
  26 You should have received a copy of the GNU General Public License
  27 along with GCC; see the file COPYING.  If not, write to the Free
  28 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  29 02111-1307, USA.  */
  30
  31 #include "xtensa-config.h"
  32
  33 # Note: These functions use a minimum stack frame size of 32.  This is
  34 # necessary for Xtensa configurations that only support a fixed register
  35 # window size of 8, where even leaf functions (such as these) need to
  36 # allocate space for a 4-word "extra save area".
  37
  38 # Define macros for the ABS and ADDX* instructions to handle cases
  39 # where they are not included in the Xtensa processor configuration.
     #
     # do_abs dst, src, tmp
     #   Sets dst to the absolute value of src.  tmp is written only when
     #   the configuration lacks the ABS instruction.  The fallback builds
     #   the result in tmp before copying it to dst, so dst and src may
     #   name the same register (callers in this file rely on that); tmp
     #   must be a different register from src.
  40
  41         .macro  do_abs dst, src, tmp
  42 #if XCHAL_HAVE_ABS
  43         abs     \dst, \src
  44 #else
  45         neg     \tmp, \src              # tmp = -src
  46         movgez  \tmp, \src, \src        # src >= 0: take src itself instead
  47         mov     \dst, \tmp              # src is no longer needed past this point
  48 #endif
  49         .endm
  50
  51         .macro  do_addx2 dst, as, at, tmp
             # Computes dst = (as << 1) + at.
             # With the ADDX option this is a single ADDX2 and tmp is not
             # touched.  Without it the shifted value is formed in tmp
             # first, so tmp may be the same register as dst but must not
             # be the same register as at.
  52 #if XCHAL_HAVE_ADDX
  53         addx2   \dst, \as, \at
  54 #else
  55         slli    \tmp, \as, 1            # tmp = as * 2
  56         add     \dst, \tmp, \at
  57 #endif
  58         .endm
  59
  60         .macro  do_addx4 dst, as, at, tmp
             # Computes dst = (as << 2) + at.
             # With the ADDX option this is a single ADDX4 and tmp is not
             # touched.  Without it the shifted value is formed in tmp
             # first, so tmp may be the same register as dst but must not
             # be the same register as at.
  61 #if XCHAL_HAVE_ADDX
  62         addx4   \dst, \as, \at
  63 #else
  64         slli    \tmp, \as, 2            # tmp = as * 4
  65         add     \dst, \tmp, \at
  66 #endif
  67         .endm
  68
  69         .macro  do_addx8 dst, as, at, tmp
             # Computes dst = (as << 3) + at.
             # With the ADDX option this is a single ADDX8 and tmp is not
             # touched.  Without it the shifted value is formed in tmp
             # first, so tmp may be the same register as dst but must not
             # be the same register as at.
  70 #if XCHAL_HAVE_ADDX
  71         addx8   \dst, \as, \at
  72 #else
  73         slli    \tmp, \as, 3            # tmp = as * 8
  74         add     \dst, \tmp, \at
  75 #endif
  76         .endm
  77
  78 # Define macros for function entry and return, supporting either the
  79 # standard register windowed ABI or the non-windowed call0 ABI.  These
  80 # macros do not allocate any extra stack space, so they only work for
  81 # leaf functions that do not need to spill anything to the stack.
     #
     # abi_entry reg, size
     #   Function prologue.  When the configuration has register windows
     #   and the call0 ABI is not selected, this expands to the ENTRY
     #   instruction with reg and size passed through unchanged (every
     #   function in this file passes sp and 32).  In all other cases it
     #   expands to no instructions.
  82
  83         .macro abi_entry reg, size
  84 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
  85         entry \reg, \size
  86 #else
  87         /* do nothing */
  88 #endif
  89         .endm
  90
  91         .macro abi_return
             # Function epilogue matching abi_entry: RETW when the function
             # was entered with ENTRY (windowed ABI), plain RET otherwise.
             # The condition below must stay identical to the one used in
             # abi_entry.
  92 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
  93         retw
  94 #else
  95         ret
  96 #endif
  97         .endm
  98
  99
 100 #ifdef L_mulsi3
     # __mulsi3: 32-bit integer multiply.
     #
     # Reads its two operands from a2 and a3 and leaves the low 32 bits
     # of their product in a2.  Exactly one of three bodies is assembled,
     # selected by the processor configuration:
     #   - with MUL16: sum of 16x16 MUL16U partial products;
     #   - with MAC16: partial products formed in the MAC16 accumulator
     #     and read back through ACCLO;
     #   - with neither: shift-and-add on the magnitudes, four
     #     multiplier bits per pass, with the sign fixed up at the end.
     # Registers a3-a7 are used as scratch (not all by every body).
 101         .align  4
 102         .global __mulsi3
 103         .type   __mulsi3,@function
 104 __mulsi3:
 105         abi_entry sp, 32
 106
 107 #if XCHAL_HAVE_MUL16
 108         or      a4, a2, a3
 109         srai    a4, a4, 16      # nonzero iff either operand has a bit set above bit 15
 110         bnez    a4, .LMUL16
 111         mul16u  a2, a2, a3      # both operands fit in 16 bits: one multiply is enough
 112         abi_return
 113 .LMUL16:
 114         srai    a4, a2, 16      # a4 = upper half of a2 (MUL16U reads only the low 16 bits)
 115         srai    a5, a3, 16      # a5 = upper half of a3
 116         mul16u  a7, a4, a3      # a7 = hi(a2) * lo(a3)
 117         mul16u  a6, a5, a2      # a6 = hi(a3) * lo(a2)
 118         mul16u  a4, a2, a3      # a4 = lo(a2) * lo(a3)
 119         add     a7, a7, a6
 120         slli    a7, a7, 16      # cross terms enter the result at bit 16
 121         add     a2, a7, a4      # continues at the shared abi_return below
 122
 123 #elif XCHAL_HAVE_MAC16
 124         mul.aa.hl a2, a3        # accumulator = first cross term
 125         mula.aa.lh a2, a3       # accumulator += second cross term
 126         rsr     a5, ACCLO       # a5 = low word of the cross-term sum
 127         umul.aa.ll a2, a3       # accumulator = lo(a2) * lo(a3), unsigned
 128         rsr     a4, ACCLO
 129         slli    a5, a5, 16
 130         add     a2, a4, a5      # product = lo*lo + (cross terms << 16)
 131
 132 #else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 133
 134         # Multiply one bit at a time, but unroll the loop 4x to better
 135         # exploit the addx instructions and avoid overhead.
 136         # Peel the first iteration to save a cycle on init.
 137
 138         # Avoid negative numbers.
 139         xor     a5, a2, a3  # top bit is 1 iff exactly one of the inputs is negative
 140         do_abs  a3, a3, a6
 141         do_abs  a2, a2, a6
 142
 143         # Swap so the second argument is smaller.
             # Afterwards a4 is the multiplicand and a3 the multiplier; the
             # fewer significant bits a3 has, the fewer passes are needed.
 144         sub     a7, a2, a3
 145         mov     a4, a3
 146         movgez  a4, a2, a7  # a4 = max(a2, a3)
 147         movltz  a3, a2, a7  # a3 = min(a2, a3)
 148
             # Peeled first pass: multiplier bits 0..3.  a2 accumulates
             # the product; a7 holds the candidate sum for each bit and is
             # committed to a2 only when that multiplier bit is set.
 149         movi    a2, 0
 150         extui   a6, a3, 0, 1
 151         movnez  a2, a4, a6      # bit 0 set: product = multiplicand
 152
 153         do_addx2 a7, a4, a2, a7 # a7 = product + 2 * multiplicand
 154         extui   a6, a3, 1, 1
 155         movnez  a2, a7, a6
 156
 157         do_addx4 a7, a4, a2, a7 # a7 = product + 4 * multiplicand
 158         extui   a6, a3, 2, 1
 159         movnez  a2, a7, a6
 160
 161         do_addx8 a7, a4, a2, a7 # a7 = product + 8 * multiplicand
 162         extui   a6, a3, 3, 1
 163         movnez  a2, a7, a6
 164
 165         bgeui   a3, 16, .Lmult_main_loop        # multiplier has bits above bit 3
 166         neg     a3, a2
 167         movltz  a2, a3, a5      # negate the product when the input signs differed
 168         abi_return
 169
 170         .align  4
 171 .Lmult_main_loop:
 172         srli    a3, a3, 4       # discard the four multiplier bits just handled
 173         slli    a4, a4, 4       # and scale the multiplicand to match
 174
 175         add     a7, a4, a2
 176         extui   a6, a3, 0, 1
 177         movnez  a2, a7, a6
 178
 179         do_addx2 a7, a4, a2, a7
 180         extui   a6, a3, 1, 1
 181         movnez  a2, a7, a6
 182
 183         do_addx4 a7, a4, a2, a7
 184         extui   a6, a3, 2, 1
 185         movnez  a2, a7, a6
 186
 187         do_addx8 a7, a4, a2, a7
 188         extui   a6, a3, 3, 1
 189         movnez  a2, a7, a6
 190
 191         bgeui   a3, 16, .Lmult_main_loop        # repeat while multiplier bits remain
 192
 193         neg     a3, a2
 194         movltz  a2, a3, a5      # negate the product when the input signs differed
 195
 196 #endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 197
 198         abi_return
 199         .size   __mulsi3,.-__mulsi3
 200
 201 #endif /* L_mulsi3 */
 202
 203
 204 # Define a macro for the NSAU (unsigned normalize shift amount)
 205 # instruction, which computes the number of leading zero bits,
 206 # to handle cases where it is not included in the Xtensa processor
 207 # configuration.
     #
     # do_nsau cnt, val, tmp, a
     #   cnt  receives the number of leading zero bits of val
     #   val  value to examine
     #   tmp  scratch register, written only by the fallback sequence
     #   a    scratch register, written only by the fallback sequence
     #
     # The fallback copies val into a, skips over an all-zero upper
     # halfword and then an all-zero top byte (adding 16 and 8 to the
     # count), and finally adds the leading zero count of the remaining
     # top byte, looked up in the __nsau_data byte table.
 208
 209         .macro  do_nsau cnt, val, tmp, a
 210 #if XCHAL_HAVE_NSA
 211         nsau    \cnt, \val
 212 #else
 213         mov     \a, \val
 214         movi    \cnt, 0
 215         extui   \tmp, \a, 16, 16        # tmp = upper 16 bits
 216         bnez    \tmp, 0f
 217         movi    \cnt, 16                # upper halfword is zero: 16 leading zeros so far
 218         slli    \a, \a, 16              # move the lower halfword to the top
 219 0:
 220         extui   \tmp, \a, 24, 8         # tmp = top byte
 221         bnez    \tmp, 1f
 222         addi    \cnt, \cnt, 8           # top byte is zero: 8 more leading zeros
 223         slli    \a, \a, 8
 224 1:
 225         movi    \tmp, __nsau_data
 226         extui   \a, \a, 24, 8           # a = top byte, used as the table index
 227         add     \tmp, \tmp, \a
 228         l8ui    \tmp, \tmp, 0           # tmp = table entry for that byte
 229         add     \cnt, \cnt, \tmp
 230 #endif /* !XCHAL_HAVE_NSA */
 231         .endm
 232
 233 #ifdef L_nsau
     # __nsau_data: byte table consulted by the do_nsau fallback sequence.
     #
     # Entry i holds the number of leading zero bits in the 8-bit value
     # i: entry 0 is 8, entry 1 is 7, and every entry from 128 up is 0.
     # When the processor has the NSAU instruction the table body is
     # omitted and only the zero-sized symbol is emitted.  The symbol is
     # declared global and hidden.
 234         .section .rodata
 235         .align  4
 236         .global __nsau_data
 237         .type   __nsau_data,@object
 238 __nsau_data:
 239 #if !XCHAL_HAVE_NSA
 240         .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
 241         .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 242         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 243         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 244         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 245         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 246         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 247         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 248         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 249         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 250         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 251         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 252         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 253         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 254         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 255         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 256 #endif /* !XCHAL_HAVE_NSA */
 257         .size   __nsau_data,.-__nsau_data
 258         .hidden __nsau_data
             # Return to the section that was current before the table, so
             # that anything assembled after this block in the same
             # translation unit is not placed in .rodata.
             .previous
 259 #endif /* L_nsau */
 260
 261
 262 #ifdef L_udivsi3
     # __udivsi3: unsigned 32-bit division.
     #
     # Reads the dividend from a2 and the divisor from a3 and leaves the
     # quotient in a2.  A divisor of zero produces 0; no exception is
     # raised.  Registers a3-a6 are overwritten, and also a7 when
     # do_nsau expands to its table-driven fallback.
     #
     # Method: shift the divisor left until its most significant set
     # bit lines up with that of the dividend, then generate one
     # quotient bit per step by trial subtraction while shifting the
     # divisor back to the right.
 263         .align  4
 264         .global __udivsi3
 265         .type   __udivsi3,@function
 266 __udivsi3:
 267         abi_entry sp, 32
 268         bltui   a3, 2, .Lle_one # check if the divisor <= 1
 269
 270         mov     a6, a2          # keep dividend in a6
 271         do_nsau a5, a6, a2, a7  # dividend_shift = nsau(dividend)
 272         do_nsau a4, a3, a2, a7  # divisor_shift = nsau(divisor)
 273         bgeu    a5, a4, .Lspecial       # dividend is no wider than divisor: quotient is 0 or 1
 274
 275         sub     a4, a4, a5      # count = divisor_shift - dividend_shift
 276         ssl     a4
 277         sll     a3, a3          # divisor <<= count
 278         movi    a2, 0           # quotient = 0
 279
 280         # test-subtract-and-shift loop; one quotient bit on each iteration
             # The body runs count times: as a LOOPNEZ hardware loop when
             # the loop option is configured, otherwise with an explicit
             # decrement and branch.  count is at least 1 here because the
             # branch to .Lspecial above was not taken.
 281 #if XCHAL_HAVE_LOOPS
 282         loopnez a4, .Lloopend
 283 #endif /* XCHAL_HAVE_LOOPS */
 284 .Lloop:
 285         bltu    a6, a3, .Lzerobit       # divisor does not fit: this quotient bit is 0
 286         sub     a6, a6, a3
 287         addi    a2, a2, 1
 288 .Lzerobit:
 289         slli    a2, a2, 1       # make room for the next quotient bit
 290         srli    a3, a3, 1       # step the divisor one position back down
 291 #if !XCHAL_HAVE_LOOPS
 292         addi    a4, a4, -1
 293         bnez    a4, .Lloop
 294 #endif /* !XCHAL_HAVE_LOOPS */
 295 .Lloopend:
 296
             # The divisor is back at its original position; this last
             # comparison yields the least significant quotient bit.
 297         bltu    a6, a3, .Lreturn
 298         addi    a2, a2, 1       # increment quotient if dividend >= divisor
 299 .Lreturn:
 300         abi_return
 301
 302 .Lspecial:
 303         # return dividend >= divisor
 304         movi    a2, 0
 305         bltu    a6, a3, .Lreturn2
 306         movi    a2, 1
 307 .Lreturn2:
 308         abi_return
 309
 310 .Lle_one:
             # Reached before a2 has been modified, so a2 still holds the
             # dividend.
 311         beqz    a3, .Lerror     # divisor == 0 goes to .Lerror; divisor == 1 returns the dividend
 312         abi_return
 313 .Lerror:
 314         movi    a2, 0           # just return 0; could throw an exception
 315         abi_return
 316         .size   __udivsi3,.-__udivsi3
 317
 318 #endif /* L_udivsi3 */
 319
 320
 321 #ifdef L_divsi3
     # __divsi3: signed 32-bit division.
     #
     # Reads the dividend from a2 and the divisor from a3 and leaves the
     # quotient in a2.  The division is carried out on the magnitudes
     # by shift-and-subtract, and the quotient is negated when the two
     # operands have different signs, so the result is truncated toward
     # zero.  A divisor of zero produces 0; no exception is raised.
     # Registers a3-a7 are overwritten, and also a8 when do_nsau
     # expands to its table-driven fallback.
 322         .align  4
 323         .global __divsi3
 324         .type   __divsi3,@function
 325 __divsi3:
 326         abi_entry sp, 32
 327         xor     a7, a2, a3      # sign = dividend ^ divisor
 328         do_abs  a6, a2, a4      # udividend = abs(dividend)
 329         do_abs  a3, a3, a4      # udivisor = abs(divisor)
 330         bltui   a3, 2, .Lle_one # check if udivisor <= 1
 331         do_nsau a5, a6, a2, a8  # udividend_shift = nsau(udividend)
 332         do_nsau a4, a3, a2, a8  # udivisor_shift = nsau(udivisor)
 333         bgeu    a5, a4, .Lspecial       # udividend is no wider than udivisor: magnitude is 0 or 1
 334
 335         sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
 336         ssl     a4
 337         sll     a3, a3          # udivisor <<= count
 338         movi    a2, 0           # quotient = 0
 339
 340         # test-subtract-and-shift loop; one quotient bit on each iteration
             # The body runs count times: as a LOOPNEZ hardware loop when
             # the loop option is configured, otherwise with an explicit
             # decrement and branch.  count is at least 1 here because the
             # branch to .Lspecial above was not taken.
 341 #if XCHAL_HAVE_LOOPS
 342         loopnez a4, .Lloopend
 343 #endif /* XCHAL_HAVE_LOOPS */
 344 .Lloop:
 345         bltu    a6, a3, .Lzerobit       # udivisor does not fit: this quotient bit is 0
 346         sub     a6, a6, a3
 347         addi    a2, a2, 1
 348 .Lzerobit:
 349         slli    a2, a2, 1       # make room for the next quotient bit
 350         srli    a3, a3, 1       # step udivisor one position back down
 351 #if !XCHAL_HAVE_LOOPS
 352         addi    a4, a4, -1
 353         bnez    a4, .Lloop
 354 #endif /* !XCHAL_HAVE_LOOPS */
 355 .Lloopend:
 356
 357         bltu    a6, a3, .Lreturn
 358         addi    a2, a2, 1       # increment quotient if udividend >= udivisor
 359 .Lreturn:
 360         neg     a5, a2
 361         movltz  a2, a5, a7      # return (sign < 0) ? -quotient : quotient
 362         abi_return
 363
 364 .Lspecial:
 365         movi    a2, 0
 366         bltu    a6, a3, .Lreturn2 # udividend < udivisor: return 0
 367         movi    a2, 1
 368         movi    a4, -1
 369         movltz  a2, a4, a7      # else return (sign < 0) ? -1 : 1
 370 .Lreturn2:
 371         abi_return
 372
 373 .Lle_one:
 374         beqz    a3, .Lerror     # udivisor == 0 goes to .Lerror
 375         neg     a2, a6          # if udivisor == 1, then return...
 376         movgez  a2, a6, a7      # (sign < 0) ? -udividend : udividend
 377         abi_return
 378 .Lerror:
 379         movi    a2, 0           # just return 0; could throw an exception
 380         abi_return
 381         .size   __divsi3,.-__divsi3
 382
 383 #endif /* L_divsi3 */
 384
 385
 386 #ifdef L_umodsi3
     # __umodsi3: unsigned 32-bit remainder.
     #
     # Reads the dividend from a2 and the divisor from a3 and leaves the
     # remainder in a2.  The dividend is reduced in place in a2 by
     # shift-and-subtract; no quotient is formed.  A divisor of zero
     # produces 0; no exception is raised.  Registers a3-a5 are
     # overwritten, and also a6 and a7 when do_nsau expands to its
     # table-driven fallback.
 387         .align  4
 388         .global __umodsi3
 389         .type   __umodsi3,@function
 390 __umodsi3:
 391         abi_entry sp, 32
 392         bltui   a3, 2, .Lle_one # check if the divisor is <= 1
 393
 394         do_nsau a5, a2, a6, a7  # dividend_shift = nsau(dividend)
 395         do_nsau a4, a3, a6, a7  # divisor_shift = nsau(divisor)
 396         bgeu    a5, a4, .Lspecial       # dividend is no wider than divisor: at most one subtract
 397
 398         sub     a4, a4, a5      # count = divisor_shift - dividend_shift
 399         ssl     a4
 400         sll     a3, a3          # divisor <<= count
 401
 402         # test-subtract-and-shift loop
             # The body runs count times: as a LOOPNEZ hardware loop when
             # the loop option is configured, otherwise with an explicit
             # decrement and branch.  count is at least 1 here because the
             # branch to .Lspecial above was not taken.
 403 #if XCHAL_HAVE_LOOPS
 404         loopnez a4, .Lloopend
 405 #endif /* XCHAL_HAVE_LOOPS */
 406 .Lloop:
 407         bltu    a2, a3, .Lzerobit       # shifted divisor does not fit: nothing to subtract
 408         sub     a2, a2, a3
 409 .Lzerobit:
 410         srli    a3, a3, 1       # step the divisor one position back down
 411 #if !XCHAL_HAVE_LOOPS
 412         addi    a4, a4, -1
 413         bnez    a4, .Lloop
 414 #endif /* !XCHAL_HAVE_LOOPS */
 415 .Lloopend:
 416
 417         bltu    a2, a3, .Lreturn
 418         sub     a2, a2, a3      # subtract once more if dividend >= divisor
 419 .Lreturn:
 420         abi_return
 421
 422 .Lspecial:
 423         bltu    a2, a3, .Lreturn2
 424         sub     a2, a2, a3      # subtract once if dividend >= divisor
 425 .Lreturn2:
 426         abi_return
 427
 428 .Lle_one:
 429         # the divisor is either 0 or 1, so just return 0.
 430         # someday we may want to throw an exception if the divisor is 0.
 431         movi    a2, 0
 432         abi_return
 433         .size   __umodsi3,.-__umodsi3
 434
 435 #endif /* L_umodsi3 */
 436
 437
 438 #ifdef L_modsi3
     # __modsi3: signed 32-bit remainder.
     #
     # Reads the dividend from a2 and the divisor from a3 and leaves the
     # remainder in a2.  The remainder of the magnitudes is computed by
     # shift-and-subtract and is then negated when the original dividend
     # was negative, so the result takes the sign of the dividend.  A
     # divisor of zero produces 0; no exception is raised.  Registers
     # a3-a5 and a7 are overwritten, and also a6 and a8 when do_nsau
     # expands to its table-driven fallback.
 439         .align  4
 440         .global __modsi3
 441         .type   __modsi3,@function
 442 __modsi3:
 443         abi_entry sp, 32
 444         mov     a7, a2          # save original (signed) dividend
 445         do_abs  a2, a2, a4      # udividend = abs(dividend)
 446         do_abs  a3, a3, a4      # udivisor = abs(divisor)
 447         bltui   a3, 2, .Lle_one # check if udivisor <= 1
 448         do_nsau a5, a2, a6, a8  # udividend_shift = nsau(udividend)
 449         do_nsau a4, a3, a6, a8  # udivisor_shift = nsau(udivisor)
 450         bgeu    a5, a4, .Lspecial       # udividend is no wider than udivisor: at most one subtract
 451
 452         sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
 453         ssl     a4
 454         sll     a3, a3          # udivisor <<= count
 455
 456         # test-subtract-and-shift loop
             # The body runs count times: as a LOOPNEZ hardware loop when
             # the loop option is configured, otherwise with an explicit
             # decrement and branch.  count is at least 1 here because the
             # branch to .Lspecial above was not taken.
 457 #if XCHAL_HAVE_LOOPS
 458         loopnez a4, .Lloopend
 459 #endif /* XCHAL_HAVE_LOOPS */
 460 .Lloop:
 461         bltu    a2, a3, .Lzerobit       # shifted udivisor does not fit: nothing to subtract
 462         sub     a2, a2, a3
 463 .Lzerobit:
 464         srli    a3, a3, 1       # step udivisor one position back down
 465 #if !XCHAL_HAVE_LOOPS
 466         addi    a4, a4, -1
 467         bnez    a4, .Lloop
 468 #endif /* !XCHAL_HAVE_LOOPS */
 469 .Lloopend:
 470
 471         bltu    a2, a3, .Lreturn
 472         sub     a2, a2, a3      # subtract once more if udividend >= udivisor
 473 .Lreturn:
 474         bgez    a7, .Lpositive
 475         neg     a2, a2          # dividend < 0: negate the remainder
 476 .Lpositive:
 477         abi_return
 478
 479 .Lspecial:
 480         bltu    a2, a3, .Lreturn2
 481         sub     a2, a2, a3      # subtract once if udividend >= udivisor
 482 .Lreturn2:
 483         bgez    a7, .Lpositive2
 484         neg     a2, a2          # dividend < 0: negate the remainder
 485 .Lpositive2:
 486         abi_return
 487
 488 .Lle_one:
 489         # udivisor is either 0 or 1, so just return 0.
 490         # someday we may want to throw an exception if udivisor is 0.
 491         movi    a2, 0
 492         abi_return
 493         .size   __modsi3,.-__modsi3
 494
 495 #endif /* L_modsi3 */