gcc/config/xtensa/lib1funcs.asm

   1 /* Assembly functions for the Xtensa version of libgcc1.
   2    Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 In addition to the permissions in the GNU General Public License, the
  13 Free Software Foundation gives you unlimited permission to link the
  14 compiled version of this file into combinations with other programs,
  15 and to distribute those combinations without any restriction coming
  16 from the use of this file.  (The General Public License restrictions
  17 do apply in other respects; for example, they cover modification of
  18 the file, and distribution when not linked into a combine
  19 executable.)
  20
  21 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  23 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  24 for more details.
  25
  26 You should have received a copy of the GNU General Public License
  27 along with GCC; see the file COPYING.  If not, write to the Free
  28 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  29 02111-1307, USA.  */
  30
  31 #include "xtensa-config.h"
  32
  33 # Note: These functions use a minimum stack frame size of 32.  This is
  34 # necessary for Xtensa configurations that only support a fixed register
  35 # window size of 8, where even leaf functions (such as these) need to
  36 # allocate space for a 4-word "extra save area".
  37
  38 # Define macros for the ABS and ADDX* instructions to handle cases
  39 # where they are not included in the Xtensa processor configuration.
  40
  41         .macro  do_abs dst, src, tmp
  42 #if XCHAL_HAVE_ABS
  43         abs     \dst, \src
  44 #else
  45         neg     \tmp, \src
  46         movgez  \tmp, \src, \src
  47         mov     \dst, \tmp
  48 #endif
  49         .endm
  50
  51         .macro  do_addx2 dst, as, at, tmp
  52 #if XCHAL_HAVE_ADDX
  53         addx2   \dst, \as, \at
  54 #else
  55         slli    \tmp, \as, 1
  56         add     \dst, \tmp, \at
  57 #endif
  58         .endm
  59
  60         .macro  do_addx4 dst, as, at, tmp
  61 #if XCHAL_HAVE_ADDX
  62         addx4   \dst, \as, \at
  63 #else
  64         slli    \tmp, \as, 2
  65         add     \dst, \tmp, \at
  66 #endif
  67         .endm
  68
  69         .macro  do_addx8 dst, as, at, tmp
  70 #if XCHAL_HAVE_ADDX
  71         addx8   \dst, \as, \at
  72 #else
  73         slli    \tmp, \as, 3
  74         add     \dst, \tmp, \at
  75 #endif
  76         .endm
  77
  78 #ifdef L_mulsi3
  79         .align  4
  80         .global __mulsi3
  81         .type   __mulsi3,@function
  82 __mulsi3:
  83         entry   sp, 32
  84
  85 #if XCHAL_HAVE_MUL16
  86         or      a4, a2, a3
  87         srai    a4, a4, 16
  88         bnez    a4, .LMUL16
  89         mul16u  a2, a2, a3
  90         retw
  91 .LMUL16:
  92         srai    a4, a2, 16
  93         srai    a5, a3, 16
  94         mul16u  a7, a4, a3
  95         mul16u  a6, a5, a2
  96         mul16u  a4, a2, a3
  97         add     a7, a7, a6
  98         slli    a7, a7, 16
  99         add     a2, a7, a4
 100
 101 #elif XCHAL_HAVE_MAC16
 102         mul.aa.hl a2, a3
 103         mula.aa.lh a2, a3
 104         rsr     a5, 16 # ACCLO
 105         umul.aa.ll a2, a3
 106         rsr     a4, 16 # ACCLO
 107         slli    a5, a5, 16
 108         add     a2, a4, a5
 109
 110 #else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 111
 112         # Multiply one bit at a time, but unroll the loop 4x to better
 113         # exploit the addx instructions and avoid overhead.
 114         # Peel the first iteration to save a cycle on init.
 115
 116         # Avoid negative numbers.
 117         xor     a5, a2, a3  # top bit is 1 iff one of the inputs is negative
 118         do_abs  a3, a3, a6
 119         do_abs  a2, a2, a6
 120
 121         # Swap so the second argument is smaller.
 122         sub     a7, a2, a3
 123         mov     a4, a3
 124         movgez  a4, a2, a7  # a4 = max(a2, a3)
 125         movltz  a3, a2, a7  # a3 = min(a2, a3)
 126
 127         movi    a2, 0
 128         extui   a6, a3, 0, 1
 129         movnez  a2, a4, a6
 130
 131         do_addx2 a7, a4, a2, a7
 132         extui   a6, a3, 1, 1
 133         movnez  a2, a7, a6
 134
 135         do_addx4 a7, a4, a2, a7
 136         extui   a6, a3, 2, 1
 137         movnez  a2, a7, a6
 138
 139         do_addx8 a7, a4, a2, a7
 140         extui   a6, a3, 3, 1
 141         movnez  a2, a7, a6
 142
 143         bgeui   a3, 16, .Lmult_main_loop
 144         neg     a3, a2
 145         movltz  a2, a3, a5
 146         retw
 147
 148         .align  4
 149 .Lmult_main_loop:
 150         srli    a3, a3, 4
 151         slli    a4, a4, 4
 152
 153         add     a7, a4, a2
 154         extui   a6, a3, 0, 1
 155         movnez  a2, a7, a6
 156
 157         do_addx2 a7, a4, a2, a7
 158         extui   a6, a3, 1, 1
 159         movnez  a2, a7, a6
 160
 161         do_addx4 a7, a4, a2, a7
 162         extui   a6, a3, 2, 1
 163         movnez  a2, a7, a6
 164
 165         do_addx8 a7, a4, a2, a7
 166         extui   a6, a3, 3, 1
 167         movnez  a2, a7, a6
 168
 169         bgeui   a3, 16, .Lmult_main_loop
 170
 171         neg     a3, a2
 172         movltz  a2, a3, a5
 173
 174 #endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 175
 176         retw
 177         .size   __mulsi3,.-__mulsi3
 178
 179 #endif /* L_mulsi3 */
 180
 181
 182 # Define a macro for the NSAU (unsigned normalize shift amount)
 183 # instruction, which computes the number of leading zero bits,
 184 # to handle cases where it is not included in the Xtensa processor
 185 # configuration.
 186
 187         .macro  do_nsau cnt, val, tmp, a
 188 #if XCHAL_HAVE_NSA
 189         nsau    \cnt, \val
 190 #else
 191         mov     \a, \val
 192         movi    \cnt, 0
 193         extui   \tmp, \a, 16, 16
 194         bnez    \tmp, 0f
 195         movi    \cnt, 16
 196         slli    \a, \a, 16
 197 0:
 198         extui   \tmp, \a, 24, 8
 199         bnez    \tmp, 1f
 200         addi    \cnt, \cnt, 8
 201         slli    \a, \a, 8
 202 1:
 203         movi    \tmp, __nsau_data
 204         extui   \a, \a, 24, 8
 205         add     \tmp, \tmp, \a
 206         l8ui    \tmp, \tmp, 0
 207         add     \cnt, \cnt, \tmp
 208 #endif /* !XCHAL_HAVE_NSA */
 209         .endm
 210
 211 #ifdef L_nsau
 212         .section .rodata
 213         .align  4
 214         .global __nsau_data
 215         .type   __nsau_data,@object
 216 __nsau_data:
 217 #if !XCHAL_HAVE_NSA
 218         .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
 219         .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 220         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 221         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 222         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 223         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 224         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 225         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 226         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 227         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 228         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 229         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 230         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 231         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 232         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 233         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 234 #endif /* !XCHAL_HAVE_NSA */
 235         .size   __nsau_data,.-__nsau_data
 236         .hidden __nsau_data
 237 #endif /* L_nsau */
 238
 239
 240 #ifdef L_udivsi3
 241         .align  4
 242         .global __udivsi3
 243         .type   __udivsi3,@function
 244 __udivsi3:
 245         entry   sp, 32
 246         bltui   a3, 2, .Lle_one # check if the divisor <= 1
 247
 248         mov     a6, a2          # keep dividend in a6
 249         do_nsau a5, a6, a2, a7  # dividend_shift = nsau(dividend)
 250         do_nsau a4, a3, a2, a7  # divisor_shift = nsau(divisor)
 251         bgeu    a5, a4, .Lspecial
 252
 253         sub     a4, a4, a5      # count = divisor_shift - dividend_shift
 254         ssl     a4
 255         sll     a3, a3          # divisor <<= count
 256         movi    a2, 0           # quotient = 0
 257
 258         # test-subtract-and-shift loop; one quotient bit on each iteration
 259 #if XCHAL_HAVE_LOOPS
 260         loopnez a4, .Lloopend
 261 #endif /* XCHAL_HAVE_LOOPS */
 262 .Lloop:
 263         bltu    a6, a3, .Lzerobit
 264         sub     a6, a6, a3
 265         addi    a2, a2, 1
 266 .Lzerobit:
 267         slli    a2, a2, 1
 268         srli    a3, a3, 1
 269 #if !XCHAL_HAVE_LOOPS
 270         addi    a4, a4, -1
 271         bnez    a4, .Lloop
 272 #endif /* !XCHAL_HAVE_LOOPS */
 273 .Lloopend:
 274
 275         bltu    a6, a3, .Lreturn
 276         addi    a2, a2, 1       # increment quotient if dividend >= divisor
 277 .Lreturn:
 278         retw
 279
 280 .Lspecial:
 281         # return dividend >= divisor
 282         movi    a2, 0
 283         bltu    a6, a3, .Lreturn2
 284         movi    a2, 1
 285 .Lreturn2:
 286         retw
 287
 288 .Lle_one:
 289         beqz    a3, .Lerror     # if divisor == 1, return the dividend
 290         retw
 291 .Lerror:
 292         movi    a2, 0           # just return 0; could throw an exception
 293         retw
 294         .size   __udivsi3,.-__udivsi3
 295
 296 #endif /* L_udivsi3 */
 297
 298
 299 #ifdef L_divsi3
 300         .align  4
 301         .global __divsi3
 302         .type   __divsi3,@function
 303 __divsi3:
 304         entry   sp, 32
 305         xor     a7, a2, a3      # sign = dividend ^ divisor
 306         do_abs  a6, a2, a4      # udividend = abs(dividend)
 307         do_abs  a3, a3, a4      # udivisor = abs(divisor)
 308         bltui   a3, 2, .Lle_one # check if udivisor <= 1
 309         do_nsau a5, a6, a2, a8  # udividend_shift = nsau(udividend)
 310         do_nsau a4, a3, a2, a8  # udivisor_shift = nsau(udivisor)
 311         bgeu    a5, a4, .Lspecial
 312
 313         sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
 314         ssl     a4
 315         sll     a3, a3          # udivisor <<= count
 316         movi    a2, 0           # quotient = 0
 317
 318         # test-subtract-and-shift loop; one quotient bit on each iteration
 319 #if XCHAL_HAVE_LOOPS
 320         loopnez a4, .Lloopend
 321 #endif /* XCHAL_HAVE_LOOPS */
 322 .Lloop:
 323         bltu    a6, a3, .Lzerobit
 324         sub     a6, a6, a3
 325         addi    a2, a2, 1
 326 .Lzerobit:
 327         slli    a2, a2, 1
 328         srli    a3, a3, 1
 329 #if !XCHAL_HAVE_LOOPS
 330         addi    a4, a4, -1
 331         bnez    a4, .Lloop
 332 #endif /* !XCHAL_HAVE_LOOPS */
 333 .Lloopend:
 334
 335         bltu    a6, a3, .Lreturn
 336         addi    a2, a2, 1       # increment quotient if udividend >= udivisor
 337 .Lreturn:
 338         neg     a5, a2
 339         movltz  a2, a5, a7      # return (sign < 0) ? -quotient : quotient
 340         retw
 341
 342 .Lspecial:
 343         movi    a2, 0
 344         bltu    a6, a3, .Lreturn2 #  if dividend < divisor, return 0
 345         movi    a2, 1
 346         movi    a4, -1
 347         movltz  a2, a4, a7      # else return (sign < 0) ? -1 :  1
 348 .Lreturn2:
 349         retw
 350
 351 .Lle_one:
 352         beqz    a3, .Lerror
 353         neg     a2, a6          # if udivisor == 1, then return...
 354         movgez  a2, a6, a7      # (sign < 0) ? -udividend : udividend
 355         retw
 356 .Lerror:
 357         movi    a2, 0           # just return 0; could throw an exception
 358         retw
 359         .size   __divsi3,.-__divsi3
 360
 361 #endif /* L_divsi3 */
 362
 363
 364 #ifdef L_umodsi3
 365         .align  4
 366         .global __umodsi3
 367         .type   __umodsi3,@function
 368 __umodsi3:
 369         entry   sp, 32
 370         bltui   a3, 2, .Lle_one # check if the divisor is <= 1
 371
 372         do_nsau a5, a2, a6, a7  # dividend_shift = nsau(dividend)
 373         do_nsau a4, a3, a6, a7  # divisor_shift = nsau(divisor)
 374         bgeu    a5, a4, .Lspecial
 375
 376         sub     a4, a4, a5      # count = divisor_shift - dividend_shift
 377         ssl     a4
 378         sll     a3, a3          # divisor <<= count
 379
 380         # test-subtract-and-shift loop
 381 #if XCHAL_HAVE_LOOPS
 382         loopnez a4, .Lloopend
 383 #endif /* XCHAL_HAVE_LOOPS */
 384 .Lloop:
 385         bltu    a2, a3, .Lzerobit
 386         sub     a2, a2, a3
 387 .Lzerobit:
 388         srli    a3, a3, 1
 389 #if !XCHAL_HAVE_LOOPS
 390         addi    a4, a4, -1
 391         bnez    a4, .Lloop
 392 #endif /* !XCHAL_HAVE_LOOPS */
 393 .Lloopend:
 394
 395         bltu    a2, a3, .Lreturn
 396         sub     a2, a2, a3      # subtract once more if dividend >= divisor
 397 .Lreturn:
 398         retw
 399
 400 .Lspecial:
 401         bltu    a2, a3, .Lreturn2
 402         sub     a2, a2, a3      # subtract once if dividend >= divisor
 403 .Lreturn2:
 404         retw
 405
 406 .Lle_one:
 407         # the divisor is either 0 or 1, so just return 0.
 408         # someday we may want to throw an exception if the divisor is 0.
 409         movi    a2, 0
 410         retw
 411         .size   __umodsi3,.-__umodsi3
 412
 413 #endif /* L_umodsi3 */
 414
 415
 416 #ifdef L_modsi3
 417         .align  4
 418         .global __modsi3
 419         .type   __modsi3,@function
 420 __modsi3:
 421         entry   sp, 32
 422         mov     a7, a2          # save original (signed) dividend
 423         do_abs  a2, a2, a4      # udividend = abs(dividend)
 424         do_abs  a3, a3, a4      # udivisor = abs(divisor)
 425         bltui   a3, 2, .Lle_one # check if udivisor <= 1
 426         do_nsau a5, a2, a6, a8  # udividend_shift = nsau(udividend)
 427         do_nsau a4, a3, a6, a8  # udivisor_shift = nsau(udivisor)
 428         bgeu    a5, a4, .Lspecial
 429
 430         sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
 431         ssl     a4
 432         sll     a3, a3          # udivisor <<= count
 433
 434         # test-subtract-and-shift loop
 435 #if XCHAL_HAVE_LOOPS
 436         loopnez a4, .Lloopend
 437 #endif /* XCHAL_HAVE_LOOPS */
 438 .Lloop:
 439         bltu    a2, a3, .Lzerobit
 440         sub     a2, a2, a3
 441 .Lzerobit:
 442         srli    a3, a3, 1
 443 #if !XCHAL_HAVE_LOOPS
 444         addi    a4, a4, -1
 445         bnez    a4, .Lloop
 446 #endif /* !XCHAL_HAVE_LOOPS */
 447 .Lloopend:
 448
 449         bltu    a2, a3, .Lreturn
 450         sub     a2, a2, a3      # subtract once more if udividend >= udivisor
 451 .Lreturn:
 452         bgez    a7, .Lpositive
 453         neg     a2, a2          # if (dividend < 0), return -udividend
 454 .Lpositive:
 455         retw
 456
 457 .Lspecial:
 458         bltu    a2, a3, .Lreturn2
 459         sub     a2, a2, a3      # subtract once if dividend >= divisor
 460 .Lreturn2:
 461         bgez    a7, .Lpositive2
 462         neg     a2, a2          # if (dividend < 0), return -udividend
 463 .Lpositive2:
 464         retw
 465
 466 .Lle_one:
 467         # udivisor is either 0 or 1, so just return 0.
 468         # someday we may want to throw an exception if udivisor is 0.
 469         movi    a2, 0
 470         retw
 471         .size   __modsi3,.-__modsi3
 472
 473 #endif /* L_modsi3 */