gcc/config/xtensa/ieee754-sf.S

   1 /* IEEE-754 single-precision functions for Xtensa
   2    Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    In addition to the permissions in the GNU General Public License,
  13    the Free Software Foundation gives you unlimited permission to link
  14    the compiled version of this file into combinations with other
  15    programs, and to distribute those combinations without any
  16    restriction coming from the use of this file.  (The General Public
  17    License restrictions do apply in other respects; for example, they
  18    cover modification of the file, and distribution when not linked
  19    into a combine executable.)
  20
  21    GCC is distributed in the hope that it will be useful, but WITHOUT
  22    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  23    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  24    License for more details.
  25
  26    You should have received a copy of the GNU General Public License
  27    along with GCC; see the file COPYING.  If not, write to the Free
  28    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  29    02110-1301, USA.  */
  30
  31 #ifdef __XTENSA_EB__
  32 #define xh a2
  33 #define xl a3
  34 #define yh a4
  35 #define yl a5
  36 #else
  37 #define xh a3
  38 #define xl a2
  39 #define yh a5
  40 #define yl a4
  41 #endif
  42
  43 /*  Warning!  The branch displacements for some Xtensa branch instructions
  44     are quite small, and this code has been carefully laid out to keep
  45     branch targets in range.  If you change anything, be sure to check that
  46     the assembler is not relaxing anything to branch over a jump.  */
  47
  48 #ifdef L_negsf2
  49
     /* __negsf2: negate a single-precision value.
        In:  a2 = x.   Out: a2 = x with bit 31 (the sign bit) inverted.
        Scratch: a4.  The remaining 31 bits pass through unchanged, so no
        kind of input (zero, subnormal, Infinity, NaN) is special-cased.  */
  50         .align  4
  51         .global __negsf2
  52         .type   __negsf2, @function
  53 __negsf2:
  54         leaf_entry sp, 16
  55         movi    a4, 0x80000000  /* mask with only the sign bit set */
  56         xor     a2, a2, a4      /* flip the sign of x */
  57         leaf_return
  58
  59 #endif /* L_negsf2 */
  60
  61 #ifdef L_addsubsf3
  62
  63         /* Addition */
  64 __addsf3_aux:
  65
  66         /* Handle NaNs and Infinities.  (This code is placed before the
  67            start of the function just to keep it in range of the limited
  68            branch displacements.)
                On entry to either handler a2 = x, a3 = y and a6 = 0x7f800000
                (the exponent mask), and the operand named by the label has an
                all-ones exponent.  An Infinity is returned as is, but a NaN
                always has its quiet bit (0x400000) set before it is returned,
                so a signaling NaN operand never escapes as the result.
                Scratch: a4, a7.  */
  69
  70 .Ladd_xnan_or_inf:
  71         /* If y is neither Infinity nor NaN, return x.  */
  72         bnall   a3, a6, .Ladd_return_nan_or_inf
  73         /* If x is a NaN, return it.  Otherwise, return y.  */
  74         slli    a7, a2, 9
  75         bnez    a7, .Ladd_return_nan
             /* x is an Infinity: fall through and return y.  */
  77
  78 .Ladd_ynan_or_inf:
  79         /* Return y.  */
  80         mov     a2, a3

     .Ladd_return_nan_or_inf:
             /* a2 has an all-ones exponent.  Shifting out the sign and the
                exponent leaves only the mantissa, which is nonzero for a
                NaN and zero for an Infinity.  */
             slli    a7, a2, 9
             bnez    a7, .Ladd_return_nan
  81         leaf_return

     .Ladd_return_nan:
             movi    a4, 0x400000    /* make it a quiet NaN */
             or      a2, a2, a4
             leaf_return
  82
  83 .Ladd_opposite_signs:
  84         /* Operand signs differ.  Do a subtraction.  */
  85         slli    a7, a6, 8       /* 0x7f800000 << 8 = 0x80000000 */
  86         xor     a3, a3, a7      /* flip the sign of y */
  87         j       .Lsub_same_sign
  88
  89         .align  4
  90         .global __addsf3
  91         .type   __addsf3, @function
     /* __addsf3: single-precision addition.
        In:  a2 = x, a3 = y.   Out: a2 = x + y.
        Register roles in the code below:
          a6       0x7f800000, the exponent-field mask
          a7, a8   sign and exponent (bits 31..23) of x and of y
          a9       bits shifted out of the smaller operand, kept for rounding
          a10      scratch
        Operands whose signs differ are handed to the subtraction code at
        .Lsub_same_sign (via .Ladd_opposite_signs).  */
  92 __addsf3:
  93         leaf_entry sp, 16
  94         movi    a6, 0x7f800000
  95
  96         /* Check if the two operands have the same sign.  */
  97         xor     a7, a2, a3
  98         bltz    a7, .Ladd_opposite_signs
  99
 100 .Ladd_same_sign:
 101         /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
 102         ball    a2, a6, .Ladd_xnan_or_inf
 103         ball    a3, a6, .Ladd_ynan_or_inf
 104
 105         /* Compare the exponents.  The smaller operand will be shifted
 106            right by the exponent difference and added to the larger
 107            one.  (The 9-bit fields extracted here also contain the sign
                bit, which is the same for both operands on this path.)  */
 108         extui   a7, a2, 23, 9
 109         extui   a8, a3, 23, 9
 110         bltu    a7, a8, .Ladd_shiftx
 111
 112 .Ladd_shifty:
 113         /* Check if the smaller (or equal) exponent is zero.  */
 114         bnone   a3, a6, .Ladd_yexpzero
 115
 116         /* Replace y sign/exponent with 0x008.  */
 117         or      a3, a3, a6
 118         slli    a3, a3, 8
 119         srli    a3, a3, 8
 120
 121 .Ladd_yexpdiff:
 122         /* Compute the exponent difference.  */
 123         sub     a10, a7, a8
 124
 125         /* Exponent difference >= 32 -- just return the bigger value.
                (x is still in a2; the "1:" targeted here is the return at
                the end of .Ladd_bothexpzero.)  */
 126         bgeui   a10, 32, 1f
 127
 128         /* Shift y right by the exponent difference.  Any bits that are
 129            shifted out of y are saved in a9 for rounding the result.  */
 130         ssr     a10
 131         movi    a9, 0
 132         src     a9, a3, a9
 133         srl     a3, a3
 134
 135         /* Do the addition.  */
 136         add     a2, a2, a3
 137
 138         /* Check if the add overflowed into the exponent.  */
 139         extui   a10, a2, 23, 9
 140         beq     a10, a7, .Ladd_round
 141         mov     a8, a7          /* .Ladd_carry expects the exponent in a8 */
 142         j       .Ladd_carry
 143
 144 .Ladd_yexpzero:
 145         /* y is a subnormal value.  Replace its sign/exponent with zero,
 146            i.e., no implicit "1.0", and increment the apparent exponent
 147            because subnormals behave as if they had the minimum (nonzero)
 148            exponent.  Test for the case when both exponents are zero.  */
 149         slli    a3, a3, 9
 150         srli    a3, a3, 9
 151         bnone   a2, a6, .Ladd_bothexpzero
 152         addi    a8, a8, 1
 153         j       .Ladd_yexpdiff
 154
 155 .Ladd_bothexpzero:
 156         /* Both exponents are zero.  Handle this as a special case.  There
 157            is no need to shift or round, and the normal code for handling
 158            a carry into the exponent field will not work because it
 159            assumes there is an implicit "1.0" that needs to be added.  */
 160         add     a2, a2, a3
 161 1:      leaf_return
 162
 163 .Ladd_xexpzero:
 164         /* Same as "yexpzero" except skip handling the case when both
 165            exponents are zero.  */
 166         slli    a2, a2, 9
 167         srli    a2, a2, 9
 168         addi    a7, a7, 1
 169         j       .Ladd_xexpdiff
 170
 171 .Ladd_shiftx:
 172         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 173            because the exponent difference is always nonzero in this version,
 174            the shift sequence can use SLL and skip loading a constant zero.  */
 175         bnone   a2, a6, .Ladd_xexpzero
 176
             /* Replace x sign/exponent with 0x008 (the explicit "1.0").  */
 177         or      a2, a2, a6
 178         slli    a2, a2, 8
 179         srli    a2, a2, 8
 180
 181 .Ladd_xexpdiff:
             /* Exponent difference >= 32 -- x cannot change the sum, so the
                result is just y.  */
 182         sub     a10, a8, a7
 183         bgeui   a10, 32, .Ladd_returny
 184
             /* SSR loads the shift amount used by the SRL.  With that same
                setting SLL shifts left by 32 - a10, so a9 receives exactly
                the bits that the SRL drops from x.  */
 185         ssr     a10
 186         sll     a9, a2
 187         srl     a2, a2
 188
 189         add     a2, a2, a3
 190
 191         /* Check if the add overflowed into the exponent.  */
 192         extui   a10, a2, 23, 9
 193         bne     a10, a8, .Ladd_carry
 194
 195 .Ladd_round:
 196         /* Round up if the leftover fraction is >= 1/2.  (The msb of a9
                is the first bit below the result's lsb.)  */
 197         bgez    a9, 1f
 198         addi    a2, a2, 1
 199
 200         /* Check if the leftover fraction is exactly 1/2.  */
 201         slli    a9, a9, 1
 202         beqz    a9, .Ladd_exactlyhalf
 203 1:      leaf_return
 204
 205 .Ladd_returny:
             /* Reached from .Ladd_xexpdiff with y unmodified in a3.  */
 206         mov     a2, a3
 207         leaf_return
 208
 209 .Ladd_carry:
 210         /* The addition has overflowed into the exponent field, so the
 211            value needs to be renormalized.  The mantissa of the result
 212            can be recovered by subtracting the original exponent and
 213            adding 0x800000 (which is the explicit "1.0" for the
 214            mantissa of the non-shifted operand -- the "1.0" for the
 215            shifted operand was already added).  The mantissa can then
 216            be shifted right by one bit.  The explicit "1.0" of the
 217            shifted mantissa then needs to be replaced by the exponent,
 218            incremented by one to account for the normalizing shift.
 219            It is faster to combine these operations: do the shift first
 220            and combine the additions and subtractions.  If x is the
 221            original exponent, the result is:
 222                shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
 223            or:
 224                shifted mantissa + ((x + 1) << 22)
 225            Note that the exponent is incremented here by leaving the
 226            explicit "1.0" of the mantissa in the exponent field.  */
 227
 228         /* Shift x right by one bit.  Save the lsb.  */
 229         mov     a10, a2         /* bit 0 of a10 is the bit shifted out */
 230         srli    a2, a2, 1
 231
 232         /* See explanation above.  The original exponent is in a8.  */
 233         addi    a8, a8, 1
 234         slli    a8, a8, 22
 235         add     a2, a2, a8
 236
 237         /* Return an Infinity if the exponent overflowed.  */
 238         ball    a2, a6, .Ladd_infinity
 239
 240         /* Same thing as the "round" code except the msb of the leftover
 241            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 242         bbci.l  a10, 0, 1f
 243         addi    a2, a2, 1
 244         beqz    a9, .Ladd_exactlyhalf
 245 1:      leaf_return
 246
 247 .Ladd_infinity:
 248         /* Clear the mantissa.  */
 249         srli    a2, a2, 23
 250         slli    a2, a2, 23
 251
 252         /* The sign bit may have been lost in a carry-out.  Put it back.
                a8 was left as (sign:exponent + 1) << 22 by the code above, so
                one more left shift moves the saved sign into bit 31.  */
 253         slli    a8, a8, 1
 254         or      a2, a2, a8
 255         leaf_return
 256
 257 .Ladd_exactlyhalf:
 258         /* Round down to the nearest even value.  Both branches to this
                label come right after a2 was incremented, so clearing bit 0
                resolves the tie towards the even neighbour.  */
 259         srli    a2, a2, 1
 260         slli    a2, a2, 1
 261         leaf_return
 262
 263
 264         /* Subtraction */
 265 __subsf3_aux:
 266
 267         /* Handle NaNs and Infinities.  (This code is placed before the
 268            start of the function just to keep it in range of the limited
 269            branch displacements.)
                On entry to either handler a2 = x, a3 = y and a6 = 0x7f800000
                (the exponent mask), and the operand named by the label has an
                all-ones exponent.  An Infinity is returned as is, but a NaN
                always has its quiet bit (0x400000) set before it is returned,
                so a signaling NaN operand never escapes as the result.
                Scratch: a4, a7.  */
 270
 271 .Lsub_xnan_or_inf:
 272         /* If y is neither Infinity nor NaN, return x.  */
 273         bnall   a3, a6, .Lsub_return_nan_or_inf

     .Lsub_return_nan:
 274         /* Either both x and y are NaN or Inf, so the result is NaN, or
                the value in a2 is already a NaN that must be made quiet.  */
 275         movi    a4, 0x400000    /* make it a quiet NaN */
 276         or      a2, a2, a4
 277         leaf_return
 278
 279 .Lsub_ynan_or_inf:
 280         /* Negate y and return it.  */
 281         slli    a7, a6, 8       /* 0x7f800000 << 8 = 0x80000000 */
 282         xor     a2, a3, a7

     .Lsub_return_nan_or_inf:
             /* a2 has an all-ones exponent.  Shifting out the sign and the
                exponent leaves only the mantissa, which is nonzero for a
                NaN and zero for an Infinity.  */
             slli    a7, a2, 9
             bnez    a7, .Lsub_return_nan
 283         leaf_return
 284
 285 .Lsub_opposite_signs:
 286         /* Operand signs differ.  Do an addition.  */
 287         slli    a7, a6, 8       /* 0x7f800000 << 8 = 0x80000000 */
 288         xor     a3, a3, a7      /* flip the sign of y */
 289         j       .Ladd_same_sign
 290
 291         .align  4
 292         .global __subsf3
 293         .type   __subsf3, @function
     /* __subsf3: single-precision subtraction.
        In:  a2 = x, a3 = y.   Out: a2 = x - y.
        Register roles in the code below:
          a6       0x7f800000, the exponent-field mask
          a7, a8   8-bit exponent fields of x and of y
          a9       bits shifted out of the smaller operand, kept for rounding
          a10      scratch
        Operands whose signs differ are handed to the addition code at
        .Ladd_same_sign (via .Lsub_opposite_signs).  */
 294 __subsf3:
 295         leaf_entry sp, 16
 296         movi    a6, 0x7f800000
 297
 298         /* Check if the two operands have the same sign.  */
 299         xor     a7, a2, a3
 300         bltz    a7, .Lsub_opposite_signs
 301
 302 .Lsub_same_sign:
 303         /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
 304         ball    a2, a6, .Lsub_xnan_or_inf
 305         ball    a3, a6, .Lsub_ynan_or_inf
 306
 307         /* Compare the operands.  In contrast to addition, the entire
 308            value matters here.  (The signs are equal on this path, so the
                unsigned compare orders the operands by magnitude.)  */
 309         extui   a7, a2, 23, 8
 310         extui   a8, a3, 23, 8
 311         bltu    a2, a3, .Lsub_xsmaller
 312
 313 .Lsub_ysmaller:
 314         /* Check if the smaller (or equal) exponent is zero.  */
 315         bnone   a3, a6, .Lsub_yexpzero
 316
 317         /* Replace y sign/exponent with 0x008.  */
 318         or      a3, a3, a6
 319         slli    a3, a3, 8
 320         srli    a3, a3, 8
 321
 322 .Lsub_yexpdiff:
 323         /* Compute the exponent difference.  */
 324         sub     a10, a7, a8
 325
 326         /* Exponent difference >= 32 -- just return the bigger value.
                (x is still in a2; the "1:" targeted here is the return at
                the end of .Lsub_returny.)  */
 327         bgeui   a10, 32, 1f
 328
 329         /* Shift y right by the exponent difference.  Any bits that are
 330            shifted out of y are saved in a9 for rounding the result.  */
 331         ssr     a10
 332         movi    a9, 0
 333         src     a9, a3, a9
 334         srl     a3, a3
 335
 336         sub     a2, a2, a3
 337
 338         /* Subtract the leftover bits in a9 from zero and propagate any
 339            borrow from a2.  */
 340         neg     a9, a9
 341         addi    a10, a2, -1
 342         movnez  a2, a10, a9     /* borrow one only if a9 is nonzero */
 343
 344         /* Check if the subtract underflowed into the exponent.  */
 345         extui   a10, a2, 23, 8
 346         beq     a10, a7, .Lsub_round
 347         j       .Lsub_borrow
 348
 349 .Lsub_yexpzero:
 350         /* Return zero if the inputs are equal.  (For the non-subnormal
 351            case, subtracting the "1.0" will cause a borrow from the exponent
 352            and this case can be detected when handling the borrow.)  */
 353         beq     a2, a3, .Lsub_return_zero
 354
 355         /* y is a subnormal value.  Replace its sign/exponent with zero,
 356            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 357            y's apparent exponent because subnormals behave as if they had
 358            the minimum (nonzero) exponent.  */
 359         slli    a3, a3, 9
 360         srli    a3, a3, 9
 361         bnone   a2, a6, .Lsub_yexpdiff
 362         addi    a8, a8, 1
 363         j       .Lsub_yexpdiff
 364
 365 .Lsub_returny:
 366         /* Negate and return y.  */
 367         slli    a7, a6, 8       /* 0x7f800000 << 8 = 0x80000000 */
 368         xor     a2, a3, a7
 369 1:      leaf_return
 370
 371 .Lsub_xsmaller:
 372         /* Same thing as the "ysmaller" code, but with x and y swapped and
 373            with y negated.  */
 374         bnone   a2, a6, .Lsub_xexpzero
 375
             /* Replace x sign/exponent with 0x008 (the explicit "1.0").  */
 376         or      a2, a2, a6
 377         slli    a2, a2, 8
 378         srli    a2, a2, 8
 379
 380 .Lsub_xexpdiff:
             /* Exponent difference >= 32 -- the result is just -y.  */
 381         sub     a10, a8, a7
 382         bgeui   a10, 32, .Lsub_returny
 383
             /* Shift x right by the exponent difference, saving the bits
                that are shifted out in a9 for rounding.  */
 384         ssr     a10
 385         movi    a9, 0
 386         src     a9, a2, a9
 387         srl     a2, a2
 388
 389         /* Negate y.  */
 390         slli    a11, a6, 8
 391         xor     a3, a3, a11
 392
 393         sub     a2, a3, a2
 394
             /* Subtract the leftover bits in a9 from zero and propagate any
                borrow from a2, as in the "ysmaller" code.  */
 395         neg     a9, a9
 396         addi    a10, a2, -1
 397         movnez  a2, a10, a9
 398
 399         /* Check if the subtract underflowed into the exponent.  */
 400         extui   a10, a2, 23, 8
 401         bne     a10, a8, .Lsub_borrow
 402
 403 .Lsub_round:
 404         /* Round up if the leftover fraction is >= 1/2.  (The msb of a9
                is the first bit below the result's lsb.)  */
 405         bgez    a9, 1f
 406         addi    a2, a2, 1
 407
 408         /* Check if the leftover fraction is exactly 1/2.  */
 409         slli    a9, a9, 1
 410         beqz    a9, .Lsub_exactlyhalf
 411 1:      leaf_return
 412
 413 .Lsub_xexpzero:
 414         /* Same as "yexpzero".  */
 415         beq     a2, a3, .Lsub_return_zero
 416         slli    a2, a2, 9
 417         srli    a2, a2, 9
 418         bnone   a3, a6, .Lsub_xexpdiff
 419         addi    a7, a7, 1
 420         j       .Lsub_xexpdiff
 421
 422 .Lsub_return_zero:
             /* All bits clear, i.e., the result is +0.0.  */
 423         movi    a2, 0
 424         leaf_return
 425
 426 .Lsub_borrow:
 427         /* The subtraction has underflowed into the exponent field, so the
 428            value needs to be renormalized.  Shift the mantissa left as
 429            needed to remove any leading zeros and adjust the exponent
 430            accordingly.  If the exponent is not large enough to remove
 431            all the leading zeros, the result will be a subnormal value.
                On entry a10 holds the exponent field of the raw difference
                in a2 and a9 holds the guard bits.  From here on a6 is reused
                as the shift count and no longer holds the exponent mask.  */
 432
             /* do_nsau is a macro defined outside this file section;
                presumably it stores the number of leading zeros of a8 in a6
                using a7 and a11 as temporaries -- confirm against its
                definition.  */
 433         slli    a8, a2, 9
 434         beqz    a8, .Lsub_xzero
 435         do_nsau a6, a8, a7, a11
 436         srli    a8, a8, 9
 437         bge     a6, a10, .Lsub_subnormal
 438         addi    a6, a6, 1
 439
 440 .Lsub_normalize_shift:
 441         /* Shift the mantissa (a8/a9) left by a6.  */
 442         ssl     a6
 443         src     a8, a8, a9
 444         sll     a9, a9
 445
 446         /* Combine the shifted mantissa with the sign and exponent,
 447            decrementing the exponent by a6.  (The exponent has already
 448            been decremented by one due to the borrow from the subtraction,
 449            but adding the mantissa will increment the exponent by one.)  */
 450         srli    a2, a2, 23
 451         sub     a2, a2, a6
 452         slli    a2, a2, 23
 453         add     a2, a2, a8
 454         j       .Lsub_round
 455
 456 .Lsub_exactlyhalf:
 457         /* Round down to the nearest even value.  The branch to this
                label comes right after a2 was incremented, so clearing bit 0
                resolves the tie towards the even neighbour.  */
 458         srli    a2, a2, 1
 459         slli    a2, a2, 1
 460         leaf_return
 461
 462 .Lsub_xzero:
 463         /* If there was a borrow from the exponent, and the mantissa and
 464            guard digits are all zero, then the inputs were equal and the
 465            result should be zero.  */
 466         beqz    a9, .Lsub_return_zero
 467
 468         /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
 469         addi    a11, a10, -24
 470         movi    a6, 24
 471         movltz  a6, a10, a11    /* a6 = a10 when a10 < 24 */
 472         j       .Lsub_normalize_shift
 473
 474 .Lsub_subnormal:
 475         /* The exponent is too small to shift away all the leading zeros.
 476            Set a6 to the current exponent (which has already been
 477            decremented by the borrow) so that the exponent of the result
 478            will be zero.  Do not add 1 to a6 in this case, because: (1)
 479            adding the mantissa will not increment the exponent, so there is
 480            no need to subtract anything extra from the exponent to
 481            compensate, and (2) the effective exponent of a subnormal is 1
 482            not 0 so the shift amount must be 1 smaller than normal. */
 483         mov     a6, a10
 484         j       .Lsub_normalize_shift
 485
 486 #endif /* L_addsubsf3 */
 487
 488 #ifdef L_mulsf3
 489
 490         /* Multiplication */
     /* XCHAL_NO_MUL is set when none of the MUL16, MUL32 and MAC16 options
        is configured; the 16x16 partial products are then computed by the
        software helper .Lmul_mulsi3.  */
 491 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 492 #define XCHAL_NO_MUL 1
 493 #endif
 494
 495 __mulsf3_aux:
 496
 497         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 498            (This code is placed before the start of the function just to
 499            keep it in range of the limited branch displacements.)
                These handlers are entered from __mulsf3 with a2 = x, a3 = y,
                a7 = x ^ y (the sign of the result is its bit 31) and a8, a9 =
                the exponent fields of x and y.
                do_nsau is a macro defined outside this file section;
                presumably it stores the number of leading zeros of its second
                operand in its first, using the last two as temporaries --
                confirm against its definition.  */
 500
 501 .Lmul_xexpzero:
 502         /* Clear the sign bit of x.  */
 503         slli    a2, a2, 1
 504         srli    a2, a2, 1
 505
 506         /* If x is zero, return zero.  */
 507         beqz    a2, .Lmul_return_zero
 508
 509         /* Normalize x.  Adjust the exponent in a8.  (The shift count is
                the leading-zero count less 8, which moves the leading one to
                bit 23; a8 becomes 1 minus that count.)  */
 510         do_nsau a10, a2, a11, a12
 511         addi    a10, a10, -8
 512         ssl     a10
 513         sll     a2, a2
 514         movi    a8, 1
 515         sub     a8, a8, a10
 516         j       .Lmul_xnormalized
 517
 518 .Lmul_yexpzero:
 519         /* Clear the sign bit of y.  */
 520         slli    a3, a3, 1
 521         srli    a3, a3, 1
 522
 523         /* If y is zero, return zero.  */
 524         beqz    a3, .Lmul_return_zero
 525
 526         /* Normalize y.  Adjust the exponent in a9.  (Same scheme as for
                x above.)  */
 527         do_nsau a10, a3, a11, a12
 528         addi    a10, a10, -8
 529         ssl     a10
 530         sll     a3, a3
 531         movi    a9, 1
 532         sub     a9, a9, a10
 533         j       .Lmul_ynormalized
 534
 535 .Lmul_return_zero:
 536         /* Return zero with the appropriate sign bit.  */
 537         srli    a2, a7, 31
 538         slli    a2, a2, 31
 539         j       .Lmul_done
 540
 541 .Lmul_xnan_or_inf:
             /* NaN/Infinity operands.  Entered from __mulsf3 with a2 = x,
                a3 = y, a6 = 0x7f800000 (the exponent mask) and a7 = x ^ y.
                Every exit goes through .Lmul_done.  An Infinity result is
                returned as is, but a NaN always has its quiet bit (0x400000)
                set first, so a signaling NaN operand never escapes as the
                result.  Scratch: a4, a8.  */
 542         /* If y is zero, return NaN.  */
 543         slli    a8, a3, 1
 544         beqz    a8, .Lmul_return_nan
 549         /* If y is NaN, return y.  */
 550         bnall   a3, a6, .Lmul_returnx
 551         slli    a8, a3, 9
 552         beqz    a8, .Lmul_returnx
 553
 554 .Lmul_returny:
 555         mov     a2, a3
 556
 557 .Lmul_returnx:
 558         /* Set the sign bit.  */
 559         extui   a7, a7, 31, 1
 560         slli    a2, a2, 1
 561         ssai    1
 562         src     a2, a7, a2
             /* a2 has an all-ones exponent on every path to this point.
                Shifting out the sign and the exponent leaves only the
                mantissa, which is nonzero for a NaN and zero for an
                Infinity.  */
             slli    a8, a2, 9
             bnez    a8, .Lmul_return_nan
 563         j       .Lmul_done
 564
 565 .Lmul_ynan_or_inf:
 566         /* If x is zero, return NaN.  */
 567         slli    a8, a2, 1
 568         bnez    a8, .Lmul_returny
             mov     a2, a3

     .Lmul_return_nan:
 569         movi    a4, 0x400000    /* make it a quiet NaN */
 570         or      a2, a2, a4
 571         j       .Lmul_done
 572
 573         .align  4
 574         .global __mulsf3
 575         .type   __mulsf3, @function
     /* __mulsf3: single-precision multiplication.
        In:  a2 = x, a3 = y.   Out: a2 = x * y.
        Register roles in the code below:
          a6       0x7f800000 (exponent mask) until the multiply, then the
                   low word of the 64-bit product
          a7       x ^ y; bit 31 is the sign of the result
          a8, a9   exponents of x and y; a8 then holds their sum
          a2       high word of the product, then the result
        With the CALL0 ABI, a12-a15 are saved in a 32-byte frame here and
        restored at .Lmul_done, which every exit path goes through.  */
 576 __mulsf3:
 577 #if __XTENSA_CALL0_ABI__
 578         leaf_entry sp, 32
 579         addi    sp, sp, -32
 580         s32i    a12, sp, 16
 581         s32i    a13, sp, 20
 582         s32i    a14, sp, 24
 583         s32i    a15, sp, 28
 584 #elif XCHAL_NO_MUL
 585         /* This is not really a leaf function; allocate enough stack space
 586            to allow CALL12s to a helper function.  */
 587         leaf_entry sp, 64
 588 #else
 589         leaf_entry sp, 32
 590 #endif
 591         movi    a6, 0x7f800000
 592
 593         /* Get the sign of the result.  */
 594         xor     a7, a2, a3
 595
 596         /* Check for NaN and infinity.  */
 597         ball    a2, a6, .Lmul_xnan_or_inf
 598         ball    a3, a6, .Lmul_ynan_or_inf
 599
 600         /* Extract the exponents.  */
 601         extui   a8, a2, 23, 8
 602         extui   a9, a3, 23, 8
 603
             /* Zero and subnormal operands are handled out of line; for a
                subnormal the handler normalizes the mantissa, adjusts the
                exponent and jumps back to the label after the branch.  */
 604         beqz    a8, .Lmul_xexpzero
 605 .Lmul_xnormalized:
 606         beqz    a9, .Lmul_yexpzero
 607 .Lmul_ynormalized:
 608
 609         /* Add the exponents.  */
 610         add     a8, a8, a9
 611
 612         /* Replace sign/exponent fields with explicit "1.0".  */
 613         movi    a10, 0xffffff
 614         or      a2, a2, a6
 615         and     a2, a2, a10
 616         or      a3, a3, a6
 617         and     a3, a3, a10
 618
 619         /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
 620
 621 #if XCHAL_HAVE_MUL32_HIGH
 622
 623         mull    a6, a2, a3
 624         muluh   a2, a2, a3
 625
 626 #else
 627
 628         /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
 629            products.  These partial products are:
 630
 631                 0 xl * yl
 632
 633                 1 xl * yh
 634                 2 xh * yl
 635
 636                 3 xh * yh
 637
 638            If using the Mul16 or Mul32 multiplier options, these input
 639            chunks must be stored in separate registers.  For Mac16, the
 640            UMUL.AA.* opcodes can specify that the inputs come from either
 641            half of the registers, so there is no need to shift them out
 642            ahead of time.  If there is no multiply hardware, the 16-bit
 643            chunks can be extracted when setting up the arguments to the
 644            separate multiply function.  */
 645
 646 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 647         /* Calling a separate multiply function will clobber a0 and requires
 648            use of a8 as a temporary, so save those values now.  (The function
 649            uses a custom ABI so nothing else needs to be saved.)  */
 650         s32i    a0, sp, 0
 651         s32i    a8, sp, 4
 652 #endif
 653
 654 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 655
 656 #define a2h a4
 657 #define a3h a5
 658
 659         /* Get the high halves of the inputs into registers.  */
 660         srli    a2h, a2, 16
 661         srli    a3h, a3, 16
 662
 663 #define a2l a2
 664 #define a3l a3
 665
 666 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 667         /* Clear the high halves of the inputs.  This does not matter
 668            for MUL16 because the high bits are ignored.  */
 669         extui   a2, a2, 0, 16
 670         extui   a3, a3, 0, 16
 671 #endif
 672 #endif /* MUL16 || MUL32 */
 673
 674
     /* do_mul(dst, xreg, xhalf, yreg, yhalf) puts the 32-bit product of the
        selected 16-bit halves (l or h) of xreg and yreg in dst.  One of the
        four definitions below is chosen to match the multiply hardware.  */
 675 #if XCHAL_HAVE_MUL16
 676
 677 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 678         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 679
 680 #elif XCHAL_HAVE_MUL32
 681
 682 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 683         mull    dst, xreg ## xhalf, yreg ## yhalf
 684
 685 #elif XCHAL_HAVE_MAC16
 686
 687 /* The preprocessor insists on inserting a space when concatenating after
 688    a period in the definition of do_mul below.  These macros are a workaround
 689    using underscores instead of periods when doing the concatenation.  */
 690 #define umul_aa_ll umul.aa.ll
 691 #define umul_aa_lh umul.aa.lh
 692 #define umul_aa_hl umul.aa.hl
 693 #define umul_aa_hh umul.aa.hh
 694
 695 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 696         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 697         rsr     dst, ACCLO
 698
 699 #else /* no multiply hardware */
 700
 701 #define set_arg_l(dst, src) \
 702         extui   dst, src, 0, 16
 703 #define set_arg_h(dst, src) \
 704         srli    dst, src, 16
 705
     /* With CALL0 the helper takes its inputs in a13/a14 and returns the
        product in a12.  Otherwise it is called with CALL12: the inputs are
        placed in a14/a15 and the product is read back from a14.  */
 706 #if __XTENSA_CALL0_ABI__
 707 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 708         set_arg_ ## xhalf (a13, xreg); \
 709         set_arg_ ## yhalf (a14, yreg); \
 710         call0   .Lmul_mulsi3; \
 711         mov     dst, a12
 712 #else
 713 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 714         set_arg_ ## xhalf (a14, xreg); \
 715         set_arg_ ## yhalf (a15, yreg); \
 716         call12  .Lmul_mulsi3; \
 717         mov     dst, a14
 718 #endif /* __XTENSA_CALL0_ABI__ */
 719
 720 #endif /* no multiply hardware */
 721
 722         /* Add pp1 and pp2 into a6 with carry-out in a9.  */
 723         do_mul(a6, a2, l, a3, h)        /* pp 1 */
 724         do_mul(a11, a2, h, a3, l)       /* pp 2 */
 725         movi    a9, 0
 726         add     a6, a6, a11
 727         bgeu    a6, a11, 1f             /* sum >= addend: no carry */
 728         addi    a9, a9, 1
 729 1:
 730         /* Shift the high half of a9/a6 into position in a9.  Note that
 731            this value can be safely incremented without any carry-outs.  */
 732         ssai    16
 733         src     a9, a9, a6
 734
 735         /* Compute the low word into a6.  */
 736         do_mul(a11, a2, l, a3, l)       /* pp 0 */
 737         sll     a6, a6
 738         add     a6, a6, a11
 739         bgeu    a6, a11, 1f             /* sum >= addend: no carry */
 740         addi    a9, a9, 1
 741 1:
 742         /* Compute the high word into a2.  */
 743         do_mul(a2, a2, h, a3, h)        /* pp 3 */
 744         add     a2, a2, a9
 745
 746 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
 747         /* Restore values saved on the stack during the multiplication.  */
 748         l32i    a0, sp, 0
 749         l32i    a8, sp, 4
 750 #endif
 751 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
 752
 753         /* Shift left by 9 bits, unless there was a carry-out from the
 754            multiply, in which case, shift by 8 bits and increment the
 755            exponent.  */
 756         movi    a4, 9
 757         srli    a5, a2, 24 - 9
 758         beqz    a5, 1f
 759         addi    a4, a4, -1
 760         addi    a8, a8, 1
 761 1:      ssl     a4
 762         src     a2, a2, a6
 763         sll     a6, a6
 764
 765         /* Subtract the extra bias from the exponent sum (plus one to account
 766            for the explicit "1.0" of the mantissa that will be added to the
 767            exponent in the final result).  */
 768         movi    a4, 0x80
 769         sub     a8, a8, a4
 770
 771         /* Check for over/underflow.  The value in a8 is one less than the
 772            final exponent, so values in the range 0..fd are OK here.  (The
                compare is unsigned, so a negative a8 is caught here as well;
                .Lmul_overflow tells the two cases apart.)  */
 773         movi    a4, 0xfe
 774         bgeu    a8, a4, .Lmul_overflow
 775
 776 .Lmul_round:
             /* Final stage of __mulsf3.  Here a2 = the mantissa including its
                explicit "1.0" (or a subnormal mantissa), a6 = the bits below
                the lsb of a2, a8 = the exponent less one, and bit 31 of a7 =
                the sign of the result.  */
 777         /* Round.  */
 778         bgez    a6, .Lmul_rounded
 779         addi    a2, a2, 1
 780         slli    a6, a6, 1
 781         beqz    a6, .Lmul_exactlyhalf
 782
 783 .Lmul_rounded:
 784         /* Add the exponent to the mantissa.  */
 785         slli    a8, a8, 23
 786         add     a2, a2, a8
 787
 788 .Lmul_addsign:
 789         /* Add the sign bit.  */
 790         srli    a7, a7, 31
 791         slli    a7, a7, 31
 792         or      a2, a2, a7
 793
 794 .Lmul_done:
             /* Common exit: with the CALL0 ABI, restore the registers saved
                at entry and release the 32-byte frame.  */
 795 #if __XTENSA_CALL0_ABI__
 796         l32i    a12, sp, 16
 797         l32i    a13, sp, 20
 798         l32i    a14, sp, 24
 799         l32i    a15, sp, 28
 800         addi    sp, sp, 32
 801 #endif
 802         leaf_return
 803
 804 .Lmul_exactlyhalf:
 805         /* Round down to the nearest even value.  */
 806         srli    a2, a2, 1
 807         slli    a2, a2, 1
 808         j       .Lmul_rounded
 809
 810 .Lmul_overflow:
             /* a8 failed the unsigned range check: a negative value means
                underflow, anything else means overflow.  */
 811         bltz    a8, .Lmul_underflow
 812         /* Return +/- Infinity.  */
 813         movi    a8, 0xff
 814         slli    a2, a8, 23
 815         j       .Lmul_addsign
 816
 817 .Lmul_underflow:
 818         /* Create a subnormal value, where the exponent field contains zero,
 819            but the effective exponent is 1.  The value of a8 is one less than
 820            the actual exponent, so just negate it to get the shift amount.  */
 821         neg     a8, a8
 822         mov     a9, a6          /* remember the bits already below a2 */
 823         ssr     a8
 824         bgeui   a8, 32, .Lmul_flush_to_zero
 825
 826         /* Shift a2 right.  Any bits that are shifted out of a2 are saved
 827            in a6 (combined with the shifted-out bits currently in a6) for
 828            rounding the result.  (After the SSR above, SLL shifts left by
                32 - a8, which leaves exactly the dropped bits in a6.)  */
 829         sll     a6, a2
 830         srl     a2, a2
 831
 832         /* Set the exponent to zero.  */
 833         movi    a8, 0
 834
 835         /* Pack any nonzero bits shifted out into a6.  (They are folded
                into bit 0 of a6 as a sticky bit.)  */
 836         beqz    a9, .Lmul_round
 837         movi    a9, 1
 838         or      a6, a6, a9
 839         j       .Lmul_round
 840
 841 .Lmul_flush_to_zero:
 842         /* Return zero with the appropriate sign bit.  */
 843         srli    a2, a7, 31
 844         slli    a2, a2, 31
 845         j       .Lmul_done
 846
 847 #if XCHAL_NO_MUL
 848
 849         /* For Xtensa processors with no multiply hardware, this simplified
 850            version of _mulsi3 is used for multiplying 16-bit chunks of
 851            the floating-point mantissas.  When using CALL0, this function
 852            uses a custom ABI: the inputs are passed in a13 and a14, the
 853            result is returned in a12, and a8 and a15 are clobbered.
                Otherwise the inputs arrive in a2 and a3 and the result is
                returned in a2, with a4-a6 used as temporaries.  */
 854         .align  4
 855 .Lmul_mulsi3:
 856         leaf_entry sp, 16
             /* mul_mulsi3_body: shift-and-add multiply, dst = src1 * src2.
                Each pass of the loop looks at the low four bits of src1 and,
                for every bit that is set, adds the matching multiple of src2
                (x1, x2, x4, x8) to dst.  It then drops those four bits from
                src1 and scales src2 by 16, stopping once src1 is zero.  src1
                and src2 are destroyed.  The do_addx2/4/8 macros are defined
                outside this file section; presumably they compute
                tmp1 = src2 * {2,4,8} + dst -- confirm against their
                definitions.  */
 857         .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
 858         movi    \dst, 0
 859 1:      add     \tmp1, \src2, \dst
 860         extui   \tmp2, \src1, 0, 1
 861         movnez  \dst, \tmp1, \tmp2
 862
 863         do_addx2 \tmp1, \src2, \dst, \tmp1
 864         extui   \tmp2, \src1, 1, 1
 865         movnez  \dst, \tmp1, \tmp2
 866
 867         do_addx4 \tmp1, \src2, \dst, \tmp1
 868         extui   \tmp2, \src1, 2, 1
 869         movnez  \dst, \tmp1, \tmp2
 870
 871         do_addx8 \tmp1, \src2, \dst, \tmp1
 872         extui   \tmp2, \src1, 3, 1
 873         movnez  \dst, \tmp1, \tmp2
 874
 875         srli    \src1, \src1, 4
 876         slli    \src2, \src2, 4
 877         bnez    \src1, 1b
 878         .endm
 879 #if __XTENSA_CALL0_ABI__
 880         mul_mulsi3_body a12, a13, a14, a15, a8
 881 #else
 882         /* The result will be written into a2, so save that argument in a4.  */
 883         mov     a4, a2
 884         mul_mulsi3_body a2, a4, a3, a5, a6
 885 #endif
 886         leaf_return
 887 #endif /* XCHAL_NO_MUL */
 888 #endif /* L_mulsf3 */
 889
 890 #ifdef L_divsf3
 891
 892         /* Division */
 893 __divsf3_aux:
 894
 895         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 896            (This code is placed before the start of the function just to
 897            keep it in range of the limited branch displacements.)
                Each handler is reached by a branch from __divsf3, which has
                already loaded a6 with 0x7f800000 (the exponent field mask)
                and a7 with x ^ y (bit 31 = sign of the quotient); x is in
                a2 and y is in a3.  */
 898
 899 .Ldiv_yexpzero:
             /* The exponent field of y is zero: y is a zero or a subnormal.  */
 900         /* Clear the sign bit of y.  */
 901         slli    a3, a3, 1
 902         srli    a3, a3, 1
 903
 904         /* Check for division by zero.  */
 905         beqz    a3, .Ldiv_yzero
 906
 907         /* Normalize y.  Adjust the exponent in a9.  The shift count a10
                is the result of the do_nsau macro (defined elsewhere;
                presumably a count of leading zero bits -- confirm) less 8,
                and the adjusted exponent is 1 - a10.  */
 908         do_nsau a10, a3, a4, a5
 909         addi    a10, a10, -8
 910         ssl     a10
 911         sll     a3, a3
 912         movi    a9, 1
 913         sub     a9, a9, a10
 914         j       .Ldiv_ynormalized
 915
 916 .Ldiv_yzero:
 917         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.
                a4 receives x with its sign bit cleared for the zero test,
                and a2 is first set to an Infinity that carries the sign
                bit taken from a7.  */
 918         slli    a4, a2, 1
 919         srli    a4, a4, 1
 920         srli    a2, a7, 31
 921         slli    a2, a2, 31
 922         or      a2, a2, a6
 923         bnez    a4, 1f
 924         movi    a4, 0x400000    /* make it a quiet NaN */
 925         or      a2, a2, a4
 926 1:      leaf_return
 927
 928 .Ldiv_xexpzero:
             /* The exponent field of x is zero: x is a zero or a subnormal.  */
 929         /* Clear the sign bit of x.  */
 930         slli    a2, a2, 1
 931         srli    a2, a2, 1
 932
 933         /* If x is zero, return zero.  */
 934         beqz    a2, .Ldiv_return_zero
 935
 936         /* Normalize x.  Adjust the exponent in a8.  This mirrors the
                normalization of y above: the adjusted exponent is 1 - a10.  */
 937         do_nsau a10, a2, a4, a5
 938         addi    a10, a10, -8
 939         ssl     a10
 940         sll     a2, a2
 941         movi    a8, 1
 942         sub     a8, a8, a10
 943         j       .Ldiv_xnormalized
 944
 945 .Ldiv_return_zero:
 946         /* Return zero with the appropriate sign bit.  */
 947         srli    a2, a7, 31
 948         slli    a2, a2, 31
 949         leaf_return
 950
 951 .Ldiv_xnan_or_inf:
 952         /* x is a NaN or an Infinity.  Set the sign bit of the result by
                flipping the sign of x when y is negative.  */
 953         srli    a7, a3, 31
 954         slli    a7, a7, 31
 955         xor     a2, a2, a7
 956         /* If y is NaN or Inf, return NaN.  Otherwise (y is finite) the
                value in a2, x with the sign computed above, is returned.  */
 957         bnall   a3, a6, 1f
 958         movi    a4, 0x400000    /* make it a quiet NaN */
 959         or      a2, a2, a4
 960 1:      leaf_return
 961
 962 .Ldiv_ynan_or_inf:
 963         /* x is finite here, because __divsf3 tests x for NaN/Infinity
                first.  If y is Infinity (zero mantissa), return zero.  */
 964         slli    a8, a3, 9
 965         beqz    a8, .Ldiv_return_zero
 966         /* y is NaN; return it.  */
 967         mov     a2, a3
 968         leaf_return
 969
 970         .align  4
 971         .global __divsf3
 972         .type   __divsf3, @function
             /* __divsf3: single-precision division.  The dividend x is taken
                from a2 and the divisor y from a3, both as raw single-precision
                bit patterns, and the quotient is returned in a2.  Register
                roles on the main path:
                  a6  = 0x7f800000, the exponent field mask
                  a7  = x ^ y; bit 31 is the sign of the result
                  a8  = exponent of x, later the exponent of the result
                  a9  = exponent of y, later the loop counter
                  a10 = quotient mantissa being built up
                Zero, subnormal, NaN and Infinity operands are handled by the
                out-of-line code placed just before this function.  */
 973 __divsf3:
 974         leaf_entry sp, 16
 975         movi    a6, 0x7f800000
 976
 977         /* Get the sign of the result.  */
 978         xor     a7, a2, a3
 979
 980         /* Check for NaN and infinity (all exponent bits set).  */
 981         ball    a2, a6, .Ldiv_xnan_or_inf
 982         ball    a3, a6, .Ldiv_ynan_or_inf
 983
 984         /* Extract the exponents.  */
 985         extui   a8, a2, 23, 8
 986         extui   a9, a3, 23, 8
 987
             /* A zero exponent field means a zero or a subnormal.  The
                handlers jump back to the labels below once the operand
                has been normalized.  */
 988         beqz    a9, .Ldiv_yexpzero
 989 .Ldiv_ynormalized:
 990         beqz    a8, .Ldiv_xexpzero
 991 .Ldiv_xnormalized:
 992
 993         /* Subtract the exponents.  */
 994         sub     a8, a8, a9
 995
 996         /* Replace sign/exponent fields with explicit "1.0".  */
 997         movi    a10, 0xffffff
 998         or      a2, a2, a6
 999         and     a2, a2, a10
1000         or      a3, a3, a6
1001         and     a3, a3, a10
1002
1003         /* The first digit of the mantissa division must be a one.
1004            Shift x (and adjust the exponent) as needed to make this true.
                The shift is taken whenever x <= y, including x == y.  */
1005         bltu    a3, a2, 1f
1006         slli    a2, a2, 1
1007         addi    a8, a8, -1
1008 1:
1009         /* Do the first subtraction and shift.  */
1010         sub     a2, a2, a3
1011         slli    a2, a2, 1
1012
1013         /* Put the quotient into a10.  */
1014         movi    a10, 1
1015
1016         /* Divide one bit at a time for 23 bits.  The loop instruction
                is used when the processor has it; otherwise a9 is counted
                down by hand at the bottom of the loop.  */
1017         movi    a9, 23
1018 #if XCHAL_HAVE_LOOPS
1019         loop    a9, .Ldiv_loopend
1020 #endif
1021 .Ldiv_loop:
1022         /* Shift the quotient << 1.  */
1023         slli    a10, a10, 1
1024
1025         /* Is this digit a 0 or 1?  */
1026         bltu    a2, a3, 1f
1027
1028         /* Output a 1 and subtract.  */
1029         addi    a10, a10, 1
1030         sub     a2, a2, a3
1031
1032         /* Shift the dividend << 1.  */
1033 1:      slli    a2, a2, 1
1034
1035 #if !XCHAL_HAVE_LOOPS
1036         addi    a9, a9, -1
1037         bnez    a9, .Ldiv_loop
1038 #endif
1039 .Ldiv_loopend:
1040
1041         /* Add the exponent bias (less one to account for the explicit "1.0"
1042            of the mantissa that will be added to the exponent in the final
1043            result).  */
1044         addi    a8, a8, 0x7e
1045
1046         /* Check for over/underflow.  The value in a8 is one less than the
1047            final exponent, so values in the range 0..fd are OK here.
                The comparison is unsigned, so a negative a8 is caught too.  */
1048         movi    a4, 0xfe
1049         bgeu    a8, a4, .Ldiv_overflow
1050
1051 .Ldiv_round:
1052         /* Round.  The remainder (<< 1) is in a2, so comparing it with the
                divisor in a3 tests the remainder against half the divisor:
                below rounds down, above rounds up, equal is a tie.  */
1053         bltu    a2, a3, .Ldiv_rounded
1054         addi    a10, a10, 1
1055         beq     a2, a3, .Ldiv_exactlyhalf
1056
1057 .Ldiv_rounded:
1058         /* Add the exponent to the mantissa.  */
1059         slli    a8, a8, 23
1060         add     a2, a10, a8
1061
1062 .Ldiv_addsign:
1063         /* Add the sign bit.  */
1064         srli    a7, a7, 31
1065         slli    a7, a7, 31
1066         or      a2, a2, a7
1067         leaf_return
1068
1069 .Ldiv_overflow:
             /* a8 failed the range check above: a negative value means
                underflow, anything else is an overflow.  */
1070         bltz    a8, .Ldiv_underflow
1071         /* Return +/- Infinity.  */
1072         addi    a8, a4, 1       /* 0xff */
1073         slli    a2, a8, 23
1074         j       .Ldiv_addsign
1075
1076 .Ldiv_exactlyhalf:
1077         /* Remainder is exactly half the divisor.  Round even.  */
1078         srli    a10, a10, 1
1079         slli    a10, a10, 1
1080         j       .Ldiv_rounded
1081
1082 .Ldiv_underflow:
1083         /* Create a subnormal value, where the exponent field contains zero,
1084            but the effective exponent is 1.  The value of a8 is one less than
1085            the actual exponent, so just negate it to get the shift amount.
                A shift amount of 32 or more leaves no significant bits, so
                a signed zero is returned in that case.  */
1086         neg     a8, a8
1087         ssr     a8
1088         bgeui   a8, 32, .Ldiv_flush_to_zero
1089
1090         /* Shift a10 right.  Any bits that are shifted out of a10 are
1091            saved in a6 for rounding the result.  */
1092         sll     a6, a10
1093         srl     a10, a10
1094
1095         /* Set the exponent to zero.  */
1096         movi    a8, 0
1097
1098         /* Pack any nonzero remainder (in a2) into a6.  */
1099         beqz    a2, 1f
1100         movi    a9, 1
1101         or      a6, a6, a9
1102
1103         /* Round a10 based on the bits shifted out into a6.  Bit 31 of a6
                is the first discarded bit; if the remaining bits are all zero
                the value is an exact tie and is rounded to even.  */
1104 1:      bgez    a6, .Ldiv_rounded
1105         addi    a10, a10, 1
1106         slli    a6, a6, 1
1107         bnez    a6, .Ldiv_rounded
1108         srli    a10, a10, 1
1109         slli    a10, a10, 1
1110         j       .Ldiv_rounded
1111
1112 .Ldiv_flush_to_zero:
1113         /* Return zero with the appropriate sign bit.  */
1114         srli    a2, a7, 31
1115         slli    a2, a2, 31
1116         leaf_return
1117
1118 #endif /* L_divsf3 */
1119
1120 #ifdef L_cmpsf2
1121
1122         /* Equal and Not Equal
                __eqsf2 (also exported as __nesf2 through the .set below)
                compares x in a2 with y in a3.  It returns 0 in a2 when the
                values compare equal and 1 otherwise.  Identical bit patterns
                are equal unless they encode a NaN; differing bit patterns
                are equal only when both are zeros (+0 and -0).  */
1123
1124         .align  4
1125         .global __eqsf2
1126         .global __nesf2
1127         .set    __nesf2, __eqsf2
1128         .type   __eqsf2, @function
1129 __eqsf2:
1130         leaf_entry sp, 16
1131         bne     a2, a3, 4f
1132
1133         /* The values are equal but NaN != NaN.  Check the exponent.  */
1134         movi    a6, 0x7f800000
1135         ball    a2, a6, 3f
1136
1137         /* Equal.  */
1138         movi    a2, 0
1139         leaf_return
1140
1141         /* Not equal.
                NOTE(review): no branch in this function targets local label
                2, so these two instructions appear to be unreachable.  */
1142 2:      movi    a2, 1
1143         leaf_return
1144
1145         /* Check if the mantissas are nonzero.  */
1146 3:      slli    a7, a2, 9
1147         j       5f
1148
1149         /* Check if x and y are zero with different signs.  */
1150 4:      or      a7, a2, a3
1151         slli    a7, a7, 1
1152
1153         /* Equal if a7 == 0, where a7 is either (x | y) << 1 (the sign bits
1154            discarded) or the mantissa of x when exponent(x) = 0xff and
                x == y.  */
1155 5:      movi    a2, 0
1156         movi    a3, 1
1157         movnez  a2, a3, a7
1158         leaf_return
1159
1160
1161         /* Greater Than
                __gtsf2 compares x (in a2) with y (in a3).  It returns 0 in
                a2 if either operand is a NaN; otherwise it continues at
                .Lle_cmp, which returns 1 when x > y and 0 when x <= y.  */
1162
1163         .align  4
1164         .global __gtsf2
1165         .type   __gtsf2, @function
1166 __gtsf2:
1167         leaf_entry sp, 16
1168         movi    a6, 0x7f800000
             /* All exponent bits set means Infinity or NaN; the mantissa
                is then examined to tell the two apart.  */
1169         ball    a2, a6, 2f
1170 1:      bnall   a3, a6, .Lle_cmp
1171
1172         /* Check if y is a NaN.  An Infinity (zero mantissa) is compared
                like any other value.  */
1173         slli    a7, a3, 9
1174         beqz    a7, .Lle_cmp
1175         movi    a2, 0
1176         leaf_return
1177
1178         /* Check if x is a NaN.  If it is an Infinity, go back and
                examine y.  */
1179 2:      slli    a7, a2, 9
1180         beqz    a7, 1b
1181         movi    a2, 0
1182         leaf_return
1183
1184
1185         /* Less Than or Equal
                __lesf2 compares x (in a2) with y (in a3).  It returns 1 in
                a2 if either operand is a NaN; otherwise it falls into
                .Lle_cmp, which returns 0 when x <= y and 1 when x > y.
                .Lle_cmp is also the comparison tail used by __gtsf2.  */
1186
1187         .align  4
1188         .global __lesf2
1189         .type   __lesf2, @function
1190 __lesf2:
1191         leaf_entry sp, 16
1192         movi    a6, 0x7f800000
             /* All exponent bits set means Infinity or NaN; the mantissa
                is then examined to tell the two apart.  */
1193         ball    a2, a6, 2f
1194 1:      bnall   a3, a6, .Lle_cmp
1195
1196         /* Check if y is a NaN.  An Infinity (zero mantissa) is compared
                like any other value.  */
1197         slli    a7, a3, 9
1198         beqz    a7, .Lle_cmp
1199         movi    a2, 1
1200         leaf_return
1201
1202         /* Check if x is a NaN.  If it is an Infinity, go back and
                examine y.  */
1203 2:      slli    a7, a2, 9
1204         beqz    a7, 1b
1205         movi    a2, 1
1206         leaf_return
1207
1208 .Lle_cmp:
1209         /* Check if x and y have different signs.  */
1210         xor     a7, a2, a3
1211         bltz    a7, .Lle_diff_signs
1212
1213         /* Check if x is negative.  */
1214         bltz    a2, .Lle_xneg
1215
1216         /* Both are non-negative: check if x <= y by comparing the bit
                patterns as unsigned integers.  */
1217         bltu    a3, a2, 5f
1218 4:      movi    a2, 0
1219         leaf_return
1220
1221 .Lle_xneg:
1222         /* Both are negative, so the order of the bit patterns is reversed:
                x <= y exactly when a2 >= a3 as unsigned integers.  */
1223         bgeu    a2, a3, 4b
1224 5:      movi    a2, 1
1225         leaf_return
1226
1227 .Lle_diff_signs:
             /* The signs differ.  A negative x is below y, so return 0.  */
1228         bltz    a2, 4b
1229
1230         /* x is non-negative and y is negative, so x > y unless both x
                and y are zero.  Check if both x and y are zero.  */
1231         or      a7, a2, a3
1232         slli    a7, a7, 1
1233         movi    a2, 1
1234         movi    a3, 0
1235         moveqz  a2, a3, a7
1236         leaf_return
1237
1238
1239         /* Greater Than or Equal
                __gesf2 compares x (in a2) with y (in a3).  It returns -1 in
                a2 if either operand is a NaN; otherwise it continues at
                .Llt_cmp, which returns -1 when x < y and 0 when x >= y.  */
1240
1241         .align  4
1242         .global __gesf2
1243         .type   __gesf2, @function
1244 __gesf2:
1245         leaf_entry sp, 16
1246         movi    a6, 0x7f800000
             /* All exponent bits set means Infinity or NaN; the mantissa
                is then examined to tell the two apart.  */
1247         ball    a2, a6, 2f
1248 1:      bnall   a3, a6, .Llt_cmp
1249
1250         /* Check if y is a NaN.  An Infinity (zero mantissa) is compared
                like any other value.  */
1251         slli    a7, a3, 9
1252         beqz    a7, .Llt_cmp
1253         movi    a2, -1
1254         leaf_return
1255
1256         /* Check if x is a NaN.  If it is an Infinity, go back and
                examine y.  */
1257 2:      slli    a7, a2, 9
1258         beqz    a7, 1b
1259         movi    a2, -1
1260         leaf_return
1261
1262
1263         /* Less Than
                __ltsf2 compares x (in a2) with y (in a3).  It returns 0 in
                a2 if either operand is a NaN; otherwise it falls into
                .Llt_cmp, which returns -1 when x < y and 0 when x >= y.
                .Llt_cmp is also the comparison tail used by __gesf2.  */
1264
1265         .align  4
1266         .global __ltsf2
1267         .type   __ltsf2, @function
1268 __ltsf2:
1269         leaf_entry sp, 16
1270         movi    a6, 0x7f800000
             /* All exponent bits set means Infinity or NaN; the mantissa
                is then examined to tell the two apart.  */
1271         ball    a2, a6, 2f
1272 1:      bnall   a3, a6, .Llt_cmp
1273
1274         /* Check if y is a NaN.  An Infinity (zero mantissa) is compared
                like any other value.  */
1275         slli    a7, a3, 9
1276         beqz    a7, .Llt_cmp
1277         movi    a2, 0
1278         leaf_return
1279
1280         /* Check if x is a NaN.  If it is an Infinity, go back and
                examine y.  */
1281 2:      slli    a7, a2, 9
1282         beqz    a7, 1b
1283         movi    a2, 0
1284         leaf_return
1285
1286 .Llt_cmp:
1287         /* Check if x and y have different signs.  */
1288         xor     a7, a2, a3
1289         bltz    a7, .Llt_diff_signs
1290
1291         /* Check if x is negative.  */
1292         bltz    a2, .Llt_xneg
1293
1294         /* Both are non-negative: check if x < y by comparing the bit
                patterns as unsigned integers.  */
1295         bgeu    a2, a3, 5f
1296 4:      movi    a2, -1
1297         leaf_return
1298
1299 .Llt_xneg:
1300         /* Both are negative, so the order of the bit patterns is reversed:
                x < y exactly when a3 < a2 as unsigned integers.  */
1301         bltu    a3, a2, 4b
1302 5:      movi    a2, 0
1303         leaf_return
1304
1305 .Llt_diff_signs:
             /* The signs differ.  A non-negative x is not below y, so
                return 0.  */
1306         bgez    a2, 5b
1307
1308         /* x is negative and y is non-negative, so x < y unless both x
                and y are zero.  Check that they are not both zero.  */
1309         or      a7, a2, a3
1310         slli    a7, a7, 1
1311         movi    a2, 0
1312         movi    a3, -1
1313         movnez  a2, a3, a7
1314         leaf_return
1315
1316
1317         /* Unordered
                __unordsf2 takes x in a2 and y in a3 and returns 1 in a2 if
                either of them is a NaN (all exponent bits set and a nonzero
                mantissa), 0 otherwise.  */
1318
1319         .align  4
1320         .global __unordsf2
1321         .type   __unordsf2, @function
1322 __unordsf2:
1323         leaf_entry sp, 16
1324         movi    a6, 0x7f800000
1325         ball    a2, a6, 3f
1326 1:      ball    a3, a6, 4f
1327 2:      movi    a2, 0
1328         leaf_return
1329
             /* x is an Infinity or a NaN.  A zero mantissa means Infinity,
                in which case y still has to be examined.  */
1330 3:      slli    a7, a2, 9
1331         beqz    a7, 1b
1332         movi    a2, 1
1333         leaf_return
1334
             /* y is an Infinity or a NaN.  A zero mantissa means Infinity,
                so the operands are ordered.  */
1335 4:      slli    a7, a3, 9
1336         beqz    a7, 2b
1337         movi    a2, 1
1338         leaf_return
1339
1340 #endif /* L_cmpsf2 */
1341
1342 #ifdef L_fixsfsi
1343
             /* __fixsfsi: convert the single-precision value in a2 to a
                signed 32-bit integer, returned in a2.  The fraction is
                discarded by shifting the magnitude right, and the result
                is then negated for negative inputs.  Magnitudes below 1.0
                give 0.  Values whose exponent is too large, and Infinities,
                give 0x7fffffff or 0x80000000 according to the sign; a NaN
                gives 0x7fffffff.  */
1344         .align  4
1345         .global __fixsfsi
1346         .type   __fixsfsi, @function
1347 __fixsfsi:
1348         leaf_entry sp, 16
1349
1350         /* Check for NaN and Infinity.  */
1351         movi    a6, 0x7f800000
1352         ball    a2, a6, .Lfixsfsi_nan_or_inf
1353
1354         /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
1355         extui   a4, a2, 23, 8
1356         addi    a4, a4, -0x7e
1357         bgei    a4, 32, .Lfixsfsi_maxint
1358         blti    a4, 1, .Lfixsfsi_zero
1359
1360         /* Add explicit "1.0" and shift << 8.  a7 keeps the sign of the
                input in bit 31 for the final negation.  */
1361         or      a7, a2, a6
1362         slli    a5, a7, 8
1363
1364         /* Shift back to the right, based on the exponent.  */
1365         ssl     a4              /* shift by 32 - a4 */
1366         srl     a5, a5
1367
1368         /* Negate the result if sign != 0.  */
1369         neg     a2, a5
1370         movgez  a2, a5, a7
1371         leaf_return
1372
1373 .Lfixsfsi_nan_or_inf:
1374         /* Handle Infinity and NaN.  A zero mantissa means Infinity, which
                saturates according to its sign.  */
1375         slli    a4, a2, 9
1376         beqz    a4, .Lfixsfsi_maxint
1377
1378         /* Translate NaN to +maxint.  Clearing a2 makes the sign test
                below select the positive limit.  */
1379         movi    a2, 0
1380
1381 .Lfixsfsi_maxint:
             /* Saturate: 0x7fffffff if a2 is non-negative, else 0x80000000.  */
1382         slli    a4, a6, 8       /* 0x80000000 */
1383         addi    a5, a4, -1      /* 0x7fffffff */
1384         movgez  a4, a5, a2
1385         mov     a2, a4
1386         leaf_return
1387
1388 .Lfixsfsi_zero:
1389         movi    a2, 0
1390         leaf_return
1391
1392 #endif /* L_fixsfsi */
1393
1394 #ifdef L_fixsfdi
1395
             /* __fixsfdi: convert the single-precision value in a2 to a
                signed 64-bit integer, returned in the register pair xh/xl
                (a2 and a3; the defines at the top of the file pick which
                one is the high word according to __XTENSA_EB__).  The
                fraction is discarded by shifting the magnitude right, and
                the result is then negated for negative inputs.  Magnitudes
                below 1.0 give 0.  Values whose exponent is too large, and
                Infinities, give 0x7fffffffffffffff or 0x8000000000000000
                according to the sign; a NaN gives 0x7fffffffffffffff.  */
1396         .align  4
1397         .global __fixsfdi
1398         .type   __fixsfdi, @function
1399 __fixsfdi:
1400         leaf_entry sp, 16
1401
1402         /* Check for NaN and Infinity.  */
1403         movi    a6, 0x7f800000
1404         ball    a2, a6, .Lfixsfdi_nan_or_inf
1405
1406         /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
1407         extui   a4, a2, 23, 8
1408         addi    a4, a4, -0x7e
1409         bgei    a4, 64, .Lfixsfdi_maxint
1410         blti    a4, 1, .Lfixsfdi_zero
1411
1412         /* Add explicit "1.0" and shift << 8.  From here on the sign is
                taken from a7, because xh may be the same register as a2.  */
1413         or      a7, a2, a6
1414         slli    xh, a7, 8
1415
1416         /* Shift back to the right, based on the exponent.  When a4 < 32
                the whole result fits in the low word.  */
1417         ssl     a4              /* shift by 64 - a4 */
1418         bgei    a4, 32, .Lfixsfdi_smallshift
1419         srl     xl, xh
1420         movi    xh, 0
1421
1422 .Lfixsfdi_shifted:
1423         /* Negate the result if sign != 0.  This is a 64-bit negation:
                the high word gets an extra -1 unless the low word is zero.  */
1424         bgez    a7, 1f
1425         neg     xl, xl
1426         neg     xh, xh
1427         beqz    xl, 1f
1428         addi    xh, xh, -1
1429 1:      leaf_return
1430
1431 .Lfixsfdi_smallshift:
             /* a4 >= 32: the result occupies both words.  The low word gets
                the bits of xh that the right shift pushes out, and the high
                word gets xh shifted right.
                NOTE(review): the movi below appears redundant, since the
                following sll overwrites xl without reading it.  */
1432         movi    xl, 0
1433         sll     xl, xh
1434         srl     xh, xh
1435         j       .Lfixsfdi_shifted
1436
1437 .Lfixsfdi_nan_or_inf:
1438         /* Handle Infinity and NaN.  A zero mantissa means Infinity, which
                saturates according to its sign.  */
1439         slli    a4, a2, 9
1440         beqz    a4, .Lfixsfdi_maxint
1441
1442         /* Translate NaN to +maxint.  Clearing a2 makes the sign test
                below select the positive limit.  */
1443         movi    a2, 0
1444
1445 .Lfixsfdi_maxint:
             /* Saturate according to the sign of a2, which still holds the
                input (or 0 for a NaN) on every path that reaches here.  */
1446         slli    a7, a6, 8       /* 0x80000000 */
1447         bgez    a2, 1f
1448         mov     xh, a7
1449         movi    xl, 0
1450         leaf_return
1451
1452 1:      addi    xh, a7, -1      /* 0x7fffffff */
1453         movi    xl, -1
1454         leaf_return
1455
1456 .Lfixsfdi_zero:
1457         movi    xh, 0
1458         movi    xl, 0
1459         leaf_return
1460
1461 #endif /* L_fixsfdi */
1462
1463 #ifdef L_fixunssfsi
1464
             /* __fixunssfsi: convert the single-precision value in a2 to an
                unsigned 32-bit integer, returned in a2.  Magnitudes below
                1.0 give 0.  Otherwise the fraction is discarded by shifting
                the magnitude right, and the result is negated if the input
                was negative.  Values whose exponent is too large, and
                Infinities, give 0xffffffff when non-negative and 0x80000000
                when negative; a NaN gives 0xffffffff.  */
1465         .align  4
1466         .global __fixunssfsi
1467         .type   __fixunssfsi, @function
1468 __fixunssfsi:
1469         leaf_entry sp, 16
1470
1471         /* Check for NaN and Infinity.  */
1472         movi    a6, 0x7f800000
1473         ball    a2, a6, .Lfixunssfsi_nan_or_inf
1474
1475         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
1476         extui   a4, a2, 23, 8
1477         addi    a4, a4, -0x7f
1478         bgei    a4, 32, .Lfixunssfsi_maxint
1479         bltz    a4, .Lfixunssfsi_zero
1480
1481         /* Add explicit "1.0" and shift << 8.  a7 keeps the sign of the
                input in bit 31 for the final negation.  */
1482         or      a7, a2, a6
1483         slli    a5, a7, 8
1484
1485         /* Shift back to the right, based on the exponent.  When a4 + 1
                is 32 no shift is needed; that case is handled separately.  */
1486         addi    a4, a4, 1
1487         beqi    a4, 32, .Lfixunssfsi_bigexp
1488         ssl     a4              /* shift by 32 - a4 */
1489         srl     a5, a5
1490
1491         /* Negate the result if sign != 0.  */
1492         neg     a2, a5
1493         movgez  a2, a5, a7
1494         leaf_return
1495
1496 .Lfixunssfsi_nan_or_inf:
1497         /* Handle Infinity and NaN.  A zero mantissa means Infinity, which
                saturates according to its sign.  */
1498         slli    a4, a2, 9
1499         beqz    a4, .Lfixunssfsi_maxint
1500
1501         /* Translate NaN to 0xffffffff.  */
1502         movi    a2, -1
1503         leaf_return
1504
1505 .Lfixunssfsi_maxint:
             /* Saturate: 0xffffffff if a2 is non-negative, else 0x80000000.  */
1506         slli    a4, a6, 8       /* 0x80000000 */
1507         movi    a5, -1          /* 0xffffffff */
1508         movgez  a4, a5, a2
1509         mov     a2, a4
1510         leaf_return
1511
1512 .Lfixunssfsi_zero:
1513         movi    a2, 0
1514         leaf_return
1515
1516 .Lfixunssfsi_bigexp:
1517         /* Handle unsigned maximum exponent case.  The mantissa in a5 is
                already in its final position.  */
1518         bltz    a2, 1f
1519         mov     a2, a5          /* no shift needed */
1520         leaf_return
1521
1522         /* Return 0x80000000 if negative.  */
1523 1:      slli    a2, a6, 8
1524         leaf_return
1525
1526 #endif /* L_fixunssfsi */
1527
1528 #ifdef L_fixunssfdi
1529
             /* __fixunssfdi: convert the single-precision value in a2 to an
                unsigned 64-bit integer, returned in the register pair xh/xl
                (a2 and a3; the defines at the top of the file pick which
                one is the high word according to __XTENSA_EB__).
                Magnitudes below 1.0 give 0.  Otherwise the fraction is
                discarded by shifting the magnitude right, and the result is
                negated if the input was negative.  Values whose exponent is
                too large, and Infinities, give 0xffffffffffffffff when
                non-negative and 0x8000000000000000 when negative; a NaN
                gives 0xffffffffffffffff.  */
1530         .align  4
1531         .global __fixunssfdi
1532         .type   __fixunssfdi, @function
1533 __fixunssfdi:
1534         leaf_entry sp, 16
1535
1536         /* Check for NaN and Infinity.  */
1537         movi    a6, 0x7f800000
1538         ball    a2, a6, .Lfixunssfdi_nan_or_inf
1539
1540         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
1541         extui   a4, a2, 23, 8
1542         addi    a4, a4, -0x7f
1543         bgei    a4, 64, .Lfixunssfdi_maxint
1544         bltz    a4, .Lfixunssfdi_zero
1545
1546         /* Add explicit "1.0" and shift << 8.  From here on the sign is
                taken from a7, because xh may be the same register as a2.  */
1547         or      a7, a2, a6
1548         slli    xh, a7, 8
1549
1550         /* Shift back to the right, based on the exponent.  When a4 + 1
                is 64 no shift is needed; that case is handled separately.
                When it is below 32 the whole result fits in the low word.  */
1551         addi    a4, a4, 1
1552         beqi    a4, 64, .Lfixunssfdi_bigexp
1553         ssl     a4              /* shift by 64 - a4 */
1554         bgei    a4, 32, .Lfixunssfdi_smallshift
1555         srl     xl, xh
1556         movi    xh, 0
1557
1558 .Lfixunssfdi_shifted:
1559         /* Negate the result if sign != 0.  This is a 64-bit negation:
                the high word gets an extra -1 unless the low word is zero.  */
1560         bgez    a7, 1f
1561         neg     xl, xl
1562         neg     xh, xh
1563         beqz    xl, 1f
1564         addi    xh, xh, -1
1565 1:      leaf_return
1566
1567 .Lfixunssfdi_smallshift:
             /* a4 >= 32: the result occupies both words.  With xl cleared,
                the funnel shift leaves in xl the bits of xh that the right
                shift pushes out; the high word is xh shifted right.  */
1568         movi    xl, 0
1569         src     xl, xh, xl
1570         srl     xh, xh
1571         j       .Lfixunssfdi_shifted
1572
1573 .Lfixunssfdi_nan_or_inf:
1574         /* Handle Infinity and NaN.  A zero mantissa means Infinity, which
                saturates according to its sign.  */
1575         slli    a4, a2, 9
1576         beqz    a4, .Lfixunssfdi_maxint
1577
1578         /* Translate NaN to 0xffffffff.... */
1579 1:      movi    xh, -1
1580         movi    xl, -1
1581         leaf_return
1582
1583 .Lfixunssfdi_maxint:
             /* Saturate according to the sign of a2, which still holds the
                input on every path that reaches here: all ones if it is
                non-negative, otherwise 0x80000000 in the high word and 0
                in the low word.  */
1584         bgez    a2, 1b
1585 2:      slli    xh, a6, 8       /* 0x80000000 */
1586         movi    xl, 0
1587         leaf_return
1588
1589 .Lfixunssfdi_zero:
1590         movi    xh, 0
1591         movi    xl, 0
1592         leaf_return
1593
1594 .Lfixunssfdi_bigexp:
1595         /* Handle unsigned maximum exponent case.  xh already holds the
                mantissa in its final position, so only xl needs clearing.
                A negative input gives the same result as the negative
                saturation case above.  */
1596         bltz    a7, 2b
1597         movi    xl, 0
1598         leaf_return             /* no shift needed */
1599
1600 #endif /* L_fixunssfdi */
1601
1602 #ifdef L_floatsisf
1603
             /* __floatunsisf and __floatsisf: convert the unsigned or signed
                32-bit integer in a2 to single precision, returned in a2.
                Both entry points share the code at .Lfloatsisf_normalize,
                which expects the nonzero magnitude in a2 and the sign (0 or
                1) in a7.  A zero input is returned unchanged.  Bits that do
                not fit in the mantissa are rounded to nearest, with ties
                going to the even value.  */
1604         .align  4
1605         .global __floatunsisf
1606         .type   __floatunsisf, @function
1607 __floatunsisf:
1608         leaf_entry sp, 16
1609         beqz    a2, .Lfloatsisf_return
1610
1611         /* Set the sign to zero and jump to the floatsisf code.  */
1612         movi    a7, 0
1613         j       .Lfloatsisf_normalize
1614
1615         .align  4
1616         .global __floatsisf
1617         .type   __floatsisf, @function
1618 __floatsisf:
1619         leaf_entry sp, 16
1620
1621         /* Check for zero.  */
1622         beqz    a2, .Lfloatsisf_return
1623
1624         /* Save the sign.  */
1625         extui   a7, a2, 31, 1
1626
1627         /* Get the absolute value.  */
1628 #if XCHAL_HAVE_ABS
1629         abs     a2, a2
1630 #else
1631         neg     a4, a2
1632         movltz  a2, a4, a2
1633 #endif
1634
1635 .Lfloatsisf_normalize:
1636         /* Normalize with the first 1 bit in the msb.  a4 is the shift
                count produced by the do_nsau macro (defined elsewhere;
                presumably a count of leading zero bits -- confirm).  */
1637         do_nsau a4, a2, a5, a6
1638         ssl     a4
1639         sll     a5, a2
1640
1641         /* Shift the mantissa into position, with rounding bits in a6.
                a6 holds the eight discarded bits, left-aligned.  */
1642         srli    a2, a5, 8
1643         slli    a6, a5, (32 - 8)
1644
1645         /* Set the exponent.  The leading 1 of the mantissa sits in bit 23
                and carries into the exponent field when added, which is why
                the constant is based on 0x7e rather than 0x7f.  */
1646         movi    a5, 0x9d        /* 0x7e + 31 */
1647         sub     a5, a5, a4
1648         slli    a5, a5, 23
1649         add     a2, a2, a5
1650
1651         /* Add the sign.  */
1652         slli    a7, a7, 31
1653         or      a2, a2, a7
1654
1655         /* Round up if the leftover fraction is >= 1/2.  Bit 31 of a6 is
                the first discarded bit.  */
1656         bgez    a6, .Lfloatsisf_return
1657         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1658
1659         /* Check if the leftover fraction is exactly 1/2.  */
1660         slli    a6, a6, 1
1661         beqz    a6, .Lfloatsisf_exactlyhalf
1662
1663 .Lfloatsisf_return:
1664         leaf_return
1665
1666 .Lfloatsisf_exactlyhalf:
1667         /* Round down to the nearest even value.  */
1668         srli    a2, a2, 1
1669         slli    a2, a2, 1
1670         leaf_return
1671
1672 #endif /* L_floatsisf */
1673
1674 #ifdef L_floatdisf
1675
             /* __floatundisf and __floatdisf: convert the unsigned or signed
                64-bit integer in the register pair xh/xl (a2 and a3; the
                defines at the top of the file pick which one is the high
                word according to __XTENSA_EB__) to single precision,
                returned in a2.  Both entry points share the code at
                .Lfloatdisf_normalize, which expects the nonzero magnitude in
                xh/xl and the sign (0 or 1) in a7.  For a zero input the
                function returns at once with the registers untouched.  Bits
                that do not fit in the mantissa are rounded to nearest, with
                ties going to the even value.  */
1676         .align  4
1677         .global __floatundisf
1678         .type   __floatundisf, @function
1679 __floatundisf:
1680         leaf_entry sp, 16
1681
1682         /* Check for zero.  */
1683         or      a4, xh, xl
1684         beqz    a4, 2f
1685
1686         /* Set the sign to zero and jump to the floatdisf code.  */
1687         movi    a7, 0
1688         j       .Lfloatdisf_normalize
1689
1690         .align  4
1691         .global __floatdisf
1692         .type   __floatdisf, @function
1693 __floatdisf:
1694         leaf_entry sp, 16
1695
1696         /* Check for zero.  */
1697         or      a4, xh, xl
1698         beqz    a4, 2f
1699
1700         /* Save the sign.  */
1701         extui   a7, xh, 31, 1
1702
1703         /* Get the absolute value.  This is a 64-bit negation: the high
                word gets an extra -1 unless the low word is zero.  */
1704         bgez    xh, .Lfloatdisf_normalize
1705         neg     xl, xl
1706         neg     xh, xh
1707         beqz    xl, .Lfloatdisf_normalize
1708         addi    xh, xh, -1
1709
1710 .Lfloatdisf_normalize:
1711         /* Normalize with the first 1 bit in the msb of xh.  a4 is the
                shift count produced by the do_nsau macro (defined elsewhere;
                presumably a count of leading zero bits -- confirm).  */
1712         beqz    xh, .Lfloatdisf_bigshift
1713         do_nsau a4, xh, a5, a6
1714         ssl     a4
1715         src     xh, xh, xl
1716         sll     xl, xl
1717
1718 .Lfloatdisf_shifted:
1719         /* Shift the mantissa into position, with rounding bits in a6.
                a5 collects the low eight bits of xl, which fall below a6;
                if any of them is set, bit 0 of a6 is set as a sticky bit so
                that they still count when testing for an exact tie.  */
1720         ssai    8
1721         sll     a5, xl
1722         src     a6, xh, xl
1723         srl     xh, xh
1724         beqz    a5, 1f
1725         movi    a5, 1
1726         or      a6, a6, a5
1727 1:
1728         /* Set the exponent.  The leading 1 of the mantissa sits in bit 23
                and carries into the exponent field when added, which is why
                the constant is based on 0x7e rather than 0x7f.  */
1729         movi    a5, 0xbd        /* 0x7e + 63 */
1730         sub     a5, a5, a4
1731         slli    a5, a5, 23
1732         add     a2, xh, a5
1733
1734         /* Add the sign.  */
1735         slli    a7, a7, 31
1736         or      a2, a2, a7
1737
1738         /* Round up if the leftover fraction is >= 1/2.  Bit 31 of a6 is
                the first discarded bit.  */
1739         bgez    a6, 2f
1740         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1741
1742         /* Check if the leftover fraction is exactly 1/2.  */
1743         slli    a6, a6, 1
1744         beqz    a6, .Lfloatdisf_exactlyhalf
1745 2:      leaf_return
1746
1747 .Lfloatdisf_bigshift:
1748         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.
                The 32 added to a4 accounts for moving the value up by a
                whole word.  */
1749         do_nsau a4, xl, a5, a6
1750         ssl     a4
1751         sll     xh, xl
1752         movi    xl, 0
1753         addi    a4, a4, 32
1754         j       .Lfloatdisf_shifted
1755
1756 .Lfloatdisf_exactlyhalf:
1757         /* Round down to the nearest even value.  */
1758         srli    a2, a2, 1
1759         slli    a2, a2, 1
1760         leaf_return
1761
1762 #endif /* L_floatdisf */