gcc/config/xtensa/ieee754-sf.S

   1 /* IEEE-754 single-precision functions for Xtensa
   2    Copyright (C) 2006 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    In addition to the permissions in the GNU General Public License,
  13    the Free Software Foundation gives you unlimited permission to link
  14    the compiled version of this file into combinations with other
  15    programs, and to distribute those combinations without any
  16    restriction coming from the use of this file.  (The General Public
  17    License restrictions do apply in other respects; for example, they
  18    cover modification of the file, and distribution when not linked
  19    into a combine executable.)
  20
  21    GCC is distributed in the hope that it will be useful, but WITHOUT
  22    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  23    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  24    License for more details.
  25
  26    You should have received a copy of the GNU General Public License
  27    along with GCC; see the file COPYING.  If not, write to the Free
  28    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  29    02110-1301, USA.  */
  30
  31 #ifdef __XTENSA_EB__
     /* Register aliases: xh/xl name a2/a3 and yh/yl name a4/a5, with the two
        registers of each pair swapped depending on whether __XTENSA_EB__ is
        defined.
        NOTE(review): presumably these are the high/low words of 64-bit
        operands in big- vs. little-endian configurations -- confirm.
        None of the code visible in this section of the file refers to these
        names.  */
  32 #define xh a2
  33 #define xl a3
  34 #define yh a4
  35 #define yl a5
  36 #else
  37 #define xh a3
  38 #define xl a2
  39 #define yh a5
  40 #define yl a4
  41 #endif
  42
  43 /*  Warning!  The branch displacements for some Xtensa branch instructions
  44     are quite small, and this code has been carefully laid out to keep
  45     branch targets in range.  If you change anything, be sure to check that
  46     the assembler is not relaxing anything to branch over a jump.  */
  47
  48 #ifdef L_negsf2
  49
     /* __negsf2: negate the single-precision value held in a2 by toggling
        bit 31 (the sign bit).  The result is left in a2; a4 is used as a
        scratch register for the sign mask.  */
  50         .align  4
  51         .global __negsf2
  52         .type   __negsf2, @function
  53 __negsf2:
  54         abi_entry sp, 32
             /* Build the sign mask 0x80000000 as 1 << 31.  */
  55         movi    a4, 1
             slli    a4, a4, 31
             /* Flip the sign bit and leave everything else untouched.  */
  56         xor     a2, a2, a4
  57         abi_return
  58
  59 #endif /* L_negsf2 */
  60
  61 #ifdef L_addsubsf3
  62
  63         /* Addition */
             /* __addsf3 adds the two single-precision values passed in a2 (x)
                and a3 (y) and leaves the result in a2.  When the operand signs
                differ, y is negated and control transfers to the subtraction
                code at .Lsub_same_sign.

                Register usage, as established by the instructions below:
                  a2  = x on entry; always holds the value that is returned
                  a3  = y
                  a6  = 0x7f800000, the mask of the exponent field
                  a7  = scratch; after the exponent extraction it holds bits
                        23..31 (sign and exponent) of x
                  a8  = bits 23..31 (sign and exponent) of y
                  a9  = bits shifted out of the smaller operand, kept for
                        rounding
                  a10 = scratch (exponent difference, saved lsb)  */
  64 __addsf3_aux:
  65
  66         /* Handle NaNs and Infinities.  (This code is placed before the
  67            start of the function just to keep it in range of the limited
  68            branch displacements.)  */
  69
  70 .Ladd_xnan_or_inf:
  71         /* If y is neither Infinity nor NaN, return x.  */
  72         bnall   a3, a6, 1f
  73         /* If x is a NaN, return it.  Otherwise, return y.  */
  74         slli    a7, a2, 9       /* drop sign and exponent; keep the mantissa */
  75         beqz    a7, .Ladd_ynan_or_inf   /* zero mantissa: x is an Infinity */
  76 1:      abi_return
  77
  78 .Ladd_ynan_or_inf:
  79         /* Return y.  */
  80         mov     a2, a3
  81         abi_return
  82
  83 .Ladd_opposite_signs:
  84         /* Operand signs differ.  Do a subtraction.  */
  85         slli    a7, a6, 8       /* a7 = 0x80000000, the sign-bit mask */
  86         xor     a3, a3, a7      /* negate y so that x + y becomes x - (-y) */
  87         j       .Lsub_same_sign
  88
  89         .align  4
  90         .global __addsf3
  91         .type   __addsf3, @function
  92 __addsf3:
  93         abi_entry sp, 32
  94         movi    a6, 0x7f800000
  95
  96         /* Check if the two operands have the same sign.  */
  97         xor     a7, a2, a3      /* bit 31 is set iff the signs differ */
  98         bltz    a7, .Ladd_opposite_signs
  99
 100 .Ladd_same_sign:
             /* This label is also entered from .Lsub_opposite_signs, after y
                has been negated there.  */
 101         /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
 102         ball    a2, a6, .Ladd_xnan_or_inf
 103         ball    a3, a6, .Ladd_ynan_or_inf
 104
 105         /* Compare the exponents.  The smaller operand will be shifted
 106            right by the exponent difference and added to the larger
 107            one.  */
             /* Nine bits are extracted, so a7/a8 include the sign bit.  The
                signs are equal on this path, so the comparison below still
                orders the operands by exponent.  */
 108         extui   a7, a2, 23, 9
 109         extui   a8, a3, 23, 9
 110         bltu    a7, a8, .Ladd_shiftx
 111
 112 .Ladd_shifty:
 113         /* Check if the smaller (or equal) exponent is zero.  */
 114         bnone   a3, a6, .Ladd_yexpzero
 115
 116         /* Replace y sign/exponent with 0x008.  */
 117         or      a3, a3, a6      /* force bits 23..30 to ones */
 118         slli    a3, a3, 8       /* shift out the sign and all but bit 23 ... */
 119         srli    a3, a3, 8       /* ... leaving the explicit "1.0" at bit 23 */
 120
 121 .Ladd_yexpdiff:
 122         /* Compute the exponent difference.  */
 123         sub     a10, a7, a8
 124
 125         /* Exponent difference >= 32 -- just return the bigger value.  */
 126         bgeui   a10, 32, 1f
 127
 128         /* Shift y right by the exponent difference.  Any bits that are
 129            shifted out of y are saved in a9 for rounding the result.  */
 130         ssr     a10
 131         movi    a9, 0
 132         src     a9, a3, a9
 133         srl     a3, a3
 134
 135         /* Do the addition.  */
 136         add     a2, a2, a3
 137
 138         /* Check if the add overflowed into the exponent.  */
 139         extui   a10, a2, 23, 9
 140         beq     a10, a7, .Ladd_round
 141         mov     a8, a7          /* .Ladd_carry expects the original exponent in a8 */
 142         j       .Ladd_carry
 143
 144 .Ladd_yexpzero:
 145         /* y is a subnormal value.  Replace its sign/exponent with zero,
 146            i.e., no implicit "1.0", and increment the apparent exponent
 147            because subnormals behave as if they had the minimum (nonzero)
 148            exponent.  Test for the case when both exponents are zero.  */
 149         slli    a3, a3, 9
 150         srli    a3, a3, 9
 151         bnone   a2, a6, .Ladd_bothexpzero
 152         addi    a8, a8, 1
 153         j       .Ladd_yexpdiff
 154
 155 .Ladd_bothexpzero:
 156         /* Both exponents are zero.  Handle this as a special case.  There
 157            is no need to shift or round, and the normal code for handling
 158            a carry into the exponent field will not work because it
 159            assumes there is an implicit "1.0" that needs to be added.  */
 160         add     a2, a2, a3
 161 1:      abi_return
 162
 163 .Ladd_xexpzero:
 164         /* Same as "yexpzero" except skip handling the case when both
 165            exponents are zero.  */
 166         slli    a2, a2, 9
 167         srli    a2, a2, 9
 168         addi    a7, a7, 1
 169         j       .Ladd_xexpdiff
 170
 171 .Ladd_shiftx:
 172         /* Same thing as the "shifty" code, but with x and y swapped.  Also,
 173            because the exponent difference is always nonzero in this version,
 174            the shift sequence can use SLL and skip loading a constant zero.  */
 175         bnone   a2, a6, .Ladd_xexpzero
 176
 177         or      a2, a2, a6
 178         slli    a2, a2, 8
 179         srli    a2, a2, 8
 180
 181 .Ladd_xexpdiff:
 182         sub     a10, a8, a7
             /* Exponent difference >= 32 -- y is the result.  */
 183         bgeui   a10, 32, .Ladd_returny
 184
 185         ssr     a10
 186         sll     a9, a2          /* a9 = the bits of x that the SRL below shifts out */
 187         srl     a2, a2
 188
 189         add     a2, a2, a3
 190
 191         /* Check if the add overflowed into the exponent.  */
 192         extui   a10, a2, 23, 9
 193         bne     a10, a8, .Ladd_carry
 194
 195 .Ladd_round:
 196         /* Round up if the leftover fraction is >= 1/2.  */
 197         bgez    a9, 1f          /* msb of a9 clear: fraction < 1/2, no rounding */
 198         addi    a2, a2, 1
 199
 200         /* Check if the leftover fraction is exactly 1/2.  */
 201         slli    a9, a9, 1
 202         beqz    a9, .Ladd_exactlyhalf
 203 1:      abi_return
 204
 205 .Ladd_returny:
 206         mov     a2, a3
 207         abi_return
 208
 209 .Ladd_carry:
 210         /* The addition has overflowed into the exponent field, so the
 211            value needs to be renormalized.  The mantissa of the result
 212            can be recovered by subtracting the original exponent and
 213            adding 0x800000 (which is the explicit "1.0" for the
 214            mantissa of the non-shifted operand -- the "1.0" for the
 215            shifted operand was already added).  The mantissa can then
 216            be shifted right by one bit.  The explicit "1.0" of the
 217            shifted mantissa then needs to be replaced by the exponent,
 218            incremented by one to account for the normalizing shift.
 219            It is faster to combine these operations: do the shift first
 220            and combine the additions and subtractions.  If x is the
 221            original exponent, the result is:
 222                shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
 223            or:
 224                shifted mantissa + ((x + 1) << 22)
 225            Note that the exponent is incremented here by leaving the
 226            explicit "1.0" of the mantissa in the exponent field.  */
 227
 228         /* Shift x right by one bit.  Save the lsb.  */
 229         mov     a10, a2
 230         srli    a2, a2, 1
 231
 232         /* See explanation above.  The original exponent is in a8.  */
 233         addi    a8, a8, 1
 234         slli    a8, a8, 22
 235         add     a2, a2, a8
 236
 237         /* Return an Infinity if the exponent overflowed.  */
 238         ball    a2, a6, .Ladd_infinity
 239
 240         /* Same thing as the "round" code except the msb of the leftover
 241            fraction is bit 0 of a10, with the rest of the fraction in a9.  */
 242         bbci.l  a10, 0, 1f
 243         addi    a2, a2, 1
 244         beqz    a9, .Ladd_exactlyhalf
 245 1:      abi_return
 246
 247 .Ladd_infinity:
 248         /* Clear the mantissa.  */
 249         srli    a2, a2, 23
 250         slli    a2, a2, 23
 251
 252         /* The sign bit may have been lost in a carry-out.  Put it back.  */
 253         slli    a8, a8, 1
 254         or      a2, a2, a8
 255         abi_return
 256
 257 .Ladd_exactlyhalf:
 258         /* Round down to the nearest even value.  */
 259         srli    a2, a2, 1       /* clear bit 0 of the already-incremented result */
 260         slli    a2, a2, 1
 261         abi_return
 262
 263
 264         /* Subtraction */
             /* __subsf3 subtracts the single-precision value in a3 (y) from
                the one in a2 (x) and leaves the result in a2.  When the
                operand signs differ, y is negated and control transfers to
                the addition code at .Ladd_same_sign.

                Register usage, as established by the instructions below:
                  a2  = x on entry; always holds the value that is returned
                  a3  = y
                  a6  = 0x7f800000, the mask of the exponent field (reused as
                        the normalization shift count in .Lsub_borrow)
                  a7  = scratch; after the exponent extraction, the 8-bit
                        exponent of x
                  a8  = 8-bit exponent of y (reused for the mantissa in
                        .Lsub_borrow)
                  a9  = bits shifted out of the smaller operand, kept for
                        rounding
                  a10, a11 = scratch  */
 265 __subsf3_aux:
 266
 267         /* Handle NaNs and Infinities.  (This code is placed before the
 268            start of the function just to keep it in range of the limited
 269            branch displacements.)  */
 270
 271 .Lsub_xnan_or_inf:
 272         /* If y is neither Infinity nor NaN, return x.  */
 273         bnall   a3, a6, 1f
 274         /* Both x and y are either NaN or Inf, so the result is NaN.  */
 275         movi    a4, 0x400000    /* make it a quiet NaN */
 276         or      a2, a2, a4
 277 1:      abi_return
 278
 279 .Lsub_ynan_or_inf:
 280         /* Negate y and return it.  */
 281         slli    a7, a6, 8       /* a7 = 0x80000000, the sign-bit mask */
 282         xor     a2, a3, a7
 283         abi_return
 284
 285 .Lsub_opposite_signs:
 286         /* Operand signs differ.  Do an addition.  */
 287         slli    a7, a6, 8       /* a7 = 0x80000000, the sign-bit mask */
 288         xor     a3, a3, a7      /* negate y so that x - y becomes x + (-y) */
 289         j       .Ladd_same_sign
 290
 291         .align  4
 292         .global __subsf3
 293         .type   __subsf3, @function
 294 __subsf3:
 295         abi_entry sp, 32
 296         movi    a6, 0x7f800000
 297
 298         /* Check if the two operands have the same sign.  */
 299         xor     a7, a2, a3      /* bit 31 is set iff the signs differ */
 300         bltz    a7, .Lsub_opposite_signs
 301
 302 .Lsub_same_sign:
             /* This label is also entered from .Ladd_opposite_signs, after y
                has been negated there.  */
 303         /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
 304         ball    a2, a6, .Lsub_xnan_or_inf
 305         ball    a3, a6, .Lsub_ynan_or_inf
 306
 307         /* Compare the operands.  In contrast to addition, the entire
 308            value matters here.  */
 309         extui   a7, a2, 23, 8
 310         extui   a8, a3, 23, 8
 311         bltu    a2, a3, .Lsub_xsmaller  /* unsigned compare of the raw bit patterns */
 312
 313 .Lsub_ysmaller:
 314         /* Check if the smaller (or equal) exponent is zero.  */
 315         bnone   a3, a6, .Lsub_yexpzero
 316
 317         /* Replace y sign/exponent with 0x008.  */
 318         or      a3, a3, a6
 319         slli    a3, a3, 8
 320         srli    a3, a3, 8
 321
 322 .Lsub_yexpdiff:
 323         /* Compute the exponent difference.  */
 324         sub     a10, a7, a8
 325
 326         /* Exponent difference >= 32 -- just return the bigger value.  */
 327         bgeui   a10, 32, 1f
 328
 329         /* Shift y right by the exponent difference.  Any bits that are
 330            shifted out of y are saved in a9 for rounding the result.  */
 331         ssr     a10
 332         movi    a9, 0
 333         src     a9, a3, a9
 334         srl     a3, a3
 335
 336         sub     a2, a2, a3
 337
 338         /* Subtract the leftover bits in a9 from zero and propagate any
 339            borrow from a2.  */
 340         neg     a9, a9
 341         addi    a10, a2, -1
 342         movnez  a2, a10, a9     /* take a2 - 1 only when the leftover bits are nonzero */
 343
 344         /* Check if the subtract underflowed into the exponent.  */
 345         extui   a10, a2, 23, 8
 346         beq     a10, a7, .Lsub_round
 347         j       .Lsub_borrow
 348
 349 .Lsub_yexpzero:
 350         /* Return zero if the inputs are equal.  (For the non-subnormal
 351            case, subtracting the "1.0" will cause a borrow from the exponent
 352            and this case can be detected when handling the borrow.)  */
 353         beq     a2, a3, .Lsub_return_zero
 354
 355         /* y is a subnormal value.  Replace its sign/exponent with zero,
 356            i.e., no implicit "1.0".  Unless x is also a subnormal, increment
 357            y's apparent exponent because subnormals behave as if they had
 358            the minimum (nonzero) exponent.  */
 359         slli    a3, a3, 9
 360         srli    a3, a3, 9
 361         bnone   a2, a6, .Lsub_yexpdiff
 362         addi    a8, a8, 1
 363         j       .Lsub_yexpdiff
 364
 365 .Lsub_returny:
 366         /* Negate and return y.  */
 367         slli    a7, a6, 8       /* a7 = 0x80000000, the sign-bit mask */
 368         xor     a2, a3, a7
 369 1:      abi_return
 370
 371 .Lsub_xsmaller:
 372         /* Same thing as the "ysmaller" code, but with x and y swapped and
 373            with y negated.  */
 374         bnone   a2, a6, .Lsub_xexpzero
 375
 376         or      a2, a2, a6
 377         slli    a2, a2, 8
 378         srli    a2, a2, 8
 379
 380 .Lsub_xexpdiff:
 381         sub     a10, a8, a7
             /* Exponent difference >= 32 -- the result is -y.  */
 382         bgeui   a10, 32, .Lsub_returny
 383
 384         ssr     a10
 385         movi    a9, 0
 386         src     a9, a2, a9
 387         srl     a2, a2
 388
 389         /* Negate y.  */
 390         slli    a11, a6, 8
 391         xor     a3, a3, a11
 392
 393         sub     a2, a3, a2
 394
             /* Same borrow propagation from the leftover bits as in the
                "yexpdiff" code.  */
 395         neg     a9, a9
 396         addi    a10, a2, -1
 397         movnez  a2, a10, a9
 398
 399         /* Check if the subtract underflowed into the exponent.  */
 400         extui   a10, a2, 23, 8
 401         bne     a10, a8, .Lsub_borrow
 402
 403 .Lsub_round:
 404         /* Round up if the leftover fraction is >= 1/2.  */
 405         bgez    a9, 1f          /* msb of a9 clear: fraction < 1/2, no rounding */
 406         addi    a2, a2, 1
 407
 408         /* Check if the leftover fraction is exactly 1/2.  */
 409         slli    a9, a9, 1
 410         beqz    a9, .Lsub_exactlyhalf
 411 1:      abi_return
 412
 413 .Lsub_xexpzero:
 414         /* Same as "yexpzero".  */
 415         beq     a2, a3, .Lsub_return_zero
 416         slli    a2, a2, 9
 417         srli    a2, a2, 9
 418         bnone   a3, a6, .Lsub_xexpdiff
 419         addi    a7, a7, 1
 420         j       .Lsub_xexpdiff
 421
 422 .Lsub_return_zero:
 423         movi    a2, 0
 424         abi_return
 425
 426 .Lsub_borrow:
 427         /* The subtraction has underflowed into the exponent field, so the
 428            value needs to be renormalized.  Shift the mantissa left as
 429            needed to remove any leading zeros and adjust the exponent
 430            accordingly.  If the exponent is not large enough to remove
 431            all the leading zeros, the result will be a subnormal value.  */
             /* On entry a10 holds the 8-bit exponent field extracted from the
                difference in a2, and a9 holds the leftover (guard) bits.
                NOTE(review): do_nsau is a macro defined outside this section;
                presumably it sets a6 to the number of leading zero bits in
                a8, using a7 and a11 as temporaries -- confirm against its
                definition.  */
 432
 433         slli    a8, a2, 9       /* a8 = mantissa of the difference, left-justified */
 434         beqz    a8, .Lsub_xzero
 435         do_nsau a6, a8, a7, a11
 436         srli    a8, a8, 9
 437         bge     a6, a10, .Lsub_subnormal
 438         addi    a6, a6, 1
 439
 440 .Lsub_normalize_shift:
 441         /* Shift the mantissa (a8/a9) left by a6.  */
 442         ssl     a6
 443         src     a8, a8, a9
 444         sll     a9, a9
 445
 446         /* Combine the shifted mantissa with the sign and exponent,
 447            decrementing the exponent by a6.  (The exponent has already
 448            been decremented by one due to the borrow from the subtraction,
 449            but adding the mantissa will increment the exponent by one.)  */
 450         srli    a2, a2, 23
 451         sub     a2, a2, a6
 452         slli    a2, a2, 23
 453         add     a2, a2, a8
 454         j       .Lsub_round
 455
 456 .Lsub_exactlyhalf:
 457         /* Round down to the nearest even value.  */
 458         srli    a2, a2, 1       /* clear bit 0 of the already-incremented result */
 459         slli    a2, a2, 1
 460         abi_return
 461
 462 .Lsub_xzero:
 463         /* If there was a borrow from the exponent, and the mantissa and
 464            guard digits are all zero, then the inputs were equal and the
 465            result should be zero.  */
 466         beqz    a9, .Lsub_return_zero
 467
 468         /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
 469         addi    a11, a10, -24
 470         movi    a6, 24
 471         movltz  a6, a10, a11    /* a6 = a10 when a10 < 24 */
 472         j       .Lsub_normalize_shift
 473
 474 .Lsub_subnormal:
 475         /* The exponent is too small to shift away all the leading zeros.
 476            Set a6 to the current exponent (which has already been
 477            decremented by the borrow) so that the exponent of the result
 478            will be zero.  Do not add 1 to a6 in this case, because: (1)
 479            adding the mantissa will not increment the exponent, so there is
 480            no need to subtract anything extra from the exponent to
 481            compensate, and (2) the effective exponent of a subnormal is 1
 482            not 0 so the shift amount must be 1 smaller than normal. */
 483         mov     a6, a10
 484         j       .Lsub_normalize_shift
 485
 486 #endif /* L_addsubsf3 */
 487
 488 #ifdef L_mulsf3
 489
 490         /* Multiplication */
             /* __mulsf3 multiplies the two single-precision values passed in
                a2 (x) and a3 (y) and leaves the result in a2.

                Register usage, as established by the instructions below:
                  a2  = x on entry; high word of the product; the result
                  a3  = y
                  a6  = 0x7f800000 exponent mask, later the low word of the
                        product (the bits used for rounding)
                  a7  = x ^ y; bit 31 is the sign of the result
                  a8  = exponent of x, then the exponent of the result
                  a9  = exponent of y, later scratch
                  a4, a5, a10, a11 = scratch
                  a12-a15 = used only by the normalization macros and by the
                        software multiply helper

                Every exit path of this function goes through .Lmul_done so
                that, under the CALL0 ABI, the a12-a15 values saved at entry
                are restored and the stack pointer is readjusted.  */
 491 __mulsf3_aux:
 492
 493         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 494            (This code is placed before the start of the function just to
 495            keep it in range of the limited branch displacements.)  */
 496
 497 .Lmul_xexpzero:
 498         /* Clear the sign bit of x.  */
 499         slli    a2, a2, 1
 500         srli    a2, a2, 1
 501
 502         /* If x is zero, return zero.  */
 503         beqz    a2, .Lmul_return_zero
 504
 505         /* Normalize x.  Adjust the exponent in a8.  */
             /* NOTE(review): do_nsau is a macro defined outside this section;
                presumably it sets a10 to the number of leading zero bits in
                a2, using a11 and a12 as temporaries -- confirm against its
                definition.  */
 506         do_nsau a10, a2, a11, a12
 507         addi    a10, a10, -8
 508         ssl     a10
 509         sll     a2, a2
 510         movi    a8, 1
 511         sub     a8, a8, a10     /* a8 = 1 - (normalizing shift count) */
 512         j       .Lmul_xnormalized
 513
 514 .Lmul_yexpzero:
 515         /* Clear the sign bit of y.  */
 516         slli    a3, a3, 1
 517         srli    a3, a3, 1
 518
 519         /* If y is zero, return zero.  */
 520         beqz    a3, .Lmul_return_zero
 521
 522         /* Normalize y.  Adjust the exponent in a9.  */
 523         do_nsau a10, a3, a11, a12
 524         addi    a10, a10, -8
 525         ssl     a10
 526         sll     a3, a3
 527         movi    a9, 1
 528         sub     a9, a9, a10     /* a9 = 1 - (normalizing shift count) */
 529         j       .Lmul_ynormalized
 530
 531 .Lmul_return_zero:
 532         /* Return zero with the appropriate sign bit.  */
 533         srli    a2, a7, 31
 534         slli    a2, a2, 31
 535         j       .Lmul_done
 536
 537 .Lmul_xnan_or_inf:
 538         /* If y is zero, return NaN.  */
 539         slli    a8, a3, 1       /* ignore the sign bit of y */
 540         bnez    a8, 1f
 541         movi    a4, 0x400000    /* make it a quiet NaN */
 542         or      a2, a2, a4
 543         j       .Lmul_done
 544 1:
 545         /* If y is NaN, return y.  */
 546         bnall   a3, a6, .Lmul_returnx   /* y is neither NaN nor Infinity */
 547         slli    a8, a3, 9
 548         beqz    a8, .Lmul_returnx       /* zero mantissa: y is an Infinity */
 549
 550 .Lmul_returny:
 551         mov     a2, a3
 552
 553 .Lmul_returnx:
 554         /* Set the sign bit and return.  */
 555         extui   a7, a7, 31, 1
 556         slli    a2, a2, 1
 557         ssai    1
 558         src     a2, a7, a2
 559         j       .Lmul_done
 560
 561 .Lmul_ynan_or_inf:
 562         /* If x is zero, return NaN.  */
 563         slli    a8, a2, 1       /* ignore the sign bit of x */
 564         bnez    a8, .Lmul_returny
 565         movi    a7, 0x400000    /* make it a quiet NaN */
 566         or      a2, a3, a7
 567         j       .Lmul_done
 568
 569         .align  4
 570         .global __mulsf3
 571         .type   __mulsf3, @function
 572 __mulsf3:
 573         abi_entry sp, 48
 574 #if __XTENSA_CALL0_ABI__
             /* Allocate 32 more bytes of stack and save a12-a15 in the upper
                half; all four are reloaded at .Lmul_done.  Offsets 0 and 4
                are used further down for a0 and a8 when the software multiply
                helper is called.  */
 575         addi    sp, sp, -32
 576         s32i    a12, sp, 16
 577         s32i    a13, sp, 20
 578         s32i    a14, sp, 24
 579         s32i    a15, sp, 28
 580 #endif
 581         movi    a6, 0x7f800000
 582
 583         /* Get the sign of the result.  */
 584         xor     a7, a2, a3
 585
 586         /* Check for NaN and infinity.  */
 587         ball    a2, a6, .Lmul_xnan_or_inf
 588         ball    a3, a6, .Lmul_ynan_or_inf
 589
 590         /* Extract the exponents.  */
 591         extui   a8, a2, 23, 8
 592         extui   a9, a3, 23, 8
 593
 594         beqz    a8, .Lmul_xexpzero
 595 .Lmul_xnormalized:
 596         beqz    a9, .Lmul_yexpzero
 597 .Lmul_ynormalized:
 598
 599         /* Add the exponents.  */
 600         add     a8, a8, a9
 601
 602         /* Replace sign/exponent fields with explicit "1.0".  */
 603         movi    a10, 0xffffff
 604         or      a2, a2, a6
 605         and     a2, a2, a10
 606         or      a3, a3, a6
 607         and     a3, a3, a10
 608
 609         /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
 610
 611 #if XCHAL_HAVE_MUL32_HIGH
 612
 613         mull    a6, a2, a3
 614         muluh   a2, a2, a3
 615
 616 #else
 617
 618         /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
 619            products.  These partial products are:
 620
 621                 0 xl * yl
 622
 623                 1 xl * yh
 624                 2 xh * yl
 625
 626                 3 xh * yh
 627
 628            If using the Mul16 or Mul32 multiplier options, these input
 629            chunks must be stored in separate registers.  For Mac16, the
 630            UMUL.AA.* opcodes can specify that the inputs come from either
 631            half of the registers, so there is no need to shift them out
 632            ahead of time.  If there is no multiply hardware, the 16-bit
 633            chunks can be extracted when setting up the arguments to the
 634            separate multiply function.  */
 635
 636 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 637         /* Calling a separate multiply function will clobber a0 and requires
 638            use of a8 as a temporary, so save those values now.  (The function
 639            uses a custom ABI so nothing else needs to be saved.)  */
 640         s32i    a0, sp, 0
 641         s32i    a8, sp, 4
 642 #endif
 643
 644 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
 645
 646 #define a2h a4
 647 #define a3h a5
 648
 649         /* Get the high halves of the inputs into registers.  */
 650         srli    a2h, a2, 16
 651         srli    a3h, a3, 16
 652
 653 #define a2l a2
 654 #define a3l a3
 655
 656 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
 657         /* Clear the high halves of the inputs.  This does not matter
 658            for MUL16 because the high bits are ignored.  */
 659         extui   a2, a2, 0, 16
 660         extui   a3, a3, 0, 16
 661 #endif
 662 #endif /* MUL16 || MUL32 */
 663
 664
     /* do_mul(dst, xreg, xhalf, yreg, yhalf) computes one 16x16 partial
        product into dst.  xhalf and yhalf are the letters "l" or "h"; how
        they select a half depends on which of the definitions below is
        active.  */
 665 #if XCHAL_HAVE_MUL16
 666
 667 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 668         mul16u  dst, xreg ## xhalf, yreg ## yhalf
 669
 670 #elif XCHAL_HAVE_MUL32
 671
 672 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 673         mull    dst, xreg ## xhalf, yreg ## yhalf
 674
 675 #elif XCHAL_HAVE_MAC16
 676
 677 /* The preprocessor insists on inserting a space when concatenating after
 678    a period in the definition of do_mul below.  These macros are a workaround
 679    using underscores instead of periods when doing the concatenation.  */
 680 #define umul_aa_ll umul.aa.ll
 681 #define umul_aa_lh umul.aa.lh
 682 #define umul_aa_hl umul.aa.hl
 683 #define umul_aa_hh umul.aa.hh
 684
 685 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 686         umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
 687         rsr     dst, ACCLO
 688
 689 #else /* no multiply hardware */
 690
     /* Software fallback: place the selected 16-bit halves in a13 and a14,
        call the helper .Lmul_mulsi3 with CALL0, and copy its result from
        a12 into dst.  */
 691 #define set_arg_l(dst, src) \
 692         extui   dst, src, 0, 16
 693 #define set_arg_h(dst, src) \
 694         srli    dst, src, 16
 695
 696 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
 697         set_arg_ ## xhalf (a13, xreg); \
 698         set_arg_ ## yhalf (a14, yreg); \
 699         call0   .Lmul_mulsi3; \
 700         mov     dst, a12
 701 #endif
 702
 703         /* Add pp1 and pp2 into a6 with carry-out in a9.  */
 704         do_mul(a6, a2, l, a3, h)        /* pp 1 */
 705         do_mul(a11, a2, h, a3, l)       /* pp 2 */
 706         movi    a9, 0
 707         add     a6, a6, a11
 708         bgeu    a6, a11, 1f     /* sum >= addend: the add did not wrap */
 709         addi    a9, a9, 1
 710 1:
 711         /* Shift the high half of a9/a6 into position in a9.  Note that
 712            this value can be safely incremented without any carry-outs.  */
 713         ssai    16
 714         src     a9, a9, a6
 715
 716         /* Compute the low word into a6.  */
 717         do_mul(a11, a2, l, a3, l)       /* pp 0 */
 718         sll     a6, a6
 719         add     a6, a6, a11
 720         bgeu    a6, a11, 1f     /* sum >= addend: the add did not wrap */
 721         addi    a9, a9, 1
 722 1:
 723         /* Compute the high word into a2.  */
 724         do_mul(a2, a2, h, a3, h)        /* pp 3 */
 725         add     a2, a2, a9
 726
 727 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 728         /* Restore values saved on the stack during the multiplication.  */
 729         l32i    a0, sp, 0
 730         l32i    a8, sp, 4
 731 #endif
 732 #endif
 733
 734         /* Shift left by 9 bits, unless there was a carry-out from the
 735            multiply, in which case, shift by 8 bits and increment the
 736            exponent.  */
 737         movi    a4, 9
 738         srli    a5, a2, 24 - 9  /* nonzero iff bit 15 or above of the high word is set */
 739         beqz    a5, 1f
 740         addi    a4, a4, -1
 741         addi    a8, a8, 1
 742 1:      ssl     a4
 743         src     a2, a2, a6
 744         sll     a6, a6
 745
 746         /* Subtract the extra bias from the exponent sum (plus one to account
 747            for the explicit "1.0" of the mantissa that will be added to the
 748            exponent in the final result).  */
 749         movi    a4, 0x80
 750         sub     a8, a8, a4
 751
 752         /* Check for over/underflow.  The value in a8 is one less than the
 753            final exponent, so values in the range 0..fd are OK here.  */
 754         movi    a4, 0xfe
 755         bgeu    a8, a4, .Lmul_overflow  /* unsigned compare also catches negative a8 */
 756
 757 .Lmul_round:
 758         /* Round.  */
 759         bgez    a6, .Lmul_rounded       /* msb of a6 clear: fraction < 1/2 */
 760         addi    a2, a2, 1
 761         slli    a6, a6, 1
 762         beqz    a6, .Lmul_exactlyhalf
 763
 764 .Lmul_rounded:
 765         /* Add the exponent to the mantissa.  */
 766         slli    a8, a8, 23
 767         add     a2, a2, a8
 768
 769 .Lmul_addsign:
 770         /* Add the sign bit.  */
 771         srli    a7, a7, 31
 772         slli    a7, a7, 31
 773         or      a2, a2, a7
 774
 775 .Lmul_done:
 776 #if __XTENSA_CALL0_ABI__
             /* Undo the register saves and stack adjustment made at entry.  */
 777         l32i    a12, sp, 16
 778         l32i    a13, sp, 20
 779         l32i    a14, sp, 24
 780         l32i    a15, sp, 28
 781         addi    sp, sp, 32
 782 #endif
 783         abi_return
 784
 785 .Lmul_exactlyhalf:
 786         /* Round down to the nearest even value.  */
 787         srli    a2, a2, 1       /* clear bit 0 of the already-incremented result */
 788         slli    a2, a2, 1
 789         j       .Lmul_rounded
 790
 791 .Lmul_overflow:
             /* Reached for both overflow and underflow; a negative a8 means
                underflow.  */
 792         bltz    a8, .Lmul_underflow
 793         /* Return +/- Infinity.  */
 794         movi    a8, 0xff
 795         slli    a2, a8, 23
 796         j       .Lmul_addsign
 797
 798 .Lmul_underflow:
 799         /* Create a subnormal value, where the exponent field contains zero,
 800            but the effective exponent is 1.  The value of a8 is one less than
 801            the actual exponent, so just negate it to get the shift amount.  */
 802         neg     a8, a8
 803         mov     a9, a6          /* remember the rounding bits already in a6 */
 804         ssr     a8
 805         bgeui   a8, 32, .Lmul_flush_to_zero
 806
 807         /* Shift a2 right.  Any bits that are shifted out of a2 are saved
 808            in a6 (combined with the shifted-out bits currently in a6) for
 809            rounding the result.  */
 810         sll     a6, a2
 811         srl     a2, a2
 812
 813         /* Set the exponent to zero.  */
 814         movi    a8, 0
 815
 816         /* Pack any nonzero bits shifted out into a6.  */
 817         beqz    a9, .Lmul_round
 818         movi    a9, 1
 819         or      a6, a6, a9      /* sticky bit: the earlier leftover bits were nonzero */
 820         j       .Lmul_round
 821
 822 .Lmul_flush_to_zero:
 823         /* Return zero with the appropriate sign bit.  */
 824         srli    a2, a7, 31
 825         slli    a2, a2, 31
 826         j       .Lmul_done
 827
 828 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
 829
 830         /* For Xtensa processors with no multiply hardware, this simplified
 831            version of _mulsi3 is used for multiplying 16-bit chunks of
 832            the floating-point mantissas.  It uses a custom ABI: the inputs
 833            are passed in a13 and a14, the result is returned in a12, and
 834            a8 and a15 are clobbered.  */
             /* The input registers are consumed as well: each pass of the loop
                shifts a13 right and a14 left by four bits, and the loop ends
                once a13 reaches zero.  The routine returns with RET, so it
                relies on the return address that the CALL0 in do_mul places
                in a0.
                NOTE(review): do_addx2/do_addx4/do_addx8 are macros defined
                outside this section; presumably each sets a15 to a14 scaled
                by 2, 4 or 8 plus a12, with the last argument as a temporary
                -- confirm against their definitions.  */
 835         .align  4
 836 .Lmul_mulsi3:
 837         movi    a12, 0          /* a12 accumulates the product */
 838 .Lmul_mult_loop:
             /* Handle four bits of a13 per pass: for each bit, compute the
                candidate sum into a15 and commit it to a12 only when that
                bit is set.  */
 839         add     a15, a14, a12
 840         extui   a8, a13, 0, 1
 841         movnez  a12, a15, a8
 842
 843         do_addx2 a15, a14, a12, a15
 844         extui   a8, a13, 1, 1
 845         movnez  a12, a15, a8
 846
 847         do_addx4 a15, a14, a12, a15
 848         extui   a8, a13, 2, 1
 849         movnez  a12, a15, a8
 850
 851         do_addx8 a15, a14, a12, a15
 852         extui   a8, a13, 3, 1
 853         movnez  a12, a15, a8
 854
 855         srli    a13, a13, 4     /* discard the four bits just handled */
 856         slli    a14, a14, 4     /* scale the other operand by 16 to match */
 857         bnez    a13, .Lmul_mult_loop
 858         ret
 859 #endif /* !MUL16 && !MUL32 && !MAC16 */
 860 #endif /* L_mulsf3 */
 861
 862 #ifdef L_divsf3
 863
 864         /* Division */
 865 __divsf3_aux:
 866
 867         /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
 868            (This code is placed before the start of the function just to
 869            keep it in range of the limited branch displacements.)  */
             /* Register state set up by __divsf3 before branching here:
                  a2 = x, a3 = y
                  a6 = 0x7f800000 (exponent field mask)
                  a7 = x ^ y (bit 31 is the sign of the result)
                The .Ldiv_*expzero handlers are entered after the exponent
                fields have been extracted into a8 (x) and a9 (y); the
                .Ldiv_*nan_or_inf handlers are entered before that.  */
 870
 871 .Ldiv_yexpzero:
             /* The exponent field of y is zero: y is zero or subnormal.  */
 872         /* Clear the sign bit of y.  */
 873         slli    a3, a3, 1
 874         srli    a3, a3, 1
 875
 876         /* Check for division by zero.  */
 877         beqz    a3, .Ldiv_yzero
 878
 879         /* Normalize y.  Adjust the exponent in a9.  do_nsau is a macro
                defined elsewhere in this file; presumably it leaves the number
                of leading zero bits of a3 in a10, using a4 and a5 as
                temporaries.  Subtracting 8 gives the left shift that brings
                the leading 1 up to bit 23, and the exponent becomes 1 minus
                that shift.  */
 880         do_nsau a10, a3, a4, a5
 881         addi    a10, a10, -8
 882         ssl     a10
 883         sll     a3, a3
 884         movi    a9, 1
 885         sub     a9, a9, a10
 886         j       .Ldiv_ynormalized
 887
 888 .Ldiv_yzero:
 889         /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
             /* a4 = |x|; a2 = result sign (from a7) combined with an
                all-ones exponent, i.e. a signed Infinity.  */
 890         slli    a4, a2, 1
 891         srli    a4, a4, 1
 892         srli    a2, a7, 31
 893         slli    a2, a2, 31
 894         or      a2, a2, a6
 895         bnez    a4, 1f
 896         movi    a4, 0x400000    /* make it a quiet NaN */
 897         or      a2, a2, a4
 898 1:      abi_return
 899
 900 .Ldiv_xexpzero:
             /* The exponent field of x is zero: x is zero or subnormal.  */
 901         /* Clear the sign bit of x.  */
 902         slli    a2, a2, 1
 903         srli    a2, a2, 1
 904
 905         /* If x is zero, return zero.  */
 906         beqz    a2, .Ldiv_return_zero
 907
 908         /* Normalize x.  Adjust the exponent in a8.  Same scheme as for y
                above: shift the leading 1 up to bit 23 and set the exponent
                to 1 minus the shift count.  */
 909         do_nsau a10, a2, a4, a5
 910         addi    a10, a10, -8
 911         ssl     a10
 912         sll     a2, a2
 913         movi    a8, 1
 914         sub     a8, a8, a10
 915         j       .Ldiv_xnormalized
 916
 917 .Ldiv_return_zero:
 918         /* Return zero with the appropriate sign bit.  */
 919         srli    a2, a7, 31
 920         slli    a2, a2, 31
 921         abi_return
 922
 923 .Ldiv_xnan_or_inf:
             /* x is NaN or Infinity (its exponent field is all ones).  */
 924         /* Set the sign bit of the result.  */
 925         srli    a7, a3, 31
 926         slli    a7, a7, 31
 927         xor     a2, a2, a7
 928         /* If y is NaN or Inf, return NaN.  */
 929         ball    a3, a6, .Ldiv_return_nan
             /* y is finite.  If x is itself a NaN (nonzero mantissa), return
                it with the quiet bit set so that a signaling NaN operand
                never comes back unquieted; otherwise x is Infinity and
                Inf / finite is a correctly signed Infinity.  a7 is free to
                reuse: the sign has already been folded into a2.  */
             slli    a7, a2, 9
             bnez    a7, .Ldiv_return_nan
 932         abi_return
 933
 934 .Ldiv_ynan_or_inf:
             /* y is NaN or Infinity, and x is neither.  */
 935         /* If y is Infinity, return zero.  */
 936         slli    a8, a3, 9
 937         beqz    a8, .Ldiv_return_zero
 938         /* y is NaN; return it.  */
 939         mov     a2, a3
             /* Fall through to quiet the NaN.  */

     .Ldiv_return_nan:
             /* Common exit for every NaN result built from an operand in a2:
                force the quiet bit (bit 22) on.  */
             movi    a4, 0x400000    /* make it a quiet NaN */
             or      a2, a2, a4
 940         abi_return
 941
 942         .align  4
 943         .global __divsf3
 944         .type   __divsf3, @function
             /* __divsf3: single-precision division, x / y.
                  In:   a2 = x, a3 = y (single-precision bit patterns).
                  Out:  a2 = quotient, rounded to nearest with ties to even.
                Results too large for the format become a signed Infinity;
                results too small become subnormal or a signed zero.
                Zeros, subnormals, NaNs and Infinities in the operands are
                dispatched to the .Ldiv_* handlers placed before this entry
                point.
                Register roles on the main path:
                  a6  = 0x7f800000 (later reused for rounding bits)
                  a7  = x ^ y, bit 31 is the result sign
                  a8  = result exponent
                  a9  = exponent of y, then the loop counter
                  a10 = quotient being built
                  a2  = dividend / running remainder, a3 = divisor.  */
 945 __divsf3:
 946         abi_entry sp, 32
 947         movi    a6, 0x7f800000
 948
 949         /* Get the sign of the result.  */
 950         xor     a7, a2, a3
 951
 952         /* Check for NaN and infinity (exponent field all ones).  */
 953         ball    a2, a6, .Ldiv_xnan_or_inf
 954         ball    a3, a6, .Ldiv_ynan_or_inf
 955
 956         /* Extract the exponents.  */
 957         extui   a8, a2, 23, 8
 958         extui   a9, a3, 23, 8
 959
             /* A zero exponent field means zero or subnormal.  The handlers
                either return directly or normalize the operand, fix up its
                exponent and jump back to the label following the branch.  */
 960         beqz    a9, .Ldiv_yexpzero
 961 .Ldiv_ynormalized:
 962         beqz    a8, .Ldiv_xexpzero
 963 .Ldiv_xnormalized:
 964
 965         /* Subtract the exponents.  */
 966         sub     a8, a8, a9
 967
 968         /* Replace sign/exponent fields with explicit "1.0".  */
             /* OR-ing in a6 sets bit 23; the 24-bit mask then drops the sign
                and the remaining exponent bits.  */
 969         movi    a10, 0xffffff
 970         or      a2, a2, a6
 971         and     a2, a2, a10
 972         or      a3, a3, a6
 973         and     a3, a3, a10
 974
 975         /* The first digit of the mantissa division must be a one.
 976            Shift x (and adjust the exponent) as needed to make this true.  */
             /* No shift is needed when the mantissa of y is already smaller
                than that of x.  */
 977         bltu    a3, a2, 1f
 978         slli    a2, a2, 1
 979         addi    a8, a8, -1
 980 1:
 981         /* Do the first subtraction and shift.  */
 982         sub     a2, a2, a3
 983         slli    a2, a2, 1
 984
 985         /* Put the quotient into a10.  */
 986         movi    a10, 1
 987
 988         /* Divide one bit at a time for 23 bits.  */
             /* With XCHAL_HAVE_LOOPS the body is wrapped in a hardware "loop"
                ending at .Ldiv_loopend; otherwise a9 is counted down
                explicitly at the bottom of the body.  */
 989         movi    a9, 23
 990 #if XCHAL_HAVE_LOOPS
 991         loop    a9, .Ldiv_loopend
 992 #endif
 993 .Ldiv_loop:
 994         /* Shift the quotient << 1.  */
 995         slli    a10, a10, 1
 996
 997         /* Is this digit a 0 or 1?  */
 998         bltu    a2, a3, 1f
 999
1000         /* Output a 1 and subtract.  */
1001         addi    a10, a10, 1
1002         sub     a2, a2, a3
1003
1004         /* Shift the dividend << 1.  */
1005 1:      slli    a2, a2, 1
1006
1007 #if !XCHAL_HAVE_LOOPS
1008         addi    a9, a9, -1
1009         bnez    a9, .Ldiv_loop
1010 #endif
1011 .Ldiv_loopend:
1012
1013         /* Add the exponent bias (less one to account for the explicit "1.0"
1014            of the mantissa that will be added to the exponent in the final
1015            result).  */
1016         addi    a8, a8, 0x7e
1017
1018         /* Check for over/underflow.  The value in a8 is one less than the
1019            final exponent, so values in the range 0..fd are OK here.  */
             /* The unsigned compare sends both too-large and negative
                exponents to .Ldiv_overflow, which tells them apart.  */
1020         movi    a4, 0xfe
1021         bgeu    a8, a4, .Ldiv_overflow
1022
1023 .Ldiv_round:
1024         /* Round.  The remainder (<< 1) is in a2.  */
             /* Remainder below the divisor: round down.  Otherwise round up,
                and handle an exact tie separately.  */
1025         bltu    a2, a3, .Ldiv_rounded
1026         addi    a10, a10, 1
1027         beq     a2, a3, .Ldiv_exactlyhalf
1028
1029 .Ldiv_rounded:
1030         /* Add the exponent to the mantissa.  */
             /* This is an add, not an or: the explicit leading 1 in bit 23
                of a10 (or a carry out of rounding) bumps the exponent.  */
1031         slli    a8, a8, 23
1032         add     a2, a10, a8
1033
1034 .Ldiv_addsign:
1035         /* Add the sign bit.  */
1036         srli    a7, a7, 31
1037         slli    a7, a7, 31
1038         or      a2, a2, a7
1039         abi_return
1040
1041 .Ldiv_overflow:
             /* a8 is out of the range 0..0xfd: negative means underflow.  */
1042         bltz    a8, .Ldiv_underflow
1043         /* Return +/- Infinity.  */
1044         addi    a8, a4, 1       /* 0xff */
1045         slli    a2, a8, 23
1046         j       .Ldiv_addsign
1047
1048 .Ldiv_exactlyhalf:
1049         /* Remainder is exactly half the divisor.  Round even.  */
             /* Clearing bit 0 undoes the increment when it made the
                quotient odd.  */
1050         srli    a10, a10, 1
1051         slli    a10, a10, 1
1052         j       .Ldiv_rounded
1053
1054 .Ldiv_underflow:
1055         /* Create a subnormal value, where the exponent field contains zero,
1056            but the effective exponent is 1.  The value of a8 is one less than
1057            the actual exponent, so just negate it to get the shift amount.  */
             /* A shift of 32 or more discards every quotient bit, so the
                result is a signed zero.  */
1058         neg     a8, a8
1059         ssr     a8
1060         bgeui   a8, 32, .Ldiv_flush_to_zero
1061
1062         /* Shift a10 right.  Any bits that are shifted out of a10 are
1063            saved in a6 for rounding the result.  */
1064         sll     a6, a10
1065         srl     a10, a10
1066
1067         /* Set the exponent to zero.  */
1068         movi    a8, 0
1069
1070         /* Pack any nonzero remainder (in a2) into a6.  */
             /* The remainder only matters as a sticky bit, so it is folded
                into bit 0 of a6.  */
1071         beqz    a2, 1f
1072         movi    a9, 1
1073         or      a6, a6, a9
1074
1075         /* Round a10 based on the bits shifted out into a6.  */
             /* Bit 31 of a6 is the first discarded bit: clear means round
                down.  Otherwise round up, and if nothing else was discarded
                (an exact tie) clear bit 0 to round to even.  */
1076 1:      bgez    a6, .Ldiv_rounded
1077         addi    a10, a10, 1
1078         slli    a6, a6, 1
1079         bnez    a6, .Ldiv_rounded
1080         srli    a10, a10, 1
1081         slli    a10, a10, 1
1082         j       .Ldiv_rounded
1083
1084 .Ldiv_flush_to_zero:
1085         /* Return zero with the appropriate sign bit.  */
1086         srli    a2, a7, 31
1087         slli    a2, a2, 31
1088         abi_return
1089
1090 #endif /* L_divsf3 */
1091
1092 #ifdef L_cmpsf2
1093
1094         /* Equal and Not Equal */
1095
1096         .align  4
1097         .global __eqsf2
1098         .global __nesf2
1099         .set    __nesf2, __eqsf2
1100         .type   __eqsf2, @function
             /* __eqsf2 (and its alias __nesf2): equality test.
                  In:   a2 = x, a3 = y (single-precision bit patterns).
                  Out:  a2 = 0 if x == y, 1 otherwise.
                Two zeros compare equal regardless of sign, and a NaN never
                compares equal, not even to itself.  */
1101 __eqsf2:
1102         abi_entry sp, 32
1103         bne     a2, a3, 4f
1104
1105         /* The values are equal but NaN != NaN.  Check the exponent.  */
1106         movi    a6, 0x7f800000
1107         ball    a2, a6, 3f
1108
1109         /* Equal.  */
1110         movi    a2, 0
1111         abi_return
1112
1113         /* Not equal.  (No branch in this function targets label 2.)  */
1114 2:      movi    a2, 1
1115         abi_return
1116
1117         /* Check if the mantissas are nonzero.  */
1118 3:      slli    a7, a2, 9
1119         j       5f
1120
1121         /* Check if x and y are zero with different signs.  */
1122 4:      or      a7, a2, a3
1123         slli    a7, a7, 1
1124
1125         /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1126            of x when the exponent field of x is all ones and x == y.  */
1127 5:      movi    a2, 0
1128         movi    a3, 1
1129         movnez  a2, a3, a7
1130         abi_return
1131
1132
1133         /* Greater Than */
1134
1135         .align  4
1136         .global __gtsf2
1137         .type   __gtsf2, @function
             /* __gtsf2: greater-than test.
                  In:   a2 = x, a3 = y (single-precision bit patterns).
                  Out:  a2 = 1 if x > y, 0 otherwise; 0 if either is a NaN.
                Once NaNs have been ruled out the comparison itself is done
                by the shared code at .Lle_cmp.  */
1138 __gtsf2:
1139         abi_entry sp, 32
1140         movi    a6, 0x7f800000
             /* An all-ones exponent field means NaN or Infinity; only a
                nonzero mantissa makes it a NaN.  */
1141         ball    a2, a6, 2f
1142 1:      bnall   a3, a6, .Lle_cmp
1143
1144         /* Check if y is a NaN.  */
1145         slli    a7, a3, 9
1146         beqz    a7, .Lle_cmp
1147         movi    a2, 0
1148         abi_return
1149
1150         /* Check if x is a NaN.  */
1151 2:      slli    a7, a2, 9
1152         beqz    a7, 1b
1153         movi    a2, 0
1154         abi_return
1155
1156
1157         /* Less Than or Equal */
1158
1159         .align  4
1160         .global __lesf2
1161         .type   __lesf2, @function
             /* __lesf2: less-than-or-equal test.
                  In:   a2 = x, a3 = y (single-precision bit patterns).
                  Out:  a2 = 0 if x <= y, 1 otherwise; 1 if either is a NaN.  */
1162 __lesf2:
1163         abi_entry sp, 32
1164         movi    a6, 0x7f800000
             /* An all-ones exponent field means NaN or Infinity; only a
                nonzero mantissa makes it a NaN.  */
1165         ball    a2, a6, 2f
1166 1:      bnall   a3, a6, .Lle_cmp
1167
1168         /* Check if y is a NaN.  */
1169         slli    a7, a3, 9
1170         beqz    a7, .Lle_cmp
1171         movi    a2, 1
1172         abi_return
1173
1174         /* Check if x is a NaN.  */
1175 2:      slli    a7, a2, 9
1176         beqz    a7, 1b
1177         movi    a2, 1
1178         abi_return
1179
1180 .Lle_cmp:
             /* Shared by __gtsf2 and __lesf2; neither operand is a NaN here.
                Returns a2 = 0 if x <= y and a2 = 1 if x > y.  */
1181         /* Check if x and y have different signs.  */
1182         xor     a7, a2, a3
1183         bltz    a7, .Lle_diff_signs
1184
1185         /* Check if x is negative.  */
1186         bltz    a2, .Lle_xneg
1187
1188         /* Check if x <= y.  */
             /* Both are non-negative, so the bit patterns order the same
                way as the values.  */
1189         bltu    a3, a2, 5f
1190 4:      movi    a2, 0
1191         abi_return
1192
1193 .Lle_xneg:
1194         /* Check if y <= x.  */
             /* Both are negative, so the larger bit pattern is the smaller
                value and the unsigned test is reversed.  */
1195         bgeu    a2, a3, 4b
1196 5:      movi    a2, 1
1197         abi_return
1198
1199 .Lle_diff_signs:
             /* Signs differ: a negative x is below a non-negative y.  */
1200         bltz    a2, 4b
1201
1202         /* Check if both x and y are zero.  */
             /* x is non-negative and y is negative, so x > y unless the two
                are +0 and -0, which compare equal.  */
1203         or      a7, a2, a3
1204         slli    a7, a7, 1
1205         movi    a2, 1
1206         movi    a3, 0
1207         moveqz  a2, a3, a7
1208         abi_return
1209
1210
1211         /* Greater Than or Equal */
1212
1213         .align  4
1214         .global __gesf2
1215         .type   __gesf2, @function
             /* __gesf2: greater-than-or-equal test.
                  In:   a2 = x, a3 = y (single-precision bit patterns).
                  Out:  a2 = 0 if x >= y, -1 otherwise; -1 if either is a NaN.
                Once NaNs have been ruled out the comparison itself is done
                by the shared code at .Llt_cmp.  */
1216 __gesf2:
1217         abi_entry sp, 32
1218         movi    a6, 0x7f800000
             /* An all-ones exponent field means NaN or Infinity; only a
                nonzero mantissa makes it a NaN.  */
1219         ball    a2, a6, 2f
1220 1:      bnall   a3, a6, .Llt_cmp
1221
1222         /* Check if y is a NaN.  */
1223         slli    a7, a3, 9
1224         beqz    a7, .Llt_cmp
1225         movi    a2, -1
1226         abi_return
1227
1228         /* Check if x is a NaN.  */
1229 2:      slli    a7, a2, 9
1230         beqz    a7, 1b
1231         movi    a2, -1
1232         abi_return
1233
1234
1235         /* Less Than */
1236
1237         .align  4
1238         .global __ltsf2
1239         .type   __ltsf2, @function
             /* __ltsf2: less-than test.
                  In:   a2 = x, a3 = y (single-precision bit patterns).
                  Out:  a2 = -1 if x < y, 0 otherwise; 0 if either is a NaN.  */
1240 __ltsf2:
1241         abi_entry sp, 32
1242         movi    a6, 0x7f800000
             /* An all-ones exponent field means NaN or Infinity; only a
                nonzero mantissa makes it a NaN.  */
1243         ball    a2, a6, 2f
1244 1:      bnall   a3, a6, .Llt_cmp
1245
1246         /* Check if y is a NaN.  */
1247         slli    a7, a3, 9
1248         beqz    a7, .Llt_cmp
1249         movi    a2, 0
1250         abi_return
1251
1252         /* Check if x is a NaN.  */
1253 2:      slli    a7, a2, 9
1254         beqz    a7, 1b
1255         movi    a2, 0
1256         abi_return
1257
1258 .Llt_cmp:
             /* Shared by __gesf2 and __ltsf2; neither operand is a NaN here.
                Returns a2 = -1 if x < y and a2 = 0 if x >= y.  */
1259         /* Check if x and y have different signs.  */
1260         xor     a7, a2, a3
1261         bltz    a7, .Llt_diff_signs
1262
1263         /* Check if x is negative.  */
1264         bltz    a2, .Llt_xneg
1265
1266         /* Check if x < y.  */
             /* Both are non-negative, so the bit patterns order the same
                way as the values.  */
1267         bgeu    a2, a3, 5f
1268 4:      movi    a2, -1
1269         abi_return
1270
1271 .Llt_xneg:
1272         /* Check if y < x.  */
             /* Both are negative, so the larger bit pattern is the smaller
                value and the unsigned test is reversed.  */
1273         bltu    a3, a2, 4b
1274 5:      movi    a2, 0
1275         abi_return
1276
1277 .Llt_diff_signs:
             /* Signs differ: a non-negative x is never below a negative y.  */
1278         bgez    a2, 5b
1279
1280         /* Check if both x and y are zero.  */
             /* x is negative and y is non-negative, so x < y unless the two
                are -0 and +0, which compare equal.  */
1281         or      a7, a2, a3
1282         slli    a7, a7, 1
1283         movi    a2, 0
1284         movi    a3, -1
1285         movnez  a2, a3, a7
1286         abi_return
1287
1288
1289         /* Unordered */
1290
1291         .align  4
1292         .global __unordsf2
1293         .type   __unordsf2, @function
             /* __unordsf2: unordered test.
                  In:   a2 = x, a3 = y (single-precision bit patterns).
                  Out:  a2 = 1 if either operand is a NaN, 0 otherwise.  */
1294 __unordsf2:
1295         abi_entry sp, 32
1296         movi    a6, 0x7f800000
             /* An all-ones exponent field needs a closer look at the
                mantissa: labels 3 and 4 do that for x and y respectively.  */
1297         ball    a2, a6, 3f
1298 1:      ball    a3, a6, 4f
1299 2:      movi    a2, 0
1300         abi_return
1301
             /* x is NaN or Infinity.  A zero mantissa means Infinity, so go
                on to check y.  */
1302 3:      slli    a7, a2, 9
1303         beqz    a7, 1b
1304         movi    a2, 1
1305         abi_return
1306
             /* y is NaN or Infinity.  A zero mantissa means Infinity, so the
                operands are ordered.  */
1307 4:      slli    a7, a3, 9
1308         beqz    a7, 2b
1309         movi    a2, 1
1310         abi_return
1311
1312 #endif /* L_cmpsf2 */
1313
1314 #ifdef L_fixsfsi
1315
1316         .align  4
1317         .global __fixsfsi
1318         .type   __fixsfsi, @function
             /* __fixsfsi: convert single-precision to signed 32-bit integer.
                  In:   a2 = x (single-precision bit pattern).
                  Out:  a2 = integer result; the fraction bits are discarded
                        by a right shift of the magnitude before the sign is
                        applied.
                Magnitudes below 1 give 0.  Magnitudes too large for 32
                bits, and Infinities, give 0x7fffffff or 0x80000000
                according to the sign.  NaN gives 0x7fffffff.  */
1319 __fixsfsi:
1320         abi_entry sp, 32
1321
1322         /* Check for NaN and Infinity.  */
1323         movi    a6, 0x7f800000
1324         ball    a2, a6, .Lfixsfsi_nan_or_inf
1325
1326         /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
1327         extui   a4, a2, 23, 8
1328         addi    a4, a4, -0x7e
1329         bgei    a4, 32, .Lfixsfsi_maxint
1330         blti    a4, 1, .Lfixsfsi_zero
1331
1332         /* Add explicit "1.0" and shift << 8.  */
             /* a7 keeps the sign of x in bit 31; the shift moves the
                explicit 1 from bit 23 to bit 31 of a5.  */
1333         or      a7, a2, a6
1334         slli    a5, a7, 8
1335
1336         /* Shift back to the right, based on the exponent.  */
1337         ssl     a4              /* shift by 32 - a4 */
1338         srl     a5, a5
1339
1340         /* Negate the result if sign != 0.  */
1341         neg     a2, a5
1342         movgez  a2, a5, a7
1343         abi_return
1344
1345 .Lfixsfsi_nan_or_inf:
1346         /* Handle Infinity and NaN.  */
             /* A zero mantissa means Infinity, which saturates according to
                its sign still held in a2.  */
1347         slli    a4, a2, 9
1348         beqz    a4, .Lfixsfsi_maxint
1349
1350         /* Translate NaN to +maxint.  */
1351         movi    a2, 0
1352
1353 .Lfixsfsi_maxint:
             /* Saturate: 0x7fffffff if a2 is non-negative, else 0x80000000.  */
1354         slli    a4, a6, 8       /* 0x80000000 */
1355         addi    a5, a4, -1      /* 0x7fffffff */
1356         movgez  a4, a5, a2
1357         mov     a2, a4
1358         abi_return
1359
1360 .Lfixsfsi_zero:
1361         movi    a2, 0
1362         abi_return
1363
1364 #endif /* L_fixsfsi */
1365
1366 #ifdef L_fixsfdi
1367
1368         .align  4
1369         .global __fixsfdi
1370         .type   __fixsfdi, @function
             /* __fixsfdi: convert single-precision to signed 64-bit integer.
                  In:   a2 = x (single-precision bit pattern).
                  Out:  xh:xl = integer result, where xh/xl name a2/a3 in
                        the order selected by __XTENSA_EB__ at the top of
                        this file.  The fraction bits are discarded by a
                        right shift of the magnitude before the sign is
                        applied.
                Magnitudes below 1 give 0.  Magnitudes too large for 64
                bits, and Infinities, give 0x7fffffffffffffff or
                0x8000000000000000 according to the sign.  NaN gives
                0x7fffffffffffffff.  */
1371 __fixsfdi:
1372         abi_entry sp, 32
1373
1374         /* Check for NaN and Infinity.  */
1375         movi    a6, 0x7f800000
1376         ball    a2, a6, .Lfixsfdi_nan_or_inf
1377
1378         /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
1379         extui   a4, a2, 23, 8
1380         addi    a4, a4, -0x7e
1381         bgei    a4, 64, .Lfixsfdi_maxint
1382         blti    a4, 1, .Lfixsfdi_zero
1383
1384         /* Add explicit "1.0" and shift << 8.  */
             /* a7 keeps the sign of x in bit 31; the shift moves the
                explicit 1 from bit 23 to bit 31 of xh.  */
1385         or      a7, a2, a6
1386         slli    xh, a7, 8
1387
1388         /* Shift back to the right, based on the exponent.  */
             /* When a4 < 32 the whole result fits in the low word.  */
1389         ssl     a4              /* shift by 64 - a4 */
1390         bgei    a4, 32, .Lfixsfdi_smallshift
1391         srl     xl, xh
1392         movi    xh, 0
1393
1394 .Lfixsfdi_shifted:
1395         /* Negate the result if sign != 0.  */
             /* Two-word negation: negate both halves, then borrow one from
                the high word unless the low word is zero.  */
1396         bgez    a7, 1f
1397         neg     xl, xl
1398         neg     xh, xh
1399         beqz    xl, 1f
1400         addi    xh, xh, -1
1401 1:      abi_return
1402
1403 .Lfixsfdi_smallshift:
             /* a4 >= 32: the result spans both words.  The low word receives
                the bits shifted out of the high word.  */
1404         movi    xl, 0
1405         sll     xl, xh
1406         srl     xh, xh
1407         j       .Lfixsfdi_shifted
1408
1409 .Lfixsfdi_nan_or_inf:
1410         /* Handle Infinity and NaN.  */
             /* A zero mantissa means Infinity, which saturates according to
                its sign still held in a2.  */
1411         slli    a4, a2, 9
1412         beqz    a4, .Lfixsfdi_maxint
1413
1414         /* Translate NaN to +maxint.  */
1415         movi    a2, 0
1416
1417 .Lfixsfdi_maxint:
             /* Saturate according to the sign of a2, which is tested before
                xh/xl (one of which is a2) are overwritten.  */
1418         slli    a7, a6, 8       /* 0x80000000 */
1419         bgez    a2, 1f
1420         mov     xh, a7
1421         movi    xl, 0
1422         abi_return
1423
1424 1:      addi    xh, a7, -1      /* 0x7fffffff */
1425         movi    xl, -1
1426         abi_return
1427
1428 .Lfixsfdi_zero:
1429         movi    xh, 0
1430         movi    xl, 0
1431         abi_return
1432
1433 #endif /* L_fixsfdi */
1434
1435 #ifdef L_fixunssfsi
1436
1437         .align  4
1438         .global __fixunssfsi
1439         .type   __fixunssfsi, @function
             /* __fixunssfsi: convert single-precision to unsigned 32-bit
                integer.
                  In:   a2 = x (single-precision bit pattern).
                  Out:  a2 = integer result; the fraction bits are discarded
                        by a right shift of the magnitude.
                Magnitudes below 1 give 0.  In-range negative inputs give
                the two's-complement negation of the magnitude.  Magnitudes
                too large for 32 bits, and Infinities, give 0xffffffff when
                positive and 0x80000000 when negative.  NaN gives
                0xffffffff.  */
1440 __fixunssfsi:
1441         abi_entry sp, 32
1442
1443         /* Check for NaN and Infinity.  */
1444         movi    a6, 0x7f800000
1445         ball    a2, a6, .Lfixunssfsi_nan_or_inf
1446
1447         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
1448         extui   a4, a2, 23, 8
1449         addi    a4, a4, -0x7f
1450         bgei    a4, 32, .Lfixunssfsi_maxint
1451         bltz    a4, .Lfixunssfsi_zero
1452
1453         /* Add explicit "1.0" and shift << 8.  */
             /* a7 keeps the sign of x in bit 31; the shift moves the
                explicit 1 from bit 23 to bit 31 of a5.  */
1454         or      a7, a2, a6
1455         slli    a5, a7, 8
1456
1457         /* Shift back to the right, based on the exponent.  */
             /* a4 becomes exp - 0x7e.  A value of exactly 32 means the
                shifted mantissa in a5 is already the answer, which is
                handled at .Lfixunssfsi_bigexp.  */
1458         addi    a4, a4, 1
1459         beqi    a4, 32, .Lfixunssfsi_bigexp
1460         ssl     a4              /* shift by 32 - a4 */
1461         srl     a5, a5
1462
1463         /* Negate the result if sign != 0.  */
1464         neg     a2, a5
1465         movgez  a2, a5, a7
1466         abi_return
1467
1468 .Lfixunssfsi_nan_or_inf:
1469         /* Handle Infinity and NaN.  */
             /* A zero mantissa means Infinity, which saturates according to
                its sign still held in a2.  */
1470         slli    a4, a2, 9
1471         beqz    a4, .Lfixunssfsi_maxint
1472
1473         /* Translate NaN to 0xffffffff.  */
1474         movi    a2, -1
1475         abi_return
1476
1477 .Lfixunssfsi_maxint:
             /* Saturate: 0xffffffff if a2 is non-negative, else 0x80000000.  */
1478         slli    a4, a6, 8       /* 0x80000000 */
1479         movi    a5, -1          /* 0xffffffff */
1480         movgez  a4, a5, a2
1481         mov     a2, a4
1482         abi_return
1483
1484 .Lfixunssfsi_zero:
1485         movi    a2, 0
1486         abi_return
1487
1488 .Lfixunssfsi_bigexp:
1489         /* Handle unsigned maximum exponent case.  */
1490         bltz    a2, 1f
1491         mov     a2, a5          /* no shift needed */
1492         abi_return
1493
1494         /* Return 0x80000000 if negative.  */
1495 1:      slli    a2, a6, 8
1496         abi_return
1497
1498 #endif /* L_fixunssfsi */
1499
1500 #ifdef L_fixunssfdi
1501
1502         .align  4
1503         .global __fixunssfdi
1504         .type   __fixunssfdi, @function
             /* __fixunssfdi: convert single-precision to unsigned 64-bit
                integer.
                  In:   a2 = x (single-precision bit pattern).
                  Out:  xh:xl = integer result, where xh/xl name a2/a3 in
                        the order selected by __XTENSA_EB__ at the top of
                        this file.  The fraction bits are discarded by a
                        right shift of the magnitude.
                Magnitudes below 1 give 0.  In-range negative inputs give
                the two's-complement negation of the magnitude.  Magnitudes
                too large for 64 bits, and Infinities, give all ones when
                positive and 0x8000000000000000 when negative.  NaN gives
                all ones.  */
1505 __fixunssfdi:
1506         abi_entry sp, 32
1507
1508         /* Check for NaN and Infinity.  */
1509         movi    a6, 0x7f800000
1510         ball    a2, a6, .Lfixunssfdi_nan_or_inf
1511
1512         /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
1513         extui   a4, a2, 23, 8
1514         addi    a4, a4, -0x7f
1515         bgei    a4, 64, .Lfixunssfdi_maxint
1516         bltz    a4, .Lfixunssfdi_zero
1517
1518         /* Add explicit "1.0" and shift << 8.  */
             /* a7 keeps the sign of x in bit 31; the shift moves the
                explicit 1 from bit 23 to bit 31 of xh.  */
1519         or      a7, a2, a6
1520         slli    xh, a7, 8
1521
1522         /* Shift back to the right, based on the exponent.  */
             /* a4 becomes exp - 0x7e.  A value of exactly 64 means xh
                already holds the high word of the answer, which is handled
                at .Lfixunssfdi_bigexp.  When a4 < 32 the whole result fits
                in the low word.  */
1523         addi    a4, a4, 1
1524         beqi    a4, 64, .Lfixunssfdi_bigexp
1525         ssl     a4              /* shift by 64 - a4 */
1526         bgei    a4, 32, .Lfixunssfdi_smallshift
1527         srl     xl, xh
1528         movi    xh, 0
1529
1530 .Lfixunssfdi_shifted:
1531         /* Negate the result if sign != 0.  */
             /* Two-word negation: negate both halves, then borrow one from
                the high word unless the low word is zero.  */
1532         bgez    a7, 1f
1533         neg     xl, xl
1534         neg     xh, xh
1535         beqz    xl, 1f
1536         addi    xh, xh, -1
1537 1:      abi_return
1538
1539 .Lfixunssfdi_smallshift:
             /* a4 >= 32: the result spans both words.  The low word receives
                the bits shifted out of the high word.  */
1540         movi    xl, 0
1541         src     xl, xh, xl
1542         srl     xh, xh
1543         j       .Lfixunssfdi_shifted
1544
1545 .Lfixunssfdi_nan_or_inf:
1546         /* Handle Infinity and NaN.  */
             /* A zero mantissa means Infinity, which saturates according to
                its sign still held in a2.  */
1547         slli    a4, a2, 9
1548         beqz    a4, .Lfixunssfdi_maxint
1549
1550         /* Translate NaN to 0xffffffff.... */
1551 1:      movi    xh, -1
1552         movi    xl, -1
1553         abi_return
1554
1555 .Lfixunssfdi_maxint:
             /* Saturate: all ones (label 1 above) if a2 is non-negative,
                else 0x8000000000000000.  */
1556         bgez    a2, 1b
1557 2:      slli    xh, a6, 8       /* 0x80000000 */
1558         movi    xl, 0
1559         abi_return
1560
1561 .Lfixunssfdi_zero:
1562         movi    xh, 0
1563         movi    xl, 0
1564         abi_return
1565
1566 .Lfixunssfdi_bigexp:
1567         /* Handle unsigned maximum exponent case.  */
             /* Negative inputs saturate at label 2; otherwise xh already
                holds the high word and only the low word needs clearing.  */
1568         bltz    a7, 2b
1569         movi    xl, 0
1570         abi_return              /* no shift needed */
1571
1572 #endif /* L_fixunssfdi */
1573
1574 #ifdef L_floatsisf
1575
1576         .align  4
1577         .global __floatunsisf
1578         .type   __floatunsisf, @function
             /* __floatunsisf: convert unsigned 32-bit integer to
                single-precision.
                  In:   a2 = unsigned integer.
                  Out:  a2 = single-precision bit pattern, rounded to nearest
                        with ties to even.
                Zero is returned unchanged.  Everything else shares the
                __floatsisf code from .Lfloatsisf_normalize on, with the sign
                in a7 forced to zero.  */
1579 __floatunsisf:
1580         abi_entry sp, 32
1581         beqz    a2, .Lfloatsisf_return
1582
1583         /* Set the sign to zero and jump to the floatsisf code.  */
1584         movi    a7, 0
1585         j       .Lfloatsisf_normalize
1586
1587         .align  4
1588         .global __floatsisf
1589         .type   __floatsisf, @function
             /* __floatsisf: convert signed 32-bit integer to
                single-precision.
                  In:   a2 = signed integer.
                  Out:  a2 = single-precision bit pattern, rounded to nearest
                        with ties to even.
                Register roles: a7 = sign (bit 0), a4 = normalization shift,
                a5 = scratch, a6 = bits discarded from the mantissa.  */
1590 __floatsisf:
1591         abi_entry sp, 32
1592
1593         /* Check for zero.  */
1594         beqz    a2, .Lfloatsisf_return
1595
1596         /* Save the sign.  */
1597         extui   a7, a2, 31, 1
1598
1599         /* Get the absolute value.  */
1600 #if XCHAL_HAVE_ABS
1601         abs     a2, a2
1602 #else
1603         neg     a4, a2
1604         movltz  a2, a4, a2
1605 #endif
1606
1607 .Lfloatsisf_normalize:
1608         /* Normalize with the first 1 bit in the msb.  do_nsau is a macro
                defined elsewhere in this file; presumably it leaves the number
                of leading zero bits of a2 in a4, using a5 and a6 as
                temporaries.  */
1609         do_nsau a4, a2, a5, a6
1610         ssl     a4
1611         sll     a5, a2
1612
1613         /* Shift the mantissa into position, with rounding bits in a6.  */
             /* The leading 1 lands in bit 23 of a2; the 8 bits dropped off
                the bottom are left-aligned in a6.  */
1614         srli    a2, a5, 8
1615         slli    a6, a5, (32 - 8)
1616
1617         /* Set the exponent.  */
             /* The exponent is added rather than or-ed in, so the explicit
                leading 1 in bit 23 supplies the final increment.  */
1618         movi    a5, 0x9d        /* 0x7e + 31 */
1619         sub     a5, a5, a4
1620         slli    a5, a5, 23
1621         add     a2, a2, a5
1622
1623         /* Add the sign.  */
1624         slli    a7, a7, 31
1625         or      a2, a2, a7
1626
1627         /* Round up if the leftover fraction is >= 1/2.  */
             /* Bit 31 of a6 is the first discarded bit.  */
1628         bgez    a6, .Lfloatsisf_return
1629         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1630
1631         /* Check if the leftover fraction is exactly 1/2.  */
1632         slli    a6, a6, 1
1633         beqz    a6, .Lfloatsisf_exactlyhalf
1634
1635 .Lfloatsisf_return:
1636         abi_return
1637
1638 .Lfloatsisf_exactlyhalf:
1639         /* Round down to the nearest even value.  */
             /* Clearing bit 0 undoes the increment when it made the
                mantissa odd.  */
1640         srli    a2, a2, 1
1641         slli    a2, a2, 1
1642         abi_return
1643
1644 #endif /* L_floatsisf */
1645
1646 #ifdef L_floatdisf
1647
1648         .align  4
1649         .global __floatundisf
1650         .type   __floatundisf, @function
             /* __floatundisf: convert unsigned 64-bit integer to
                single-precision.
                  In:   xh:xl = unsigned integer, where xh/xl name a2/a3 in
                        the order selected by __XTENSA_EB__ at the top of
                        this file.
                  Out:  a2 = single-precision bit pattern, rounded to nearest
                        with ties to even.
                Zero returns immediately.  Everything else shares the
                __floatdisf code from .Lfloatdisf_normalize on, with the sign
                in a7 forced to zero.  */
1651 __floatundisf:
1652         abi_entry sp, 32
1653
1654         /* Check for zero.  */
1655         or      a4, xh, xl
1656         beqz    a4, 2f
1657
1658         /* Set the sign to zero and jump to the floatdisf code.  */
1659         movi    a7, 0
1660         j       .Lfloatdisf_normalize
1661
1662         .align  4
1663         .global __floatdisf
1664         .type   __floatdisf, @function
             /* __floatdisf: convert signed 64-bit integer to
                single-precision.
                  In:   xh:xl = signed integer.
                  Out:  a2 = single-precision bit pattern, rounded to nearest
                        with ties to even.
                Register roles: a7 = sign (bit 0), a4 = normalization shift,
                a5 = scratch, a6 = bits discarded from the mantissa.  */
1665 __floatdisf:
1666         abi_entry sp, 32
1667
1668         /* Check for zero.  */
1669         or      a4, xh, xl
1670         beqz    a4, 2f
1671
1672         /* Save the sign.  */
1673         extui   a7, xh, 31, 1
1674
1675         /* Get the absolute value.  */
             /* Two-word negation: negate both halves, then borrow one from
                the high word unless the low word is zero.  */
1676         bgez    xh, .Lfloatdisf_normalize
1677         neg     xl, xl
1678         neg     xh, xh
1679         beqz    xl, .Lfloatdisf_normalize
1680         addi    xh, xh, -1
1681
1682 .Lfloatdisf_normalize:
1683         /* Normalize with the first 1 bit in the msb of xh.  do_nsau is a
                macro defined elsewhere in this file; presumably it leaves the
                number of leading zero bits of xh in a4, using a5 and a6 as
                temporaries.  A zero high word is handled separately at
                .Lfloatdisf_bigshift.  */
1684         beqz    xh, .Lfloatdisf_bigshift
1685         do_nsau a4, xh, a5, a6
1686         ssl     a4
1687         src     xh, xh, xl
1688         sll     xl, xl
1689
1690 .Lfloatdisf_shifted:
1691         /* Shift the mantissa into position, with rounding bits in a6.  */
             /* Any nonzero bits left over in a5 are recorded by setting
                bit 0 of a6, so they still count when rounding.  */
1692         ssai    8
1693         sll     a5, xl
1694         src     a6, xh, xl
1695         srl     xh, xh
1696         beqz    a5, 1f
1697         movi    a5, 1
1698         or      a6, a6, a5
1699 1:
1700         /* Set the exponent.  */
             /* The exponent is added rather than or-ed in, so the explicit
                leading 1 of the mantissa supplies the final increment.  The
                result is built in a2 whichever register xh names.  */
1701         movi    a5, 0xbd        /* 0x7e + 63 */
1702         sub     a5, a5, a4
1703         slli    a5, a5, 23
1704         add     a2, xh, a5
1705
1706         /* Add the sign.  */
1707         slli    a7, a7, 31
1708         or      a2, a2, a7
1709
1710         /* Round up if the leftover fraction is >= 1/2.  */
             /* Bit 31 of a6 is the first discarded bit.  */
1711         bgez    a6, 2f
1712         addi    a2, a2, 1       /* Overflow to the exponent is OK.  */
1713
1714         /* Check if the leftover fraction is exactly 1/2.  */
1715         slli    a6, a6, 1
1716         beqz    a6, .Lfloatdisf_exactlyhalf
1717 2:      abi_return
1718
1719 .Lfloatdisf_bigshift:
1720         /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
             /* The extra 32 added to a4 accounts for the empty high word.  */
1721         do_nsau a4, xl, a5, a6
1722         ssl     a4
1723         sll     xh, xl
1724         movi    xl, 0
1725         addi    a4, a4, 32
1726         j       .Lfloatdisf_shifted
1727
1728 .Lfloatdisf_exactlyhalf:
1729         /* Round down to the nearest even value.  */
             /* Clearing bit 0 undoes the increment when it made the
                mantissa odd.  */
1730         srli    a2, a2, 1
1731         slli    a2, a2, 1
1732         abi_return
1733
1734 #endif /* L_floatdisf */