gcc/config/sparc/lb1spc.asm

   1 /* This is an assembly language implementation of the libgcc1.c integer
   2    multiply, divide and remainder routines for the SPARC processor.
   3
   4    These routines are derived from the SPARC Architecture Manual, version 8,
   5    slightly edited to match the desired calling convention, and also to
   6    optimize them for our purposes.  */
   7
   8 #ifdef L_mulsi3
     ! .umul -- 32-bit integer multiply built from mulscc multiply steps.
     !   In:   %o0 = multiplier, %o1 = multiplicand
     !   Out:  %o0 = product (low 32 bits)
     !   Uses: %o4, %o5, %y and the integer condition codes
     ! Leaf routine: it returns with retl and never executes a save.
     ! When both operands fit in 12 bits a 13-step sequence is used instead
     ! of the full 33-step one.
   9 .text
  10         .align 4
  11         .global .umul
  12         .proc 4
  13 .umul:
  14         or      %o0, %o1, %o4   ! logical or of multiplier and multiplicand
  15         mov     %o0, %y         ! multiplier to Y register
     ! NOTE(review): three instructions separate this %y write from the first
     ! mulscc on either path; presumably that spacing is required -- confirm
     ! against the V8 manual before reordering anything here.
  16         andncc  %o4, 0xfff, %o5 ! Z set iff neither operand has bits above bit 11 (%o5 is scratch)
  17         be      mul_shortway    ! both operands fit in 12 bits: do it the short way
  18         andcc   %g0, %g0, %o4   ! (delay slot, both paths) zero the partial product and clear NV cc
  19         !
  20         ! long multiply
  21         !
  22         mulscc  %o4, %o1, %o4   ! first iteration of 33
  23         mulscc  %o4, %o1, %o4
  24         mulscc  %o4, %o1, %o4
  25         mulscc  %o4, %o1, %o4
  26         mulscc  %o4, %o1, %o4
  27         mulscc  %o4, %o1, %o4
  28         mulscc  %o4, %o1, %o4
  29         mulscc  %o4, %o1, %o4
  30         mulscc  %o4, %o1, %o4
  31         mulscc  %o4, %o1, %o4
  32         mulscc  %o4, %o1, %o4
  33         mulscc  %o4, %o1, %o4
  34         mulscc  %o4, %o1, %o4
  35         mulscc  %o4, %o1, %o4
  36         mulscc  %o4, %o1, %o4
  37         mulscc  %o4, %o1, %o4
  38         mulscc  %o4, %o1, %o4
  39         mulscc  %o4, %o1, %o4
  40         mulscc  %o4, %o1, %o4
  41         mulscc  %o4, %o1, %o4
  42         mulscc  %o4, %o1, %o4
  43         mulscc  %o4, %o1, %o4
  44         mulscc  %o4, %o1, %o4
  45         mulscc  %o4, %o1, %o4
  46         mulscc  %o4, %o1, %o4
  47         mulscc  %o4, %o1, %o4
  48         mulscc  %o4, %o1, %o4
  49         mulscc  %o4, %o1, %o4
  50         mulscc  %o4, %o1, %o4
  51         mulscc  %o4, %o1, %o4
  52         mulscc  %o4, %o1, %o4
  53         mulscc  %o4, %o1, %o4   ! 32nd iteration
  54         mulscc  %o4, %g0, %o4   ! last iteration only shifts
  55         ! the upper 32 bits of product are wrong, but we do not care
  56         retl
  57         rd      %y, %o0         ! (delay slot) result = contents of %y
  58         !
  59         ! short multiply
  60         !
  61 mul_shortway:
  62         mulscc  %o4, %o1, %o4   ! first iteration of 13
  63         mulscc  %o4, %o1, %o4
  64         mulscc  %o4, %o1, %o4
  65         mulscc  %o4, %o1, %o4
  66         mulscc  %o4, %o1, %o4
  67         mulscc  %o4, %o1, %o4
  68         mulscc  %o4, %o1, %o4
  69         mulscc  %o4, %o1, %o4
  70         mulscc  %o4, %o1, %o4
  71         mulscc  %o4, %o1, %o4
  72         mulscc  %o4, %o1, %o4
  73         mulscc  %o4, %o1, %o4   ! 12th iteration
  74         mulscc  %o4, %g0, %o4   ! last iteration only shifts
     ! The product is now split between %o4 and %y; the two shifts below
     ! line the halves up so that a plain or can join them.
  75         rd      %y, %o5
  76         sll     %o4, 12, %o4    ! left shift partial product by 12 bits
  77         srl     %o5, 20, %o5    ! right shift partial product by 20 bits
  78         retl
  79         or      %o5, %o4, %o0   ! (delay slot) merge for true product
  80 #endif
  81
  82 #ifdef L_divsi3
     ! .udiv / .div -- 32-bit unsigned and signed integer division.
     !   In:   %o0 = dividend, %o1 = divisor (%i0 / %i1 after the save)
     !   Out:  %o0 = quotient
     !   A zero divisor raises software trap 2 (te 2).
     ! .div replaces negative operands by their magnitudes and keeps
     ! %i1 xor %i0 in %l2; the quotient of the magnitudes is negated on
     ! exit when %l2 is negative.  .udiv sets %l2 to 0.
     ! Register roles in the shared body that starts at divide:
     !   %i3 = running remainder      %i2 = quotient being built
     !   %l1 = scaled divisor         %l3 = 1<<27 scaling threshold
     !   %l0 = counter for the 3-bit divloop passes
     !   %l4 = counter for the 1-bit single_divloop steps
  83 .text
  84         .align 4
  85         .global .udiv
  86         .proc 4
  87 .udiv:
  88         save    %sp, -64, %sp
  89         b       divide
  90         mov     0, %l2          ! result always positive
  91         .global .div
  92         .proc 4
  93 .div:
  94         save    %sp, -64, %sp
  95         orcc    %i1, %i0, %g0   ! is either operand negative
  96         bge     divide          ! if not, skip this junk
  97         xor     %i1, %i0, %l2   ! record sign of result in sign of %l2
  98         tst     %i1
  99         bge     2f              ! divisor >= 0, so the dividend is the negative one
 100         tst     %i0             ! (delay slot) cc for the bge below
 101         ! %i1 < 0
 102         bge     divide          ! only the divisor was negative
 103         neg     %i1             ! (delay slot, both paths) divisor = -divisor
 104 2:      ! %i0 < 0
 105         neg     %i0
 106         !       FALL THROUGH
 107 divide:
 108         ! Compute size of quotient, scale comparand.
 109         orcc    %i1, %g0, %l1           ! movcc %i1, %l1
 110         te      2                       ! if %i1 = 0
 111         mov     %i0, %i3
             ! If the divisor exceeds the dividend (unsigned compare) the
             ! answer is already known: the quotient is 0.  The scaling code
             ! below is only valid for divisor <= dividend: otherwise the
             ! shifted divisor can overflow to 0 and loop forever, or the
             ! first subtract can wrap to a positive remainder
             ! (e.g. 0x10000000 / 0xa0000000 would yield 1).
             cmp     %i3, %l1
             blu     got_result
 112         mov     0, %i2                  ! (delay slot, both paths) quotient = 0
 113         sethi   %hi(1<<(32-4-1)), %l3
 114         cmp     %i3, %l3
 115         blu     not_really_big
 116         mov     0, %l0
 117         !
 118         ! Here, the %i0 is >= 2^(31-3) or so.  We must be careful here,
 119         ! as our usual 3-at-a-shot divide step will cause overflow and havoc.
 120         ! The total number of bits in the result here is 3*%l0+%l4, where
 121         ! %l4 <= 3.
 122         ! Compute %l0 in an unorthodox manner: know we need to Shift %l1 into
 123         ! the top decade: so do not even bother to compare to %i3.
 124 1:      cmp     %l1, %l3
 125         bgeu    3f
 126         mov     1, %l4
 127         sll     %l1, 3, %l1
 128         b       1b
 129         inc     %l0
 130         !
 131         ! Now compute %l4
 132         !
 133 2:      addcc   %l1, %l1, %l1
 134         bcc     not_too_big
 135         add     %l4, 1, %l4
 136         !
 137         ! We are here if the %i1 overflowed when Shifting.
 138         ! This means that %i3 has the high-order bit set.
 139         ! Restore %l1 and subtract from %i3.
 140         sll     %l3, 4, %l3
 141         srl     %l1, 1, %l1
 142         add     %l1, %l3, %l1
 143         b       do_single_div
 144         dec     %l4
 145 not_too_big:
 146 3:      cmp     %l1, %i3
 147         blu     2b
 148         nop
 149         be      do_single_div
 150         nop
 151         ! %l1 > %i3: went too far: back up 1 step
 152         !       srl     %l1, 1, %l1
 153         !       dec     %l4
 154         ! do single-bit divide steps
 155         !
 156         ! We have to be careful here.  We know that %i3 >= %l1, so we can do the
 157         ! first divide step without thinking.  BUT, the others are conditional,
 158         ! and are only done if %i3 >= 0.  Because both %i3 and %l1 may have the
 159         ! high-order bit set in the first step, just falling into the regular
 160         ! division loop will mess up the first time around.
 161         ! So we unroll slightly...
 162 do_single_div:
 163         deccc   %l4
 164         bl      end_regular_divide
 165         nop
 166         sub     %i3, %l1, %i3
 167         mov     1, %i2
 168         b       end_single_divloop
 169         nop
 170 single_divloop:
 171         sll     %i2, 1, %i2
 172         bl      1f
 173         srl     %l1, 1, %l1
 174         ! %i3 >= 0
 175         sub     %i3, %l1, %i3
 176         b       2f
 177         inc     %i2
 178 1:      ! %i3 < 0
 179         add     %i3, %l1, %i3
 180         dec     %i2
 181 end_single_divloop:
 182 2:      deccc   %l4
 183         bge     single_divloop
 184         tst     %i3
 185         b       end_regular_divide
 186         nop
 187 not_really_big:
 188 1:      sll     %l1, 3, %l1
 189         cmp     %l1, %i3
 190         bleu    1b
 191         inccc   %l0
 192         be      got_result
 193         dec     %l0
 194 do_regular_divide:
 195         ! Do the main division iteration
 196         tst     %i3
 197         ! Fall through into divide loop
 198 divloop:
 199         sll     %i2, 3, %i2
 200         ! depth 1, accumulated bits 0
 201         bl      L.1.8
 202         srl     %l1,1,%l1
 203         ! remainder is positive
 204         subcc   %i3,%l1,%i3
 205         ! depth 2, accumulated bits 1
 206         bl      L.2.9
 207         srl     %l1,1,%l1
 208         ! remainder is positive
 209         subcc   %i3,%l1,%i3
 210         ! depth 3, accumulated bits 3
 211         bl      L.3.11
 212         srl     %l1,1,%l1
 213         ! remainder is positive
 214         subcc   %i3,%l1,%i3
 215         b       9f
 216         add     %i2, (3*2+1), %i2
 217 L.3.11: ! remainder is negative
 218         addcc   %i3,%l1,%i3
 219         b       9f
 220         add     %i2, (3*2-1), %i2
 221 L.2.9:  ! remainder is negative
 222         addcc   %i3,%l1,%i3
 223         ! depth 3, accumulated bits 1
 224         bl      L.3.9
 225         srl     %l1,1,%l1
 226         ! remainder is positive
 227         subcc   %i3,%l1,%i3
 228         b       9f
 229         add     %i2, (1*2+1), %i2
 230 L.3.9:  ! remainder is negative
 231         addcc   %i3,%l1,%i3
 232         b       9f
 233         add     %i2, (1*2-1), %i2
 234 L.1.8:  ! remainder is negative
 235         addcc   %i3,%l1,%i3
 236         ! depth 2, accumulated bits -1
 237         bl      L.2.7
 238         srl     %l1,1,%l1
 239         ! remainder is positive
 240         subcc   %i3,%l1,%i3
 241         ! depth 3, accumulated bits -1
 242         bl      L.3.7
 243         srl     %l1,1,%l1
 244         ! remainder is positive
 245         subcc   %i3,%l1,%i3
 246         b       9f
 247         add     %i2, (-1*2+1), %i2
 248 L.3.7:  ! remainder is negative
 249         addcc   %i3,%l1,%i3
 250         b       9f
 251         add     %i2, (-1*2-1), %i2
 252 L.2.7:  ! remainder is negative
 253         addcc   %i3,%l1,%i3
 254         ! depth 3, accumulated bits -3
 255         bl      L.3.5
 256         srl     %l1,1,%l1
 257         ! remainder is positive
 258         subcc   %i3,%l1,%i3
 259         b       9f
 260         add     %i2, (-3*2+1), %i2
 261 L.3.5:  ! remainder is negative
 262         addcc   %i3,%l1,%i3
 263         b       9f
 264         add     %i2, (-3*2-1), %i2
 265 end_regular_divide:
 266 9:      deccc   %l0
 267         bge     divloop
 268         tst     %i3
 269         bge     got_result
 270         nop
 271         ! non-restoring fixup here
 272         dec     %i2             ! remainder ended negative: quotient is one too large
 273 got_result:
 274         tst     %l2
 275         bge     1f
 276         restore                 ! (delay slot, both paths) caller window: %i2 is now %o2
 277         ! answer < 0
 278         retl            ! leaf-routine return
 279         neg     %o2, %o0        ! quotient <- -%i2
 280 1:      retl            ! leaf-routine return
 281         mov     %o2, %o0        ! quotient <- %i2
 282 #endif
 283
 284 #ifdef L_modsi3
     ! .urem / .rem -- 32-bit unsigned and signed integer remainder.
     !   In:   %o0 = dividend, %o1 = divisor (%i0 / %i1 after the save)
     !   Out:  %o0 = remainder
     !   A zero divisor raises software trap 2 (te 2).
     ! .rem replaces negative operands by their magnitudes and keeps the
     ! original dividend in %l2; the remainder of the magnitudes is negated
     ! on exit when %l2 is negative, so the result takes the sign of the
     ! dividend.  .urem sets %l2 to 0.
     ! Register roles in the shared body that starts at divide:
     !   %i3 = running remainder      %i2 = quotient being built
     !   %l1 = scaled divisor         %l3 = 1<<27 scaling threshold
     !   %l0 = counter for the 3-bit divloop passes
     !   %l4 = counter for the 1-bit single_divloop steps
 285 .text
 286         .align 4
 287         .global .urem
 288         .proc 4
 289 .urem:
 290         save    %sp, -64, %sp
 291         b       divide
 292         mov     0, %l2          ! result always positive
 293         .global .rem
 294         .proc 4
 295 .rem:
 296         save    %sp, -64, %sp
 297         orcc    %i1, %i0, %g0   ! is either operand negative
 298         bge     divide          ! if not, skip this junk
 299         mov     %i0, %l2        ! record sign of result in sign of %l2
 300         tst     %i1
 301         bge     2f              ! divisor >= 0, so the dividend is the negative one
 302         tst     %i0             ! (delay slot) cc for the bge below
 303         ! %i1 < 0
 304         bge     divide          ! only the divisor was negative
 305         neg     %i1             ! (delay slot, both paths) divisor = -divisor
 306 2:      ! %i0 < 0
 307         neg     %i0
 308         !       FALL THROUGH
 309 divide:
 310         ! Compute size of quotient, scale comparand.
 311         orcc    %i1, %g0, %l1           ! movcc %i1, %l1
 312         te      2                       ! if %i1 = 0
 313         mov     %i0, %i3
             ! If the divisor exceeds the dividend (unsigned compare) the
             ! answer is already known: the remainder is the dividend, which
             ! %i3 already holds.  The scaling code below is only valid for
             ! divisor <= dividend: otherwise the shifted divisor can
             ! overflow to 0 and loop forever, or the first subtract can wrap
             ! to a positive remainder (e.g. 0x10000000 rem 0xa0000000 would
             ! yield 0x70000000).
             cmp     %i3, %l1
             blu     got_result
 314         mov     0, %i2                  ! (delay slot, both paths) quotient = 0
 315         sethi   %hi(1<<(32-4-1)), %l3
 316         cmp     %i3, %l3
 317         blu     not_really_big
 318         mov     0, %l0
 319         !
 320         ! Here, the %i0 is >= 2^(31-3) or so.  We must be careful here,
 321         ! as our usual 3-at-a-shot divide step will cause overflow and havoc.
 322         ! The total number of bits in the result here is 3*%l0+%l4, where
 323         ! %l4 <= 3.
 324         ! Compute %l0 in an unorthodox manner: know we need to Shift %l1 into
 325         ! the top decade: so do not even bother to compare to %i3.
 326 1:      cmp     %l1, %l3
 327         bgeu    3f
 328         mov     1, %l4
 329         sll     %l1, 3, %l1
 330         b       1b
 331         inc     %l0
 332         !
 333         ! Now compute %l4
 334         !
 335 2:      addcc   %l1, %l1, %l1
 336         bcc     not_too_big
 337         add     %l4, 1, %l4
 338         !
 339         ! We are here if the %i1 overflowed when Shifting.
 340         ! This means that %i3 has the high-order bit set.
 341         ! Restore %l1 and subtract from %i3.
 342         sll     %l3, 4, %l3
 343         srl     %l1, 1, %l1
 344         add     %l1, %l3, %l1
 345         b       do_single_div
 346         dec     %l4
 347 not_too_big:
 348 3:      cmp     %l1, %i3
 349         blu     2b
 350         nop
 351         be      do_single_div
 352         nop
 353         ! %l1 > %i3: went too far: back up 1 step
 354         !       srl     %l1, 1, %l1
 355         !       dec     %l4
 356         ! do single-bit divide steps
 357         !
 358         ! We have to be careful here.  We know that %i3 >= %l1, so we can do the
 359         ! first divide step without thinking.  BUT, the others are conditional,
 360         ! and are only done if %i3 >= 0.  Because both %i3 and %l1 may have the
 361         ! high-order bit set in the first step, just falling into the regular
 362         ! division loop will mess up the first time around.
 363         ! So we unroll slightly...
 364 do_single_div:
 365         deccc   %l4
 366         bl      end_regular_divide
 367         nop
 368         sub     %i3, %l1, %i3
 369         mov     1, %i2
 370         b       end_single_divloop
 371         nop
 372 single_divloop:
 373         sll     %i2, 1, %i2
 374         bl      1f
 375         srl     %l1, 1, %l1
 376         ! %i3 >= 0
 377         sub     %i3, %l1, %i3
 378         b       2f
 379         inc     %i2
 380 1:      ! %i3 < 0
 381         add     %i3, %l1, %i3
 382         dec     %i2
 383 end_single_divloop:
 384 2:      deccc   %l4
 385         bge     single_divloop
 386         tst     %i3
 387         b       end_regular_divide
 388         nop
 389 not_really_big:
 390 1:      sll     %l1, 3, %l1
 391         cmp     %l1, %i3
 392         bleu    1b
 393         inccc   %l0
 394         be      got_result
 395         dec     %l0
 396 do_regular_divide:
 397         ! Do the main division iteration
 398         tst     %i3
 399         ! Fall through into divide loop
 400 divloop:
 401         sll     %i2, 3, %i2
 402         ! depth 1, accumulated bits 0
 403         bl      L.1.8
 404         srl     %l1,1,%l1
 405         ! remainder is positive
 406         subcc   %i3,%l1,%i3
 407         ! depth 2, accumulated bits 1
 408         bl      L.2.9
 409         srl     %l1,1,%l1
 410         ! remainder is positive
 411         subcc   %i3,%l1,%i3
 412         ! depth 3, accumulated bits 3
 413         bl      L.3.11
 414         srl     %l1,1,%l1
 415         ! remainder is positive
 416         subcc   %i3,%l1,%i3
 417         b       9f
 418         add     %i2, (3*2+1), %i2
 419 L.3.11: ! remainder is negative
 420         addcc   %i3,%l1,%i3
 421         b       9f
 422         add     %i2, (3*2-1), %i2
 423 L.2.9:  ! remainder is negative
 424         addcc   %i3,%l1,%i3
 425         ! depth 3, accumulated bits 1
 426         bl      L.3.9
 427         srl     %l1,1,%l1
 428         ! remainder is positive
 429         subcc   %i3,%l1,%i3
 430         b       9f
 431         add     %i2, (1*2+1), %i2
 432 L.3.9:  ! remainder is negative
 433         addcc   %i3,%l1,%i3
 434         b       9f
 435         add     %i2, (1*2-1), %i2
 436 L.1.8:  ! remainder is negative
 437         addcc   %i3,%l1,%i3
 438         ! depth 2, accumulated bits -1
 439         bl      L.2.7
 440         srl     %l1,1,%l1
 441         ! remainder is positive
 442         subcc   %i3,%l1,%i3
 443         ! depth 3, accumulated bits -1
 444         bl      L.3.7
 445         srl     %l1,1,%l1
 446         ! remainder is positive
 447         subcc   %i3,%l1,%i3
 448         b       9f
 449         add     %i2, (-1*2+1), %i2
 450 L.3.7:  ! remainder is negative
 451         addcc   %i3,%l1,%i3
 452         b       9f
 453         add     %i2, (-1*2-1), %i2
 454 L.2.7:  ! remainder is negative
 455         addcc   %i3,%l1,%i3
 456         ! depth 3, accumulated bits -3
 457         bl      L.3.5
 458         srl     %l1,1,%l1
 459         ! remainder is positive
 460         subcc   %i3,%l1,%i3
 461         b       9f
 462         add     %i2, (-3*2+1), %i2
 463 L.3.5:  ! remainder is negative
 464         addcc   %i3,%l1,%i3
 465         b       9f
 466         add     %i2, (-3*2-1), %i2
 467 end_regular_divide:
 468 9:      deccc   %l0
 469         bge     divloop
 470         tst     %i3
 471         bge     got_result
 472         nop
 473         ! non-restoring fixup here
 474         add     %i3, %i1, %i3   ! remainder ended negative: add the divisor back
 475 got_result:
 476         tst     %l2
 477         bge     1f
 478         restore                 ! (delay slot, both paths) caller window: %i3 is now %o3
 479         ! answer < 0
 480         retl            ! leaf-routine return
 481         neg     %o3, %o0        ! remainder <- -%i3
 482 1:      retl            ! leaf-routine return
 483         mov     %o3, %o0        ! remainder <- %i3
 484 #endif
 485
 486