gcc/config/arm/lib1funcs.asm

   1 @ libgcc1 routines for ARM cpu.
   2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
   3
   4 /* Copyright (C) 1995, 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
   5
   6 This file is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 In addition to the permissions in the GNU General Public License, the
  12 Free Software Foundation gives you unlimited permission to link the
  13 compiled version of this file with other programs, and to distribute
  14 those programs without any restriction coming from the use of this
  15 file.  (The General Public License restrictions do apply in other
  16 respects; for example, they cover modification of the file, and
  17 distribution when not linked into another program.)
  18
  19 This file is distributed in the hope that it will be useful, but
  20 WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22 General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with this program; see the file COPYING.  If not, write to
  26 the Free Software Foundation, 59 Temple Place - Suite 330,
  27 Boston, MA 02111-1307, USA.  */
  28
  29 /* As a special exception, if you link this library with other files,
  30    some of which are compiled with GCC, to produce an executable,
  31    this library does not by itself cause the resulting executable
  32    to be covered by the GNU General Public License.
  33    This exception does not however invalidate any other reasons why
  34    the executable file might be covered by the GNU General Public License.  */
  35
  36 #ifdef __APCS_26__ /* Return helpers: RET returns unconditionally, RETc(x) returns under condition x, RETCOND is the suffix put after the register list of the ldm that pops pc.  */
  37 #define RET     movs    pc, lr
  38 #define RETc(x) mov##x##s       pc, lr
  39 #define RETCOND ^
  40 #else /* Without __APCS_26__ the returns use a plain mov (no s suffix) and no ^ qualifier.  */
  41 #define RET     mov     pc, lr
  42 #define RETc(x) mov##x  pc, lr
  43 #define RETCOND
  44 #endif
  45 /* The build must supply the prefix that is pasted in front of global symbol names.  */
  46 #ifndef __USER_LABEL_PREFIX__
  47 #error  __USER_LABEL_PREFIX__ not defined
  48 #endif
  49
  50 /* ANSI concatenation macros.  Two levels are used so that the arguments
  51    are macro-expanded before ## pastes them together.  */
  52 #define CONCAT1(a, b) CONCAT2(a, b)
  53 #define CONCAT2(a, b) a ## b
  54
  55 /* Use the right prefix for global labels: SYM (x) is x with
  56    __USER_LABEL_PREFIX__ pasted in front of it.  */
  57 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
  58
  59 #ifdef __ELF__ /* ELF: __PLT__ is written after call targets, TYPE and SIZE emit .type and .size for a symbol.  */
  60 #ifdef __thumb__
  61 #define __PLT__  /* Not supported in thumb assembler (for now).  */
  62 #else
  63 #define __PLT__ (PLT)
  64 #endif
  65 #define TYPE(x) .type SYM(x),function
  66 #define SIZE(x) .size SYM(x), . - SYM(x)
  67 #else /* Not ELF: all three expand to nothing.  */
  68 #define __PLT__
  69 #define TYPE(x)
  70 #define SIZE(x)
  71 #endif
  72 /* Directives that FUNC_START emits in front of an entry point; empty unless building for Thumb.  */
  73 #ifdef __thumb__
  74 #define THUMB_FUNC .thumb_func
  75 #define THUMB_CODE .force_thumb
  76 #else
  77 #define THUMB_FUNC
  78 #define THUMB_CODE
  79 #endif
  80
  81
  82 .macro FUNC_START name                  @ Open a global function whose symbol is the argument with two underscores in front.
  83         .text
  84         .globl SYM (__\name)
  85         TYPE (__\name)
  86         .align 0
  87         THUMB_CODE
  88         THUMB_FUNC
  89 SYM (__\name):                          @ The entry label itself, with the user label prefix applied.
  90 .endm
  91
  92 /* Scratch register used by the Thumb code.  */
  93 work            .req    r4      @ Each Thumb routine in this file pushes work before using it and pops it again before returning.
  94
  95 #ifdef L_udivsi3
  96 @ __udivsi3: unsigned 32-bit division.  In: r0 = dividend, r1 = divisor.  Out: r0 = quotient, or 0 after calling __div0 when the divisor is zero.
  97 dividend        .req    r0      @ Numerator; reduced in place by each successful trial subtraction.
  98 divisor         .req    r1      @ Denominator; shifted up first, then back down 4 bits per pass.
  99 result          .req    r2      @ Quotient bits collected so far.
 100 curbit          .req    r3      @ One set bit: the quotient bit matching the current divisor shift.
 101 ip              .req    r12
 102 sp              .req    r13
 103 lr              .req    r14
 104 pc              .req    r15
 105
 106  FUNC_START udivsi3
 107
 108 #ifdef __thumb__
 109
 110         cmp     divisor, #0
 111         beq     Ldiv0                   @ A zero divisor is handled out of line.
 112         mov     curbit, #1
 113         mov     result, #0
 114
 115         push    { work }                @ Popped again at Lgot_result, the only exit after this point.
 116         cmp     dividend, divisor
 117         bcc     Lgot_result             @ dividend < divisor (unsigned): the quotient is 0.
 118
 119         @ Load the constant 0x10000000 into our work register
 120         mov     work, #1
 121         lsl     work, #28
 122 Loop1:
 123         @ Unless the divisor is very big, shift it up in multiples of
 124         @ four bits, since this is the amount of unwinding in the main
 125         @ division loop.  Continue shifting until the divisor is
 126         @ larger than the dividend.
 127         cmp     divisor, work
 128         bcs     Lbignum                 @ divisor >= 0x10000000: another 4-bit shift would lose bits.
 129         cmp     divisor, dividend
 130         bcs     Lbignum                 @ divisor >= dividend: shifted far enough.
 131         lsl     divisor, #4
 132         lsl     curbit,  #4             @ Keep the quotient bit in step with the divisor.
 133         b       Loop1
 134
 135 Lbignum:
 136         @ Set work to 0x80000000
 137         lsl     work, #3
 138 Loop2:
 139         @ For very big divisors, we must shift it a bit at a time, or
 140         @ we will be in danger of overflowing.
 141         cmp     divisor, work
 142         bcs     Loop3                   @ Top bit of divisor is set: cannot shift further.
 143         cmp     divisor, dividend
 144         bcs     Loop3
 145         lsl     divisor, #1
 146         lsl     curbit,  #1
 147         b       Loop2
 148
 149 Loop3:
 150         @ Test for possible subtractions, and note which bits
 151         @ are done in the result.  On the final pass, this may subtract
 152         @ too much from the dividend, but the result will be ok, since the
 153         @ "bit" will have been shifted out at the bottom.
 154         cmp     dividend, divisor
 155         bcc     Over1
 156         sub     dividend, dividend, divisor
 157         orr     result, result, curbit
 158 Over1:
 159         lsr     work, divisor, #1       @ Same trial against divisor >> 1 ...
 160         cmp     dividend, work
 161         bcc     Over2
 162         sub     dividend, dividend, work
 163         lsr     work, curbit, #1        @ ... whose quotient bit is curbit >> 1.
 164         orr     result, work
 165 Over2:
 166         lsr     work, divisor, #2       @ Trial against divisor >> 2, quotient bit curbit >> 2.
 167         cmp     dividend, work
 168         bcc     Over3
 169         sub     dividend, dividend, work
 170         lsr     work, curbit, #2
 171         orr     result, work
 172 Over3:
 173         lsr     work, divisor, #3       @ Trial against divisor >> 3, quotient bit curbit >> 3.
 174         cmp     dividend, work
 175         bcc     Over4
 176         sub     dividend, dividend, work
 177         lsr     work, curbit, #3
 178         orr     result, work
 179 Over4:
 180         cmp     dividend, #0                    @ Early termination?
 181         beq     Lgot_result
 182         lsr     curbit,  #4                     @ No, any more bits to do?
 183         beq     Lgot_result             @ The shift left curbit at zero: all quotient bits are done.
 184         lsr     divisor, #4
 185         b       Loop3
 186 Lgot_result:
 187         mov     r0, result              @ The quotient is returned in r0.
 188         pop     { work }
 189         RET
 190
 191 Ldiv0:
 192         push    { lr }                  @ The bl below overwrites lr, so keep the return address on the stack.
 193         bl      SYM (__div0) __PLT__
 194         mov     r0, #0                  @ about as wrong as it could be
 195         pop     { pc }                  @ Return by popping the saved lr straight into pc.
 196
 197 #else /* arm version */
 198
 199         cmp     divisor, #0
 200         beq     Ldiv0                   @ A zero divisor is handled out of line.
 201         mov     curbit, #1
 202         mov     result, #0
 203         cmp     dividend, divisor
 204         bcc     Lgot_result             @ dividend < divisor (unsigned): the quotient is 0.
 205 Loop1:
 206         @ Unless the divisor is very big, shift it up in multiples of
 207         @ four bits, since this is the amount of unwinding in the main
 208         @ division loop.  Continue shifting until the divisor is
 209         @ larger than the dividend.
 210         cmp     divisor, #0x10000000
 211         cmpcc   divisor, dividend       @ Only compared when divisor < 0x10000000.
 212         movcc   divisor, divisor, lsl #4
 213         movcc   curbit, curbit, lsl #4
 214         bcc     Loop1                   @ The two mov leave the flags alone, so this still tests the compares.
 215
 216 Lbignum:
 217         @ For very big divisors, we must shift it a bit at a time, or
 218         @ we will be in danger of overflowing.
 219         cmp     divisor, #0x80000000
 220         cmpcc   divisor, dividend
 221         movcc   divisor, divisor, lsl #1
 222         movcc   curbit, curbit, lsl #1
 223         bcc     Lbignum
 224
 225 Loop3:
 226         @ Test for possible subtractions, and note which bits
 227         @ are done in the result.  On the final pass, this may subtract
 228         @ too much from the dividend, but the result will be ok, since the
 229         @ "bit" will have been shifted out at the bottom.
 230         cmp     dividend, divisor
 231         subcs   dividend, dividend, divisor
 232         orrcs   result, result, curbit
 233         cmp     dividend, divisor, lsr #1
 234         subcs   dividend, dividend, divisor, lsr #1
 235         orrcs   result, result, curbit, lsr #1
 236         cmp     dividend, divisor, lsr #2
 237         subcs   dividend, dividend, divisor, lsr #2
 238         orrcs   result, result, curbit, lsr #2
 239         cmp     dividend, divisor, lsr #3
 240         subcs   dividend, dividend, divisor, lsr #3
 241         orrcs   result, result, curbit, lsr #3
 242         cmp     dividend, #0                    @ Early termination?
 243         movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
 244         movne   divisor, divisor, lsr #4        @ Skipped once curbit has been shifted down to zero.
 245         bne     Loop3
 246 Lgot_result:
 247         mov     r0, result              @ The quotient is returned in r0.
 248         RET
 249
 250 Ldiv0:
 251         str     lr, [sp, #-4]!          @ Push lr: the bl below overwrites it.
 252         bl      SYM (__div0) __PLT__
 253         mov     r0, #0                  @ about as wrong as it could be
 254         ldmia   sp!, {pc}RETCOND
 255
 256 #endif /* arm version */
 257
 258  SIZE   (__udivsi3)
 259
 260 #endif /* L_udivsi3 */
 261
 262 #ifdef L_umodsi3
 263 @ __umodsi3: unsigned 32-bit remainder.  In: r0 = dividend, r1 = divisor.  Out: r0 = remainder, or 0 after calling __div0 when the divisor is zero.
 264 dividend        .req    r0      @ Numerator; reduced in place until it is the remainder.
 265 divisor         .req    r1      @ Denominator; shifted up first, then back down 4 bits per pass.
 266 overdone        .req    r2      @ Records which of the shifted subtractions were made in the current pass.
 267 curbit          .req    r3      @ One set bit tracking the current divisor shift.
 268 ip              .req    r12     @ Holds the value curbit had during the last pass.
 269 sp              .req    r13
 270 lr              .req    r14
 271 pc              .req    r15
 272
 273  FUNC_START umodsi3
 274
 275 #ifdef __thumb__
 276
 277         cmp     divisor, #0
 278         beq     Ldiv0                   @ A zero divisor is handled out of line.
 279         mov     curbit, #1
 280         cmp     dividend, divisor
 281         bcs     Over1
 282         RET                             @ dividend < divisor: it already is the remainder; work was not pushed yet.
 283
 284 Over1:
 285         @ Load the constant 0x10000000 into our work register
 286         push    { work }
 287         mov     work, #1
 288         lsl     work, #28
 289 Loop1:
 290         @ Unless the divisor is very big, shift it up in multiples of
 291         @ four bits, since this is the amount of unwinding in the main
 292         @ division loop.  Continue shifting until the divisor is
 293         @ larger than the dividend.
 294         cmp     divisor, work
 295         bcs     Lbignum                 @ divisor >= 0x10000000: another 4-bit shift would lose bits.
 296         cmp     divisor, dividend
 297         bcs     Lbignum                 @ divisor >= dividend: shifted far enough.
 298         lsl     divisor, #4
 299         lsl     curbit, #4
 300         b       Loop1
 301 Lbignum:
 302         @ Set work to 0x80000000
 303         lsl     work, #3
 304 Loop2:
 305         @ For very big divisors, we must shift it a bit at a time, or
 306         @ we will be in danger of overflowing.
 307         cmp     divisor, work
 308         bcs     Loop3                   @ Top bit of divisor is set: cannot shift further.
 309         cmp     divisor, dividend
 310         bcs     Loop3
 311         lsl     divisor, #1
 312         lsl     curbit, #1
 313         b       Loop2
 314 Loop3:
 315         @ Test for possible subtractions.  On the final pass, this may
 316         @ subtract too much from the dividend, so keep track of which
 317         @ subtractions are done, we can fix them up afterwards...
 318         mov     overdone, #0            @ Reset on every pass: only the record of the last pass is used.
 319         cmp     dividend, divisor
 320         bcc     Over2
 321         sub     dividend, dividend, divisor
 322 Over2:
 323         lsr     work, divisor, #1
 324         cmp     dividend, work
 325         bcc     Over3
 326         sub     dividend, dividend, work
 327         mov     ip, curbit              @ Thumb ror takes its count in a register and rotates in place, so park curbit in ip ...
 328         mov     work, #1
 329         ror     curbit, work
 330         orr     overdone, curbit        @ ... record curbit rotated right by 1 ...
 331         mov     curbit, ip              @ ... and put curbit back.
 332 Over3:
 333         lsr     work, divisor, #2
 334         cmp     dividend, work
 335         bcc     Over4
 336         sub     dividend, dividend, work
 337         mov     ip, curbit
 338         mov     work, #2
 339         ror     curbit, work
 340         orr     overdone, curbit        @ Record curbit rotated right by 2.
 341         mov     curbit, ip
 342 Over4:
 343         lsr     work, divisor, #3
 344         cmp     dividend, work
 345         bcc     Over5
 346         sub     dividend, dividend, work
 347         mov     ip, curbit
 348         mov     work, #3
 349         ror     curbit, work
 350         orr     overdone, curbit        @ Record curbit rotated right by 3.
 351         mov     curbit, ip
 352 Over5:
 353         mov     ip, curbit              @ Remember the bit position of this pass for the fixups below.
 354         cmp     dividend, #0                    @ Early termination?
 355         beq     Over6
 356         lsr     curbit, #4                      @ No, any more bits to do?
 357         beq     Over6
 358         lsr     divisor, #4
 359         b       Loop3
 360 Over6:
 361         @ Any subtractions that we should not have done will be recorded in
 362         @ the top three bits of "overdone".  Exactly which were not needed
 363         @ are governed by the position of the bit, stored in ip.
 364         @ If we terminated early, because dividend became zero,
 365         @ then none of the below will match, since the bit in ip will not be
 366         @ in the bottom nibble.
 367         @ (A rotate wraps the bit round to the top of the word exactly when the rotate count exceeds the bit position.)
 368         mov     work, #0xe
 369         lsl     work, #28               @ work = 0xe0000000, the top three bits.
 370         and     overdone, work
 371         bne     Over7                   @ Some wrapped bit is set: fixups are needed.
 372         pop     { work }
 373         RET                                     @ No fixups needed
 374 Over7:
 375         mov     curbit, ip
 376         mov     work, #3
 377         ror     curbit, work
 378         tst     overdone, curbit
 379         beq     Over8
 380         lsr     work, divisor, #3
 381         add     dividend, dividend, work        @ Undo the recorded subtraction of divisor >> 3.
 382 Over8:
 383         mov     curbit, ip
 384         mov     work, #2
 385         ror     curbit, work
 386         tst     overdone, curbit
 387         beq     Over9
 388         lsr     work, divisor, #2
 389         add     dividend, dividend, work        @ Undo the recorded subtraction of divisor >> 2.
 390 Over9:
 391         mov     curbit, ip
 392         mov     work, #1
 393         ror     curbit, work
 394         tst     overdone, curbit
 395         beq     Over10
 396         lsr     work, divisor, #1
 397         add     dividend, dividend, work        @ Undo the recorded subtraction of divisor >> 1.
 398 Over10:
 399         pop     { work }
 400         RET                             @ The remainder is already in r0 (dividend).
 401 Ldiv0:
 402         push    { lr }                  @ The bl below overwrites lr, so keep the return address on the stack.
 403         bl      SYM (__div0) __PLT__
 404         mov     r0, #0                  @ about as wrong as it could be
 405         pop     { pc }
 406
 407 #else  /* arm version */
 408
 409         cmp     divisor, #0
 410         beq     Ldiv0                   @ A zero divisor is handled out of line.
 411         mov     curbit, #1
 412         cmp     dividend, divisor
 413         RETc(cc)                        @ dividend < divisor: it already is the remainder.
 414 Loop1:
 415         @ Unless the divisor is very big, shift it up in multiples of
 416         @ four bits, since this is the amount of unwinding in the main
 417         @ division loop.  Continue shifting until the divisor is
 418         @ larger than the dividend.
 419         cmp     divisor, #0x10000000
 420         cmpcc   divisor, dividend       @ Only compared when divisor < 0x10000000.
 421         movcc   divisor, divisor, lsl #4
 422         movcc   curbit, curbit, lsl #4
 423         bcc     Loop1                   @ The two mov leave the flags alone, so this still tests the compares.
 424
 425 Lbignum:
 426         @ For very big divisors, we must shift it a bit at a time, or
 427         @ we will be in danger of overflowing.
 428         cmp     divisor, #0x80000000
 429         cmpcc   divisor, dividend
 430         movcc   divisor, divisor, lsl #1
 431         movcc   curbit, curbit, lsl #1
 432         bcc     Lbignum
 433
 434 Loop3:
 435         @ Test for possible subtractions.  On the final pass, this may
 436         @ subtract too much from the dividend, so keep track of which
 437         @ subtractions are done, we can fix them up afterwards...
 438         mov     overdone, #0            @ Reset on every pass: only the record of the last pass is used.
 439         cmp     dividend, divisor
 440         subcs   dividend, dividend, divisor
 441         cmp     dividend, divisor, lsr #1
 442         subcs   dividend, dividend, divisor, lsr #1
 443         orrcs   overdone, overdone, curbit, ror #1
 444         cmp     dividend, divisor, lsr #2
 445         subcs   dividend, dividend, divisor, lsr #2
 446         orrcs   overdone, overdone, curbit, ror #2
 447         cmp     dividend, divisor, lsr #3
 448         subcs   dividend, dividend, divisor, lsr #3
 449         orrcs   overdone, overdone, curbit, ror #3
 450         mov     ip, curbit              @ Remember the bit position of this pass for the fixups below.
 451         cmp     dividend, #0                    @ Early termination?
 452         movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
 453         movne   divisor, divisor, lsr #4
 454         bne     Loop3
 455
 456         @ Any subtractions that we should not have done will be recorded in
 457         @ the top three bits of "overdone".  Exactly which were not needed
 458         @ are governed by the position of the bit, stored in ip.
 459         @ If we terminated early, because dividend became zero,
 460         @ then none of the below will match, since the bit in ip will not be
 461         @ in the bottom nibble.
 462         ands    overdone, overdone, #0xe0000000
 463         RETc(eq)                                @ No fixups needed
 464         tst     overdone, ip, ror #3
 465         addne   dividend, dividend, divisor, lsr #3     @ Undo the recorded subtraction of divisor >> 3.
 466         tst     overdone, ip, ror #2
 467         addne   dividend, dividend, divisor, lsr #2     @ Undo the recorded subtraction of divisor >> 2.
 468         tst     overdone, ip, ror #1
 469         addne   dividend, dividend, divisor, lsr #1     @ Undo the recorded subtraction of divisor >> 1.
 470         RET                             @ The remainder is already in r0 (dividend).
 471
 472 Ldiv0:
 473         str     lr, [sp, #-4]!          @ Push lr: the bl below overwrites it.
 474         bl      SYM (__div0) __PLT__
 475         mov     r0, #0                  @ about as wrong as it could be
 476         ldmia   sp!, {pc}RETCOND
 477
 478 #endif /* arm version */
 479
 480  SIZE   (__umodsi3)
 481
 482 #endif /* L_umodsi3 */
 483
 484 #ifdef L_divsi3
 485 @ __divsi3: signed 32-bit division.  In: r0 = dividend, r1 = divisor.  Out: r0 = quotient, or 0 after calling __div0 when the divisor is zero.
 486 dividend        .req    r0      @ Numerator; made non-negative, then reduced by each successful trial subtraction.
 487 divisor         .req    r1      @ Denominator; made non-negative, shifted up, then back down 4 bits per pass.
 488 result          .req    r2      @ Magnitude of the quotient collected so far.
 489 curbit          .req    r3      @ One set bit: the quotient bit matching the current divisor shift.
 490 ip              .req    r12     @ dividend eor divisor; its top bit is the sign of the quotient.
 491 sp              .req    r13
 492 lr              .req    r14
 493 pc              .req    r15
 494
 495  FUNC_START divsi3
 496
 497 #ifdef __thumb__
 498         cmp     divisor, #0
 499         beq     Ldiv0                   @ A zero divisor is handled out of line, before work is pushed.
 500
 501         push    { work }                @ Popped again at Over7, the only exit after this point.
 502         mov     work, dividend
 503         eor     work, divisor           @ Save the sign of the result.
 504         mov     ip, work
 505         mov     curbit, #1
 506         mov     result, #0
 507         cmp     divisor, #0
 508         bpl     Over1
 509         neg     divisor, divisor        @ Loops below use unsigned.
 510 Over1:
 511         cmp     dividend, #0
 512         bpl     Over2
 513         neg     dividend, dividend
 514 Over2:
 515         cmp     dividend, divisor
 516         bcc     Lgot_result             @ Magnitude of dividend < magnitude of divisor: the quotient is 0.
 517
 518         mov     work, #1
 519         lsl     work, #28               @ work = 0x10000000.
 520 Loop1:
 521         @ Unless the divisor is very big, shift it up in multiples of
 522         @ four bits, since this is the amount of unwinding in the main
 523         @ division loop.  Continue shifting until the divisor is
 524         @ larger than the dividend.
 525         cmp     divisor, work
 526         Bcs     Lbignum
 527         cmp     divisor, dividend
 528         Bcs     Lbignum
 529         lsl     divisor, #4
 530         lsl     curbit, #4
 531         b       Loop1
 532
 533 Lbignum:
 534         @ For very big divisors, we must shift it a bit at a time, or
 535         @ we will be in danger of overflowing.
 536         lsl     work, #3                @ work = 0x80000000.
 537 Loop2:
 538         cmp     divisor, work
 539         Bcs     Loop3
 540         cmp     divisor, dividend
 541         Bcs     Loop3
 542         lsl     divisor, #1
 543         lsl     curbit, #1
 544         b       Loop2
 545
 546 Loop3:
 547         @ Test for possible subtractions, and note which bits
 548         @ are done in the result.  On the final pass, this may subtract
 549         @ too much from the dividend, but the result will be ok, since the
 550         @ "bit" will have been shifted out at the bottom.
 551         cmp     dividend, divisor
 552         Bcc     Over3
 553         sub     dividend, dividend, divisor
 554         orr     result, result, curbit
 555 Over3:
 556         lsr     work, divisor, #1       @ Same trial against divisor >> 1 ...
 557         cmp     dividend, work
 558         Bcc     Over4
 559         sub     dividend, dividend, work
 560         lsr     work, curbit, #1        @ ... whose quotient bit is curbit >> 1.
 561         orr     result, work
 562 Over4:
 563         lsr     work, divisor, #2       @ Trial against divisor >> 2, quotient bit curbit >> 2.
 564         cmp     dividend, work
 565         Bcc     Over5
 566         sub     dividend, dividend, work
 567         lsr     work, curbit, #2
 568         orr     result, result, work
 569 Over5:
 570         lsr     work, divisor, #3       @ Trial against divisor >> 3, quotient bit curbit >> 3.
 571         cmp     dividend, work
 572         Bcc     Over6
 573         sub     dividend, dividend, work
 574         lsr     work, curbit, #3
 575         orr     result, result, work
 576 Over6:
 577         cmp     dividend, #0                    @ Early termination?
 578         Beq     Lgot_result
 579         lsr     curbit, #4                      @ No, any more bits to do?
 580         Beq     Lgot_result
 581         lsr     divisor, #4
 582         b       Loop3
 583
 584 Lgot_result:
 585         mov     r0, result
 586         mov     work, ip                @ Fetch the saved sign word into a low register so it can be compared.
 587         cmp     work, #0
 588         Bpl     Over7
 589         neg     r0, r0                  @ The operands had different signs: negate the quotient.
 590 Over7:
 591         pop     { work }
 592         RET
 593
 594 Ldiv0:
 595         push    { lr }                  @ The bl below overwrites lr, so keep the return address on the stack.
 596         bl      SYM (__div0) __PLT__
 597         mov     r0, #0                  @ about as wrong as it could be
 598         pop     { pc }
 599
 600 #else /* arm version */
 601
 602         eor     ip, dividend, divisor           @ Save the sign of the result.
 603         mov     curbit, #1
 604         mov     result, #0
 605         cmp     divisor, #0
 606         rsbmi   divisor, divisor, #0            @ Loops below use unsigned.
 607         beq     Ldiv0                   @ rsbmi leaves the flags alone, so this still tests divisor == 0.
 608         cmp     dividend, #0
 609         rsbmi   dividend, dividend, #0
 610         cmp     dividend, divisor
 611         bcc     Lgot_result             @ Magnitude of dividend < magnitude of divisor: the quotient is 0.
 612
 613 Loop1:
 614         @ Unless the divisor is very big, shift it up in multiples of
 615         @ four bits, since this is the amount of unwinding in the main
 616         @ division loop.  Continue shifting until the divisor is
 617         @ larger than the dividend.
 618         cmp     divisor, #0x10000000
 619         cmpcc   divisor, dividend       @ Only compared when divisor < 0x10000000.
 620         movcc   divisor, divisor, lsl #4
 621         movcc   curbit, curbit, lsl #4
 622         bcc     Loop1                   @ The two mov leave the flags alone, so this still tests the compares.
 623
 624 Lbignum:
 625         @ For very big divisors, we must shift it a bit at a time, or
 626         @ we will be in danger of overflowing.
 627         cmp     divisor, #0x80000000
 628         cmpcc   divisor, dividend
 629         movcc   divisor, divisor, lsl #1
 630         movcc   curbit, curbit, lsl #1
 631         bcc     Lbignum
 632
 633 Loop3:
 634         @ Test for possible subtractions, and note which bits
 635         @ are done in the result.  On the final pass, this may subtract
 636         @ too much from the dividend, but the result will be ok, since the
 637         @ "bit" will have been shifted out at the bottom.
 638         cmp     dividend, divisor
 639         subcs   dividend, dividend, divisor
 640         orrcs   result, result, curbit
 641         cmp     dividend, divisor, lsr #1
 642         subcs   dividend, dividend, divisor, lsr #1
 643         orrcs   result, result, curbit, lsr #1
 644         cmp     dividend, divisor, lsr #2
 645         subcs   dividend, dividend, divisor, lsr #2
 646         orrcs   result, result, curbit, lsr #2
 647         cmp     dividend, divisor, lsr #3
 648         subcs   dividend, dividend, divisor, lsr #3
 649         orrcs   result, result, curbit, lsr #3
 650         cmp     dividend, #0                    @ Early termination?
 651         movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
 652         movne   divisor, divisor, lsr #4        @ Skipped once curbit has been shifted down to zero.
 653         bne     Loop3
 654 Lgot_result:
 655         mov     r0, result
 656         cmp     ip, #0
 657         rsbmi   r0, r0, #0              @ The operands had different signs: negate the quotient.
 658         RET
 659
 660 Ldiv0:
 661         str     lr, [sp, #-4]!          @ Push lr: the bl below overwrites it.
 662         bl      SYM (__div0) __PLT__
 663         mov     r0, #0                  @ about as wrong as it could be
 664         ldmia   sp!, {pc}RETCOND
 665
 666 #endif /* arm version */
 667
 668  SIZE   (__divsi3)
 669
 670 #endif /* L_divsi3 */
 671
 672 #ifdef L_modsi3
 673 @ __modsi3: signed 32-bit remainder.  In: r0 = dividend, r1 = divisor.  Out: r0 = remainder, negated when the original dividend was negative, or 0 after calling __div0 when the divisor is zero.
 674 dividend        .req    r0      @ Numerator; made non-negative, then reduced in place until it is the remainder.
 675 divisor         .req    r1      @ Denominator; made non-negative, shifted up, then back down 4 bits per pass.
 676 overdone        .req    r2      @ Records which of the shifted subtractions were made in the current pass.
 677 curbit          .req    r3      @ One set bit tracking the current divisor shift.
 678 ip              .req    r12     @ Holds the value curbit had during the last pass.
 679 sp              .req    r13
 680 lr              .req    r14
 681 pc              .req    r15
 682
 683  FUNC_START modsi3
 684
 685 #ifdef __thumb__
 686
 687         mov     curbit, #1
 688         cmp     divisor, #0
 689         beq     Ldiv0                   @ A zero divisor is handled out of line, before anything is pushed.
 690         Bpl     Over1
 691         neg     divisor, divisor                @ Loops below use unsigned.
 692 Over1:
 693         push    { work }
 694         @ Need to save the sign of the dividend, unfortunately, we need
 695         @ ip later on.  Must do this after saving the original value of
 696         @ the work register, because we will pop this value off first.
 697         push    { dividend }
 698         cmp     dividend, #0
 699         Bpl     Over2
 700         neg     dividend, dividend
 701 Over2:
 702         cmp     dividend, divisor
 703         bcc     Lgot_result             @ Magnitude of dividend < magnitude of divisor: it already is the remainder.
 704         mov     work, #1
 705         lsl     work, #28               @ work = 0x10000000.
 706 Loop1:
 707         @ Unless the divisor is very big, shift it up in multiples of
 708         @ four bits, since this is the amount of unwinding in the main
 709         @ division loop.  Continue shifting until the divisor is
 710         @ larger than the dividend.
 711         cmp     divisor, work
 712         bcs     Lbignum
 713         cmp     divisor, dividend
 714         bcs     Lbignum
 715         lsl     divisor, #4
 716         lsl     curbit, #4
 717         b       Loop1
 718
 719 Lbignum:
 720         @ Set work to 0x80000000
 721         lsl     work, #3
 722 Loop2:
 723         @ For very big divisors, we must shift it a bit at a time, or
 724         @ we will be in danger of overflowing.
 725         cmp     divisor, work
 726         bcs     Loop3
 727         cmp     divisor, dividend
 728         bcs     Loop3
 729         lsl     divisor, #1
 730         lsl     curbit, #1
 731         b       Loop2
 732
 733 Loop3:
 734         @ Test for possible subtractions.  On the final pass, this may
 735         @ subtract too much from the dividend, so keep track of which
 736         @ subtractions are done, we can fix them up afterwards...
 737         mov     overdone, #0            @ Reset on every pass: only the record of the last pass is used.
 738         cmp     dividend, divisor
 739         bcc     Over3
 740         sub     dividend, dividend, divisor
 741 Over3:
 742         lsr     work, divisor, #1
 743         cmp     dividend, work
 744         bcc     Over4
 745         sub     dividend, dividend, work
 746         mov     ip, curbit              @ Thumb ror takes its count in a register and rotates in place, so park curbit in ip ...
 747         mov     work, #1
 748         ror     curbit, work
 749         orr     overdone, curbit        @ ... record curbit rotated right by 1 ...
 750         mov     curbit, ip              @ ... and put curbit back.
 751 Over4:
 752         lsr     work, divisor, #2
 753         cmp     dividend, work
 754         bcc     Over5
 755         sub     dividend, dividend, work
 756         mov     ip, curbit
 757         mov     work, #2
 758         ror     curbit, work
 759         orr     overdone, curbit        @ Record curbit rotated right by 2.
 760         mov     curbit, ip
 761 Over5:
 762         lsr     work, divisor, #3
 763         cmp     dividend, work
 764         bcc     Over6
 765         sub     dividend, dividend, work
 766         mov     ip, curbit
 767         mov     work, #3
 768         ror     curbit, work
 769         orr     overdone, curbit        @ Record curbit rotated right by 3.
 770         mov     curbit, ip
 771 Over6:
 772         mov     ip, curbit              @ Remember the bit position of this pass for the fixups below.
 773         cmp     dividend, #0                    @ Early termination?
 774         beq     Over7
 775         lsr     curbit, #4                      @ No, any more bits to do?
 776         beq     Over7
 777         lsr     divisor, #4
 778         b       Loop3
 779
 780 Over7:
 781         @ Any subtractions that we should not have done will be recorded in
 782         @ the top three bits of "overdone".  Exactly which were not needed
 783         @ are governed by the position of the bit, stored in ip.
 784         @ If we terminated early, because dividend became zero,
 785         @ then none of the below will match, since the bit in ip will not be
 786         @ in the bottom nibble.
 787         mov     work, #0xe
 788         lsl     work, #28               @ work = 0xe0000000, the top three bits.
 789         and     overdone, work
 790         beq     Lgot_result             @ No wrapped bit is set: no fixups needed.
 791
 792         mov     curbit, ip
 793         mov     work, #3
 794         ror     curbit, work
 795         tst     overdone, curbit
 796         beq     Over8
 797         lsr     work, divisor, #3
 798         add     dividend, dividend, work        @ Undo the recorded subtraction of divisor >> 3.
 799 Over8:
 800         mov     curbit, ip
 801         mov     work, #2
 802         ror     curbit, work
 803         tst     overdone, curbit
 804         beq     Over9
 805         lsr     work, divisor, #2
 806         add     dividend, dividend, work        @ Undo the recorded subtraction of divisor >> 2.
 807 Over9:
 808         mov     curbit, ip
 809         mov     work, #1
 810         ror     curbit, work
 811         tst     overdone, curbit
 812         beq     Lgot_result
 813         lsr     work, divisor, #1
 814         add     dividend, dividend, work        @ Undo the recorded subtraction of divisor >> 1.
 815 Lgot_result:
 816         pop     { work }                @ work = the original dividend pushed after Over1.
 817         cmp     work, #0
 818         bpl     Over10
 819         neg     dividend, dividend      @ The original dividend was negative: negate the remainder.
 820 Over10:
 821         pop     { work }                @ Restore the value work had on entry.
 822         RET
 823
 824 Ldiv0:
 825         push    { lr }                  @ The bl below overwrites lr, so keep the return address on the stack.
 826         bl      SYM (__div0) __PLT__
 827         mov     r0, #0                  @ about as wrong as it could be
 828         pop     { pc }
 829
 830 #else /* arm version */
 831
 832         mov     curbit, #1
 833         cmp     divisor, #0
 834         rsbmi   divisor, divisor, #0            @ Loops below use unsigned.
 835         beq     Ldiv0                   @ rsbmi leaves the flags alone, so this still tests divisor == 0.
 836         @ Need to save the sign of the dividend, unfortunately, we need
 837         @ ip later on; this is faster than pushing lr and using that.
 838         str     dividend, [sp, #-4]!
 839         cmp     dividend, #0
 840         rsbmi   dividend, dividend, #0
 841         cmp     dividend, divisor
 842         bcc     Lgot_result             @ Magnitude of dividend < magnitude of divisor: it already is the remainder.
 843
 844 Loop1:
 845         @ Unless the divisor is very big, shift it up in multiples of
 846         @ four bits, since this is the amount of unwinding in the main
 847         @ division loop.  Continue shifting until the divisor is
 848         @ larger than the dividend.
 849         cmp     divisor, #0x10000000
 850         cmpcc   divisor, dividend       @ Only compared when divisor < 0x10000000.
 851         movcc   divisor, divisor, lsl #4
 852         movcc   curbit, curbit, lsl #4
 853         bcc     Loop1                   @ The two mov leave the flags alone, so this still tests the compares.
 854
 855 Lbignum:
 856         @ For very big divisors, we must shift it a bit at a time, or
 857         @ we will be in danger of overflowing.
 858         cmp     divisor, #0x80000000
 859         cmpcc   divisor, dividend
 860         movcc   divisor, divisor, lsl #1
 861         movcc   curbit, curbit, lsl #1
 862         bcc     Lbignum
 863
 864 Loop3:
 865         @ Test for possible subtractions.  On the final pass, this may
 866         @ subtract too much from the dividend, so keep track of which
 867         @ subtractions are done, we can fix them up afterwards...
 868         mov     overdone, #0            @ Reset on every pass: only the record of the last pass is used.
 869         cmp     dividend, divisor
 870         subcs   dividend, dividend, divisor
 871         cmp     dividend, divisor, lsr #1
 872         subcs   dividend, dividend, divisor, lsr #1
 873         orrcs   overdone, overdone, curbit, ror #1
 874         cmp     dividend, divisor, lsr #2
 875         subcs   dividend, dividend, divisor, lsr #2
 876         orrcs   overdone, overdone, curbit, ror #2
 877         cmp     dividend, divisor, lsr #3
 878         subcs   dividend, dividend, divisor, lsr #3
 879         orrcs   overdone, overdone, curbit, ror #3
 880         mov     ip, curbit              @ Remember the bit position of this pass for the fixups below.
 881         cmp     dividend, #0                    @ Early termination?
 882         movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
 883         movne   divisor, divisor, lsr #4
 884         bne     Loop3
 885
 886         @ Any subtractions that we should not have done will be recorded in
 887         @ the top three bits of "overdone".  Exactly which were not needed
 888         @ are governed by the position of the bit, stored in ip.
 889         @ If we terminated early, because dividend became zero,
 890         @ then none of the below will match, since the bit in ip will not be
 891         @ in the bottom nibble.
 892         ands    overdone, overdone, #0xe0000000
 893         beq     Lgot_result             @ No wrapped bit is set: no fixups needed.
 894         tst     overdone, ip, ror #3
 895         addne   dividend, dividend, divisor, lsr #3     @ Undo the recorded subtraction of divisor >> 3.
 896         tst     overdone, ip, ror #2
 897         addne   dividend, dividend, divisor, lsr #2     @ Undo the recorded subtraction of divisor >> 2.
 898         tst     overdone, ip, ror #1
 899         addne   dividend, dividend, divisor, lsr #1     @ Undo the recorded subtraction of divisor >> 1.
 900 Lgot_result:
 901         ldr     ip, [sp], #4            @ Pop the original dividend saved on entry.
 902         cmp     ip, #0
 903         rsbmi   dividend, dividend, #0  @ The original dividend was negative: negate the remainder.
 904         RET
 905
 906 Ldiv0:
 907         str     lr, [sp, #-4]!          @ Push lr: the bl below overwrites it.
 908         bl      SYM (__div0) __PLT__
 909         mov     r0, #0                  @ about as wrong as it could be
 910         ldmia   sp!, {pc}RETCOND
 911
 912 #endif /* arm version */
 913
 914  SIZE   (__modsi3)
 915
 916 #endif /* L_modsi3 */
 917
 918 #ifdef L_dvmd_tls
 919 @ Division-by-zero handler stub: performs no action and returns to its caller.
 920  FUNC_START div0
 921
 922         RET                             @ nothing to do, return at once
 923
 924  SIZE   (__div0)
 925
 926 #endif /* L_dvmd_tls */
 927
 928 #ifdef L_dvmd_lnx
 929 @ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls
 930 @ Looks up the current process id and sends that process signal 8 via kill.
 931 #include <asm/unistd.h>
 932
 933 #define SIGFPE  8                       @ cant use <asm/signal.h> as it
 934                                         @ contains too much C rubbish
 935  FUNC_START div0
 936
 937         stmfd   sp!, {r1, lr}           @ save r1 and the return address
 938         swi     __NR_getpid             @ getpid system call, result in r0
 939         cmn     r0, #1000               @ carry set iff r0 >= -1000 (unsigned)
 940         ldmhsfd sp!, {r1, pc}RETCOND    @ presumably an error: not much we can do
 941         mov     r1, #SIGFPE             @ r1 = signal number for kill
 942         swi     __NR_kill               @ kill (r0 = own pid, r1 = signal)
 943         ldmfd   sp!, {r1, pc}RETCOND    @ restore r1 and return to the caller
 944 @ NOTE(review): both exits load pc from the stack - confirm this is safe for Thumb interworking callers.
 945  SIZE   (__div0)
 946
 947 #endif /* L_dvmd_lnx */
 948
 949 /* These next two sections are here despite the fact that they contain Thumb
 950    assembler because their presence allows interworked code to be linked even
 951    when the GCC library is this one.  */
 952
 953 /* Do not build the interworking functions when the target cpu
 954    is the arm v3 architecture.  (This is one of the multilib
 955    options).  */
 956 #if defined L_call_via_rX && ! defined __ARM_ARCH_3__
 957
 958 /* These labels & instructions are used by the Arm/Thumb interworking code.
 959    The address of the function to be called is loaded into a register and then
 960    one of these labels is called via a BL instruction.  This puts the
 961    return address into the link register with the bottom bit set, and the
 962    code here switches to the correct mode before executing the function.  */
 963 @ call_via REG emits one global Thumb stub labelled _call_via_REG.
 964         .text
 965         .align 0
 966         .force_thumb                    @ assemble what follows as Thumb code
 967 .macro call_via register
 968         .globl  SYM (_call_via_\register)
 969         TYPE    (_call_via_\register)
 970         .thumb_func
 971 SYM (_call_via_\register):
 972         bx      \register               @ jump to the target; bit 0 of the address selects the state
 973         nop                             @ presumably padding to a 4 byte stub - TODO confirm
 974
 975         SIZE    (_call_via_\register)
 976 .endm
 977 @ Instantiate one stub for each of r0-r9, sl, fp, ip, sp and lr.
 978         call_via r0
 979         call_via r1
 980         call_via r2
 981         call_via r3
 982         call_via r4
 983         call_via r5
 984         call_via r6
 985         call_via r7
 986         call_via r8
 987         call_via r9
 988         call_via sl
 989         call_via fp
 990         call_via ip
 991         call_via sp
 992         call_via lr
 993
 994 #endif /* L_call_via_rX */
 995
 996 /* Do not build the interworking functions when the target cpu
 997    is the arm v3 architecture.  (This is one of the multilib
 998    options).  */
 999 #if defined L_interwork_call_via_rX && ! defined __ARM_ARCH_3__
1000
1001 /* These labels & instructions are used by the Arm/Thumb interworking code,
1002    when the target address is in an unknown instruction set.  The address
1003    of the function to be called is loaded into a register and then one of these
1004    labels is called via a BL instruction.  This puts the return address
1005    into the link register with the bottom bit set, and the code here
1006    switches to the correct mode before executing the function.  Unfortunately
1007    the target code cannot be relied upon to return via a BX instruction, so
1008    instead we have to store the return address on the stack and allow the
1009    called function to return here instead.  Upon return we recover the real
1010    return address and use a BX to get back to Thumb mode.  */
1011
1012         .text
1013         .align 0
1014 @ _arm_return: ARM-state landing pad that callees entered in ARM state return to.
1015         .code   32
1016         .globl _arm_return
1017 _arm_return:
1018         ldmia   r13!, {r12}             @ pop the return address pushed by the stub
1019         bx      r12                     @ return to the original caller in its own state
1020         .code   16
1021 @ interwork REG emits the Thumb entry point _interwork_call_via_REG plus its ARM-state body.
1022 .macro interwork register
1023         .code   16
1024         .globl  SYM (_interwork_call_via_\register)
1025         TYPE    (_interwork_call_via_\register)
1026         .thumb_func
1027 SYM (_interwork_call_via_\register):
1028         bx      pc                      @ switch to ARM state; execution resumes after the nop
1029         nop                             @ filler between the bx and the ARM code
1030
1031         .code   32
1032         .globl .Lchange_\register       @ NOTE(review): .L labels are normally local - confirm the .globl is intended
1033 .Lchange_\register:
1034         tst     \register, #1           @ bit 0 clear means the target is ARM code
1035         stmeqdb r13!, {lr}              @ ARM target: push the real return address
1036         adreq   lr, _arm_return         @ ARM target: make the callee return to _arm_return
1037         bx      \register               @ enter the target in the state given by bit 0
1038
1039         SIZE    (_interwork_call_via_\register)
1040 .endm
1041 @ Instantiate one stub for each of r0-r9, sl, fp, ip and sp.
1042         interwork r0
1043         interwork r1
1044         interwork r2
1045         interwork r3
1046         interwork r4
1047         interwork r5
1048         interwork r6
1049         interwork r7
1050         interwork r8
1051         interwork r9
1052         interwork sl
1053         interwork fp
1054         interwork ip
1055         interwork sp
1056
1057         /* The lr case has to be handled a little differently...*/
1058         .code 16
1059         .globl  SYM (_interwork_call_via_lr)
1060         TYPE    (_interwork_call_via_lr)
1061         .thumb_func
1062 SYM (_interwork_call_via_lr):
1063         bx      pc                      @ switch to ARM state; execution resumes after the nop
1064         nop                             @ filler between the bx and the ARM code
1065
1066         .code 32
1067         .globl .Lchange_lr
1068 .Lchange_lr:
1069         tst     lr, #1                  @ bit 0 clear means the value in lr is ARM code
1070         stmeqdb r13!, {lr}              @ ARM case: push lr for _arm_return to pop
1071         mov     ip, lr                  @ copy lr to ip (flags untouched) before lr is overwritten
1072         adreq   lr, _arm_return         @ ARM case: make the callee return to _arm_return
1073         bx      ip                      @ branch to the copied address
1074
1075         SIZE    (_interwork_call_via_lr)
1076
1077 #endif /* L_interwork_call_via_rX */