gcc/config/arm/lib1funcs.asm

   1 @ libgcc routines for ARM cpu.
   2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
   3
   4 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007
   5    Free Software Foundation, Inc.
   6
   7 This file is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 2, or (at your option) any
  10 later version.
  11
  12 In addition to the permissions in the GNU General Public License, the
  13 Free Software Foundation gives you unlimited permission to link the
  14 compiled version of this file into combinations with other programs,
  15 and to distribute those combinations without any restriction coming
  16 from the use of this file.  (The General Public License restrictions
  17 do apply in other respects; for example, they cover modification of
  18 the file, and distribution when not linked into a combine
  19 executable.)
  20
  21 This file is distributed in the hope that it will be useful, but
  22 WITHOUT ANY WARRANTY; without even the implied warranty of
  23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  24 General Public License for more details.
  25
  26 You should have received a copy of the GNU General Public License
  27 along with this program; see the file COPYING.  If not, write to
  28 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  29 Boston, MA 02110-1301, USA.  */
  30
  31 /* An executable stack is *not* required for these functions.  */
  32 #if defined(__ELF__) && defined(__linux__)
  33 .section .note.GNU-stack,"",%progbits
  34 .previous
  35 #endif
  36
  37 /* ------------------------------------------------------------------------ */
  38
  39 /* We need to know what prefix to add to function names.  The prefix
     comes from __USER_LABEL_PREFIX__, which must already be defined
     when this file is preprocessed.  */
  40
  41 #ifndef __USER_LABEL_PREFIX__
  42 #error  __USER_LABEL_PREFIX__ not defined
  43 #endif
  44
  45 /* ANSI concatenation macros.  CONCAT1 lets its arguments be
     macro-expanded before CONCAT2 pastes them together.  */
  46
  47 #define CONCAT1(a, b) CONCAT2(a, b)
  48 #define CONCAT2(a, b) a ## b
  49
  50 /* Use the right prefix for global labels.  */
  51
  52 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
  53
     /* Object-format helpers.  On ELF, TYPE and SIZE emit .type/.size
        directives for SYM(x), LSYM turns a name into a dot-prefixed
        label, and __PLT__ is the suffix appended to the call in
        ARM_LDIV0.  On other formats TYPE, SIZE and __PLT__ expand to
        nothing and LSYM yields the bare name.  */
  54 #ifdef __ELF__
  55 #ifdef __thumb__
  56 #define __PLT__  /* Not supported in Thumb assembler (for now).  */
  57 #else
  58 #define __PLT__ (PLT)
  59 #endif
  60 #define TYPE(x) .type SYM(x),function
  61 #define SIZE(x) .size SYM(x), . - SYM(x)
  62 #define LSYM(x) .x
  63 #else
  64 #define __PLT__
  65 #define TYPE(x)
  66 #define SIZE(x)
  67 #define LSYM(x) x
  68 #endif
  69
  70 /* Function end macros.  Variants for interworking.
     The blocks below first collapse the per-architecture predefines
     (__ARM_ARCH_4T__, __ARM_ARCH_5TE__, ...) into one numeric
     __ARM_ARCH__ value that the rest of this file tests.  */
  71
  72 #if defined(__ARM_ARCH_2__)
  73 # define __ARM_ARCH__ 2
  74 #endif
  75
  76 #if defined(__ARM_ARCH_3__)
  77 # define __ARM_ARCH__ 3
  78 #endif
  79
  80 #if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
  81         || defined(__ARM_ARCH_4T__)
  82 /* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
  83    long multiply instructions.  That includes v3M.  */
  84 # define __ARM_ARCH__ 4
  85 #endif
  86
  87 #if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
  88         || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
  89         || defined(__ARM_ARCH_5TEJ__)
  90 # define __ARM_ARCH__ 5
  91 #endif
  92
  93 #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
  94         || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
  95         || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)
  96 # define __ARM_ARCH__ 6
  97 #endif
  98
  99 #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
 100         || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
 101 # define __ARM_ARCH__ 7
 102 #endif
 103
     /* None of the predefines above matched: refuse to build rather
        than guess an architecture level.  */
 104 #ifndef __ARM_ARCH__
 105 #error Unable to determine architecture.
 106 #endif
 107
 108 /* How to return from a function call depends on the architecture variant.
     RET is the plain return instruction and RETc(x) the same return
     with condition code x pasted onto the mnemonic.  Architectures
     above v4, and v4T, return with bx lr; everything else uses
     mov pc, lr.  */
 109
 110 #if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
 111
 112 # define RET            bx      lr
 113 # define RETc(x)        bx##x   lr
 114
 115 /* Special precautions for interworking on armv4t.  */
 116 # if (__ARM_ARCH__ == 4)
 117
 118 /* Always use bx, not ldr pc.  __INTERWORKING__ selects the
     load-into-lr-then-bx return sequences in RETLDM and THUMB_LDIV0.  */
 119 #  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
 120 #    define __INTERWORKING__
 121 #   endif /* __thumb__ || __THUMB_INTERWORK__ */
 122
 123 /* Include thumb stub before arm mode code.  __INTERWORKING_STUBS__
     selects the ARM_FUNC_START variant that begins with bx pc.  */
 124 #  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
 125 #   define __INTERWORKING_STUBS__
 126 #  endif /* __thumb__ && !__THUMB_INTERWORK__ */
 127
 128 #endif /* __ARM_ARCH__ == 4 */
 129
 130 #else
 131
 132 # define RET            mov     pc, lr
 133 # define RETc(x)        mov##x  pc, lr
 134
 135 #endif
 136
 137 .macro  cfi_pop         advance, reg, cfa_offset
     /* Append call-frame instructions to .debug_frame: advance the
        location by ADVANCE, mark register number REG as restored, and
        set the CFA offset to CFA_OFFSET.  Emits nothing unless __ELF__
        is defined.  */
 138 #ifdef __ELF__
 139         .pushsection    .debug_frame
 140         .byte   0x4             /* DW_CFA_advance_loc4 */
 141         .4byte  \advance
 142         .byte   (0xc0 | \reg)   /* DW_CFA_restore */
 143         .byte   0xe             /* DW_CFA_def_cfa_offset */
 144         .uleb128 \cfa_offset
 145         .popsection
 146 #endif
 147 .endm
 148 .macro  cfi_push        advance, reg, offset, cfa_offset
     /* Append call-frame instructions to .debug_frame: advance the
        location by ADVANCE, record that register number REG is saved
        at OFFSET, and set the CFA offset to CFA_OFFSET.  OFFSET is
        divided by -4 before being emitted, which matches the data
        alignment factor of -4 that cfi_start puts in the CIE.  Emits
        nothing unless __ELF__ is defined.  */
 149 #ifdef __ELF__
 150         .pushsection    .debug_frame
 151         .byte   0x4             /* DW_CFA_advance_loc4 */
 152         .4byte  \advance
 153         .byte   (0x80 | \reg)   /* DW_CFA_offset */
 154         .uleb128 (\offset / -4)
 155         .byte   0xe             /* DW_CFA_def_cfa_offset */
 156         .uleb128 \cfa_offset
 157         .popsection
 158 #endif
 159 .endm
 160 .macro cfi_start        start_label, end_label
     /* Emit a CIE followed by the header of an FDE into .debug_frame.
        The FDE covers the code from START_LABEL to END_LABEL.  Its
        length field refers to Lend_fde, which cfi_end defines, so each
        cfi_start must be paired with a cfi_end.  Emits nothing unless
        __ELF__ is defined.  */
 161 #ifdef __ELF__
 162         .pushsection    .debug_frame
 163 LSYM(Lstart_frame):
 164         .4byte  LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
 165 LSYM(Lstart_cie):
 166         .4byte  0xffffffff      @ CIE Identifier Tag
 167         .byte   0x1     @ CIE Version
 168         .ascii  "\0"    @ CIE Augmentation
 169         .uleb128 0x1    @ CIE Code Alignment Factor
 170         .sleb128 -4     @ CIE Data Alignment Factor
 171         .byte   0xe     @ CIE RA Column
 172         .byte   0xc     @ DW_CFA_def_cfa
 173         .uleb128 0xd    @ ... CFA register number 0xd
 174         .uleb128 0x0    @ ... with offset 0
 175
 176         .align 2
 177 LSYM(Lend_cie):
 178         .4byte  LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
 179 LSYM(Lstart_fde):
 180         .4byte  LSYM(Lstart_frame)      @ FDE CIE offset
 181         .4byte  \start_label    @ FDE initial location
 182         .4byte  \end_label-\start_label @ FDE address range
 183         .popsection
 184 #endif
 185 .endm
 186 .macro cfi_end  end_label
     /* Close the FDE opened by cfi_start: align and define Lend_fde in
        .debug_frame, then define END_LABEL at the current position in
        the section that was active before.  Note that END_LABEL is
        only defined when __ELF__ is defined.  */
 187 #ifdef __ELF__
 188         .pushsection    .debug_frame
 189         .align  2
 190 LSYM(Lend_fde):
 191         .popsection
 192 \end_label:
 193 #endif
 194 .endm
 195
 196 /* Don't pass dirn, it's there just to get token pasting right.

     RETLDM pops REGS plus the return address from the stack and
     returns, optionally under condition COND.  With REGS empty a
     single word is loaded and sp is advanced by 8.  When
     __INTERWORKING__ is defined the return address is loaded into lr
     and the return is done with bx; if UNWIND is given, a cfi_pop
     measured from that label is emitted first.  Otherwise the return
     address is loaded straight into pc and UNWIND is not used.  */
 197
 198 .macro  RETLDM  regs=, cond=, unwind=, dirn=ia
 199 #if defined (__INTERWORKING__)
 200         .ifc "\regs",""
 201         ldr\cond        lr, [sp], #8
 202         .else
 203 # if defined(__thumb2__)
 204         pop\cond        {\regs, lr}
 205 # else
 206         ldm\cond\dirn   sp!, {\regs, lr}
 207 # endif
 208         .endif
 209         .ifnc "\unwind", ""
 210         /* Mark LR as restored.  */
 211 97:     cfi_pop 97b - \unwind, 0xe, 0x0
 212         .endif
 213         bx\cond lr
 214 #else
 215         /* Caller is responsible for providing IT instruction.  */
 216         .ifc "\regs",""
 217         ldr\cond        pc, [sp], #8
 218         .else
 219 # if defined(__thumb2__)
 220         pop\cond        {\regs, pc}
 221 # else
 222         ldm\cond\dirn   sp!, {\regs, pc}
 223 # endif
 224         .endif
 225 #endif
 226 .endm
 227
 228 /* The Unified assembly syntax allows the same code to be assembled for both
 229    ARM and Thumb-2.  However this is only supported by recent gas, so define
 230    a set of macros to allow ARM code on older assemblers.

     do_it    emits an IT instruction on Thumb-2 and nothing otherwise.
     shift1   is a register shift: OP ARG0, ARG1, ARG2 on Thumb-2,
              mov ARG0, ARG1, OP ARG2 otherwise.
     do_push / do_pop are push/pop on Thumb-2, stmfd/ldmfd on sp
              otherwise.
     COND     pastes a mnemonic from OP1, OP2 and COND; the condition
              goes last on Thumb-2 and in the middle otherwise.  */
 231 #if defined(__thumb2__)
 232 .macro do_it cond, suffix=""
 233         it\suffix       \cond
 234 .endm
 235 .macro shift1 op, arg0, arg1, arg2
 236         \op     \arg0, \arg1, \arg2
 237 .endm
 238 #define do_push push
 239 #define do_pop  pop
 240 #define COND(op1, op2, cond) op1 ## op2 ## cond
 241 /* Perform an arithmetic operation with a variable shift operand.  This
 242    requires two instructions and a scratch register on Thumb-2.  */
 243 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
 244         \shiftop \tmp, \src2, \shiftreg
 245         \name \dest, \src1, \tmp
 246 .endm
 247 #else
 248 .macro do_it cond, suffix=""
 249 .endm
 250 .macro shift1 op, arg0, arg1, arg2
 251         mov     \arg0, \arg1, \op \arg2
 252 .endm
 253 #define do_push stmfd sp!,
 254 #define do_pop  ldmfd sp!,
 255 #define COND(op1, op2, cond) op1 ## cond ## op2
     /* Outside Thumb-2 the shifted operand fits in one instruction, so
        the TMP argument is accepted but not used.  */
 256 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
 257         \name \dest, \src1, \src2, \shiftop \shiftreg
 258 .endm
 259 #endif
 260
 261 .macro ARM_LDIV0 name
     /* ARM-mode divide-by-zero tail for function NAME: save lr, call
        __div0, then return 0 in r0.  The cfi_push advance is measured
        from the start of the function's own symbol.  */
 262         str     lr, [sp, #-8]!          @ Store lr and move sp down by 8.
 263 98:     cfi_push 98b - __\name, 0xe, -0x8, 0x8
 264         bl      SYM (__div0) __PLT__
 265         mov     r0, #0                  @ About as wrong as it could be.
 266         RETLDM  unwind=98b
 267 .endm
 268
 269
 270 .macro THUMB_LDIV0 name
     /* Thumb divide-by-zero tail for function NAME: push r1 and lr
        (two words), call __div0, then return 0 in r0.  The cfi_push
        records lr at offset -4 with a CFA offset of 8.  With
        __INTERWORKING__ the return address is popped into r2 and the
        return is done with bx; otherwise it is popped into pc.  */
 271         push    { r1, lr }
 272 98:     cfi_push 98b - __\name, 0xe, -0x4, 0x8
 273         bl      SYM (__div0)
 274         mov     r0, #0                  @ About as wrong as it could be.
 275 #if defined (__INTERWORKING__)
 276         pop     { r1, r2 }
 277         bx      r2
 278 #else
 279         pop     { r1, pc }
 280 #endif
 281 .endm
 282
 283 .macro FUNC_END name
     /* Mark the end of function __NAME.  SIZE emits a .size directive
        on ELF and nothing on other object formats.  */
 284         SIZE (__\name)
 285 .endm
 286
 287 .macro DIV_FUNC_END name
     /* End a division function: define the Ldiv0 label that the
        function bodies branch to on a zero divisor, emit the Thumb or
        ARM divide-by-zero tail behind it, wrap the function in frame
        information running from __NAME to Lend_div0, and finish with
        FUNC_END.  */
 288         cfi_start       __\name, LSYM(Lend_div0)
 289 LSYM(Ldiv0):
 290 #ifdef __thumb__
 291         THUMB_LDIV0 \name
 292 #else
 293         ARM_LDIV0 \name
 294 #endif
 295         cfi_end LSYM(Lend_div0)
 296         FUNC_END \name
 297 .endm
 298
 299 .macro THUMB_FUNC_START name
     /* Begin a global Thumb function.  Unlike FUNC_START, NAME is used
        as given (no __ is prepended) and no .text, .align or syntax
        directives are emitted.  */
 300         .globl  SYM (\name)
 301         TYPE    (\name)
 302         .thumb_func
 303 SYM (\name):
 304 .endm
 305
 306 /* Function start macros.  Variants for ARM and Thumb.
     THUMB_FUNC, THUMB_CODE and THUMB_SYNTAX are the directives that
     FUNC_START places before a function label.  When __thumb__ is not
     defined all three are empty; THUMB_SYNTAX is non-empty only when
     __thumb2__ is also defined.  */
 307
 308 #ifdef __thumb__
 309 #define THUMB_FUNC .thumb_func
 310 #define THUMB_CODE .force_thumb
 311 # if defined(__thumb2__)
 312 #define THUMB_SYNTAX .syntax divided
 313 # else
 314 #define THUMB_SYNTAX
 315 # endif
 316 #else
 317 #define THUMB_FUNC
 318 #define THUMB_CODE
 319 #define THUMB_SYNTAX
 320 #endif
 321
 322 .macro FUNC_START name
     /* Begin global function __NAME in .text.  The THUMB_* directives
        expand to nothing unless __thumb__ is defined, so the same
        macro opens both ARM and Thumb functions.  */
 323         .text
 324         .globl SYM (__\name)
 325         TYPE (__\name)
 326         .align 0
 327         THUMB_CODE
 328         THUMB_FUNC
 329         THUMB_SYNTAX
 330 SYM (__\name):
 331 .endm
 332
 333 /* Special function that will always be coded in ARM assembly, even if
 334    in Thumb-only compilation.

     Three variants of ARM_FUNC_START, EQUIV and ARM_CALL follow, one
     per build flavour.  EQUIV is the directive ARM_FUNC_ALIAS uses to
     alias symbols, and ARM_CALL is a bl to another function started
     with ARM_FUNC_START.  */
 335
 336 #if defined(__thumb2__)
 337
 338 /* For Thumb-2 we build everything in thumb mode.  */
 339 .macro ARM_FUNC_START name
 340        FUNC_START \name
 341        .syntax unified
 342 .endm
 343 #define EQUIV .thumb_set
 344 .macro  ARM_CALL name
 345         bl      __\name
 346 .endm
 347
 348 #elif defined(__INTERWORKING_STUBS__)
 349
     /* The function starts as Thumb code whose first instruction,
        bx pc, switches to the ARM code that follows the nop.  */
 350 .macro  ARM_FUNC_START name
 351         FUNC_START \name
 352         bx      pc
 353         nop
 354         .arm
 355 /* A hook to tell gdb that we've switched to ARM mode.  Also used to call
 356    directly from other local arm routines.  */
 357 _L__\name:
 358 .endm
 359 #define EQUIV .thumb_set
 360 /* Branch directly to a function declared with ARM_FUNC_START.
 361    Must be called in arm mode.  */
 362 .macro  ARM_CALL name
 363         bl      _L__\name
 364 .endm
 365
 366 #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
 367
     /* Plain ARM function in .text; no stub and no Thumb directives.  */
 368 .macro  ARM_FUNC_START name
 369         .text
 370         .globl SYM (__\name)
 371         TYPE (__\name)
 372         .align 0
 373         .arm
 374 SYM (__\name):
 375 .endm
 376 #define EQUIV .set
 377 .macro  ARM_CALL name
 378         bl      __\name
 379 .endm
 380
 381 #endif
 382
 383 .macro  FUNC_ALIAS new old
     /* Export __NEW as a global alias of __OLD.  .thumb_set is used
        when __thumb__ is defined and plain .set otherwise.  */
 384         .globl  SYM (__\new)
 385 #if defined (__thumb__)
 386         .thumb_set      SYM (__\new), SYM (__\old)
 387 #else
 388         .set    SYM (__\new), SYM (__\old)
 389 #endif
 390 .endm
 391
 392 .macro  ARM_FUNC_ALIAS new old
     /* Export __NEW as a global alias of __OLD for a function started
        with ARM_FUNC_START, using the EQUIV directive chosen for this
        build.  With __INTERWORKING_STUBS__ the _L__ ARM-mode entry
        label is aliased as well, so ARM_CALL works on the new name.  */
 393         .globl  SYM (__\new)
 394         EQUIV   SYM (__\new), SYM (__\old)
 395 #if defined(__INTERWORKING_STUBS__)
 396         .set    SYM (_L__\new), SYM (_L__\old)
 397 #endif
 398 .endm
 399
 400 #ifdef __thumb__
 401 /* Register aliases used by the Thumb routines and by
     THUMB_DIV_MOD_BODY.  overdone and result name the same register
     (r2): the modulo variant of THUMB_DIV_MOD_BODY uses overdone and
     the division variant uses result.  The functions below push and
     pop work around their use of it.  */
 402
 403 work            .req    r4      @ XXXX is this safe ?
 404 dividend        .req    r0
 405 divisor         .req    r1
 406 overdone        .req    r2
 407 result          .req    r2
 408 curbit          .req    r3
 409 #endif
     /* Disabled aliases for ip, sp, lr and pc; never assembled.  */
 410 #if 0
 411 ip              .req    r12
 412 sp              .req    r13
 413 lr              .req    r14
 414 pc              .req    r15
 415 #endif
 416
 417 /* ------------------------------------------------------------------------ */
 418 /*              Bodies of the division and modulo routines.                 */
 419 /* ------------------------------------------------------------------------ */
 420 .macro ARM_DIV_BODY dividend, divisor, result, curbit
     /* Unsigned division core for ARM mode.  The quotient of
        DIVIDEND / DIVISOR is built in RESULT; CURBIT is scratch.
        DIVIDEND is modified, and so is DIVISOR in the loop variants.
        The callers in this file only reach this macro when the
        divisor is non-zero and not a power of two and the dividend is
        greater than the divisor.

        Three implementations are selected at preprocessing time:
          - v5 and later, not optimizing for size: a fully unrolled
            shift-and-subtract sequence entered part-way through;
          - v5 and later, optimizing for size: clz-based set-up
            followed by the shared division loop;
          - before v5: compare-and-shift set-up followed by the shared
            division loop.  */
 421
 422 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
 423
 424         clz     \curbit, \dividend
 425         clz     \result, \divisor
 426         sub     \curbit, \result, \curbit       @ difference in leading zeros
 427         rsbs    \curbit, \curbit, #31           @ number of unrolled steps to skip
 428         addne   \curbit, \curbit, \curbit, lsl #1 @ times 3 instructions per step
 429         mov     \result, #0
 430         addne   pc, pc, \curbit, lsl #2         @ times 4 bytes; jump into the steps
 431         nop                                     @ pc reads 8 bytes ahead in ARM state
 432         .set    shift, 32
 433         .rept   32
 434         .set    shift, shift - 1
 435         cmp     \dividend, \divisor, lsl #shift
 436         adc     \result, \result, \result       @ shift quotient left, bring in carry
 437         subcs   \dividend, \dividend, \divisor, lsl #shift
 438         .endr
 439
 440 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
 441 #if __ARM_ARCH__ >= 5
 442
 443         clz     \curbit, \divisor
 444         clz     \result, \dividend
 445         sub     \result, \curbit, \result       @ difference in leading zeros
 446         mov     \curbit, #1
 447         mov     \divisor, \divisor, lsl \result @ shift divisor up by that much
 448         mov     \curbit, \curbit, lsl \result   @ curbit marks the matching quotient bit
 449         mov     \result, #0
 450
 451 #else /* __ARM_ARCH__ < 5 */
 452
 453         @ Initially shift the divisor left 3 bits if possible,
 454         @ set curbit accordingly.  This allows for curbit to be located
 455         @ at the left end of each 4-bit nibble in the division loop
 456         @ to save one loop in most cases.
 457         tst     \divisor, #0xe0000000
 458         moveq   \divisor, \divisor, lsl #3
 459         moveq   \curbit, #8
 460         movne   \curbit, #1
 461
 462         @ Unless the divisor is very big, shift it up in multiples of
 463         @ four bits, since this is the amount of unwinding in the main
 464         @ division loop.  Continue shifting until the divisor is
 465         @ larger than the dividend.
 466 1:      cmp     \divisor, #0x10000000
 467         cmplo   \divisor, \dividend
 468         movlo   \divisor, \divisor, lsl #4
 469         movlo   \curbit, \curbit, lsl #4
 470         blo     1b
 471
 472         @ For very big divisors, we must shift it a bit at a time, or
 473         @ we will be in danger of overflowing.
 474 1:      cmp     \divisor, #0x80000000
 475         cmplo   \divisor, \dividend
 476         movlo   \divisor, \divisor, lsl #1
 477         movlo   \curbit, \curbit, lsl #1
 478         blo     1b
 479
 480         mov     \result, #0
 481
 482 #endif /* __ARM_ARCH__ < 5 */
 483
 484         @ Division loop: four quotient bits are tried per iteration,
     @ using the divisor and curbit shifted right by 0, 1, 2 and 3.
 485 1:      cmp     \dividend, \divisor
 486         subhs   \dividend, \dividend, \divisor
 487         orrhs   \result,   \result,   \curbit
 488         cmp     \dividend, \divisor,  lsr #1
 489         subhs   \dividend, \dividend, \divisor, lsr #1
 490         orrhs   \result,   \result,   \curbit,  lsr #1
 491         cmp     \dividend, \divisor,  lsr #2
 492         subhs   \dividend, \dividend, \divisor, lsr #2
 493         orrhs   \result,   \result,   \curbit,  lsr #2
 494         cmp     \dividend, \divisor,  lsr #3
 495         subhs   \dividend, \dividend, \divisor, lsr #3
 496         orrhs   \result,   \result,   \curbit,  lsr #3
 497         cmp     \dividend, #0                   @ Early termination?
 498         movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
 499         movne   \divisor,  \divisor, lsr #4
 500         bne     1b
 501
 502 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
 503
 504 .endm
 505 /* ------------------------------------------------------------------------ */
 506 .macro ARM_DIV2_ORDER divisor, order
     /* Compute in ORDER the bit index of the set bit of DIVISOR.  The
        callers in this file use it only after establishing that the
        divisor is a power of two, and then shift by ORDER instead of
        dividing.  On v5 and later this is 31 minus the leading-zero
        count.  Before v5 it is a binary search that also shifts
        DIVISOR right, so DIVISOR is modified on that path.  */
 507
 508 #if __ARM_ARCH__ >= 5
 509
 510         clz     \order, \divisor
 511         rsb     \order, \order, #31
 512
 513 #else
 514
 515         cmp     \divisor, #(1 << 16)
 516         movhs   \divisor, \divisor, lsr #16
 517         movhs   \order, #16
 518         movlo   \order, #0
 519
 520         cmp     \divisor, #(1 << 8)
 521         movhs   \divisor, \divisor, lsr #8
 522         addhs   \order, \order, #8
 523
 524         cmp     \divisor, #(1 << 4)
 525         movhs   \divisor, \divisor, lsr #4
 526         addhs   \order, \order, #4
 527
     @ The divisor is now at most 8.  Above 4 add 3; otherwise
     @ divisor lsr 1 gives the index directly (1, 2, 4 map to 0, 1, 2).
 528         cmp     \divisor, #(1 << 2)
 529         addhi   \order, \order, #3
 530         addls   \order, \order, \divisor, lsr #1
 531
 532 #endif
 533
 534 .endm
 535 /* ------------------------------------------------------------------------ */
 536 .macro ARM_MOD_BODY dividend, divisor, order, spare
     /* Unsigned remainder core for ARM mode.  DIVIDEND is reduced in
        place by shift-and-subtract until only the remainder is left.
        ORDER and SPARE are scratch, and DIVISOR is modified in the
        loop variants.  The same three build-time variants exist as in
        ARM_DIV_BODY: unrolled (v5 and later, not optimizing for
        size), clz set-up plus loop (v5 and later, optimizing for
        size), and compare-and-shift set-up plus loop (before v5).  */
 537
 538 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
 539
 540         clz     \order, \divisor
 541         clz     \spare, \dividend
 542         sub     \order, \order, \spare          @ difference in leading zeros
 543         rsbs    \order, \order, #31             @ number of unrolled steps to skip
 544         addne   pc, pc, \order, lsl #3          @ 8 bytes (2 instructions) per step
 545         nop                                     @ pc reads 8 bytes ahead in ARM state
 546         .set    shift, 32
 547         .rept   32
 548         .set    shift, shift - 1
 549         cmp     \dividend, \divisor, lsl #shift
 550         subcs   \dividend, \dividend, \divisor, lsl #shift
 551         .endr
 552
 553 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
 554 #if __ARM_ARCH__ >= 5
 555
 556         clz     \order, \divisor
 557         clz     \spare, \dividend
 558         sub     \order, \order, \spare          @ difference in leading zeros
 559         mov     \divisor, \divisor, lsl \order  @ shift divisor up by that much
 560
 561 #else /* __ARM_ARCH__ < 5 */
 562
 563         mov     \order, #0
 564
 565         @ Unless the divisor is very big, shift it up in multiples of
 566         @ four bits, since this is the amount of unwinding in the main
 567         @ division loop.  Continue shifting until the divisor is
 568         @ larger than the dividend.
 569 1:      cmp     \divisor, #0x10000000
 570         cmplo   \divisor, \dividend
 571         movlo   \divisor, \divisor, lsl #4
 572         addlo   \order, \order, #4
 573         blo     1b
 574
 575         @ For very big divisors, we must shift it a bit at a time, or
 576         @ we will be in danger of overflowing.
 577 1:      cmp     \divisor, #0x80000000
 578         cmplo   \divisor, \dividend
 579         movlo   \divisor, \divisor, lsl #1
 580         addlo   \order, \order, #1
 581         blo     1b
 582
 583 #endif /* __ARM_ARCH__ < 5 */
 584
 585         @ Perform all needed subtractions to keep only the remainder.
 586         @ Do comparisons in batch of 4 first.
 587         subs    \order, \order, #3              @ yes, 3 is intended here
 588         blt     2f
 589
 590 1:      cmp     \dividend, \divisor
 591         subhs   \dividend, \dividend, \divisor
 592         cmp     \dividend, \divisor,  lsr #1
 593         subhs   \dividend, \dividend, \divisor, lsr #1
 594         cmp     \dividend, \divisor,  lsr #2
 595         subhs   \dividend, \dividend, \divisor, lsr #2
 596         cmp     \dividend, \divisor,  lsr #3
 597         subhs   \dividend, \dividend, \divisor, lsr #3
 598         cmp     \dividend, #1                   @ flags gate the subges and bge below
 599         mov     \divisor, \divisor, lsr #4
 600         subges  \order, \order, #4
 601         bge     1b
 602
 603         tst     \order, #3
 604         teqne   \dividend, #0
 605         beq     5f
 606
 607         @ Either 1, 2 or 3 comparison/subtractions are left.
 608 2:      cmn     \order, #2
 609         blt     4f
 610         beq     3f
 611         cmp     \dividend, \divisor
 612         subhs   \dividend, \dividend, \divisor
 613         mov     \divisor,  \divisor,  lsr #1
 614 3:      cmp     \dividend, \divisor
 615         subhs   \dividend, \dividend, \divisor
 616         mov     \divisor,  \divisor,  lsr #1
 617 4:      cmp     \dividend, \divisor
 618         subhs   \dividend, \dividend, \divisor
 619 5:
 620
 621 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
 622
 623 .endm
 624 /* ------------------------------------------------------------------------ */
 625 .macro THUMB_DIV_MOD_BODY modulo
     /* Unsigned divide/modulo core for Thumb, written in terms of the
        register aliases dividend, divisor, result/overdone, curbit
        and work.  A non-zero MODULO assembles the remainder variant,
        which leaves the remainder in dividend; zero assembles the
        quotient variant, which ORs quotient bits into result.

        The callers in this file set curbit to 1 (and result to 0 for
        division) before invoking the macro, and push/pop work around
        it.  The remainder variant also uses ip as scratch.  The macro
        ends by defining Lgot_result, which callers also branch to
        directly when the dividend is below the divisor.  */
 626         @ Load the constant 0x10000000 into our work register.
 627         mov     work, #1
 628         lsl     work, #28
 629 LSYM(Loop1):
 630         @ Unless the divisor is very big, shift it up in multiples of
 631         @ four bits, since this is the amount of unwinding in the main
 632         @ division loop.  Continue shifting until the divisor is
 633         @ larger than the dividend.
 634         cmp     divisor, work
 635         bhs     LSYM(Lbignum)
 636         cmp     divisor, dividend
 637         bhs     LSYM(Lbignum)
 638         lsl     divisor, #4
 639         lsl     curbit,  #4
 640         b       LSYM(Loop1)
 641 LSYM(Lbignum):
 642         @ Set work to 0x80000000
 643         lsl     work, #3
 644 LSYM(Loop2):
 645         @ For very big divisors, we must shift it a bit at a time, or
 646         @ we will be in danger of overflowing.
 647         cmp     divisor, work
 648         bhs     LSYM(Loop3)
 649         cmp     divisor, dividend
 650         bhs     LSYM(Loop3)
 651         lsl     divisor, #1
 652         lsl     curbit,  #1
 653         b       LSYM(Loop2)
 654 LSYM(Loop3):
 655         @ Test for possible subtractions ...
 656   .if \modulo
 657         @ ... On the final pass, this may subtract too much from the dividend,
 658         @ so keep track of which subtractions are done, we can fix them up
 659         @ afterwards.
 660         mov     overdone, #0
 661         cmp     dividend, divisor
 662         blo     LSYM(Lover1)
 663         sub     dividend, dividend, divisor
 664 LSYM(Lover1):
 665         lsr     work, divisor, #1
 666         cmp     dividend, work
 667         blo     LSYM(Lover2)
 668         sub     dividend, dividend, work
 669         mov     ip, curbit                      @ keep curbit while it is rotated
 670         mov     work, #1
 671         ror     curbit, work
 672         orr     overdone, curbit                @ note the subtraction of divisor lsr 1
 673         mov     curbit, ip
 674 LSYM(Lover2):
 675         lsr     work, divisor, #2
 676         cmp     dividend, work
 677         blo     LSYM(Lover3)
 678         sub     dividend, dividend, work
 679         mov     ip, curbit
 680         mov     work, #2
 681         ror     curbit, work
 682         orr     overdone, curbit                @ note the subtraction of divisor lsr 2
 683         mov     curbit, ip
 684 LSYM(Lover3):
 685         lsr     work, divisor, #3
 686         cmp     dividend, work
 687         blo     LSYM(Lover4)
 688         sub     dividend, dividend, work
 689         mov     ip, curbit
 690         mov     work, #3
 691         ror     curbit, work
 692         orr     overdone, curbit                @ note the subtraction of divisor lsr 3
 693         mov     curbit, ip
 694 LSYM(Lover4):
 695         mov     ip, curbit                      @ ip = curbit of this pass, read after Lover5
 696   .else
 697         @ ... and note which bits are done in the result.  On the final pass,
 698         @ this may subtract too much from the dividend, but the result will be ok,
 699         @ since the "bit" will have been shifted out at the bottom.
 700         cmp     dividend, divisor
 701         blo     LSYM(Lover1)
 702         sub     dividend, dividend, divisor
 703         orr     result, result, curbit
 704 LSYM(Lover1):
 705         lsr     work, divisor, #1
 706         cmp     dividend, work
 707         blo     LSYM(Lover2)
 708         sub     dividend, dividend, work
 709         lsr     work, curbit, #1
 710         orr     result, work
 711 LSYM(Lover2):
 712         lsr     work, divisor, #2
 713         cmp     dividend, work
 714         blo     LSYM(Lover3)
 715         sub     dividend, dividend, work
 716         lsr     work, curbit, #2
 717         orr     result, work
 718 LSYM(Lover3):
 719         lsr     work, divisor, #3
 720         cmp     dividend, work
 721         blo     LSYM(Lover4)
 722         sub     dividend, dividend, work
 723         lsr     work, curbit, #3
 724         orr     result, work
 725 LSYM(Lover4):
 726   .endif
 727
 728         cmp     dividend, #0                    @ Early termination?
 729         beq     LSYM(Lover5)
 730         lsr     curbit,  #4                     @ No, any more bits to do?
 731         beq     LSYM(Lover5)
 732         lsr     divisor, #4
 733         b       LSYM(Loop3)
 734 LSYM(Lover5):
 735   .if \modulo
 736         @ Any subtractions that we should not have done will be recorded in
 737         @ the top three bits of "overdone".  Exactly which were not needed
 738         @ are governed by the position of the bit, stored in ip.
 739         mov     work, #0xe
 740         lsl     work, #28
 741         and     overdone, work
 742         beq     LSYM(Lgot_result)
 743
 744         @ If we terminated early, because dividend became zero, then the
 745         @ bit in ip will not be in the bottom nibble, and we should not
 746         @ perform the additions below.  We must test for this though
 747         @ (rather relying upon the TSTs to prevent the additions) since
 748         @ the bit in ip could be in the top two bits which might then match
 749         @ with one of the smaller RORs.
 750         mov     curbit, ip
 751         mov     work, #0x7
 752         tst     curbit, work
 753         beq     LSYM(Lgot_result)
 754
 755         mov     curbit, ip
 756         mov     work, #3
 757         ror     curbit, work
 758         tst     overdone, curbit
 759         beq     LSYM(Lover6)
 760         lsr     work, divisor, #3
 761         add     dividend, work                  @ add back divisor lsr 3
 762 LSYM(Lover6):
 763         mov     curbit, ip
 764         mov     work, #2
 765         ror     curbit, work
 766         tst     overdone, curbit
 767         beq     LSYM(Lover7)
 768         lsr     work, divisor, #2
 769         add     dividend, work                  @ add back divisor lsr 2
 770 LSYM(Lover7):
 771         mov     curbit, ip
 772         mov     work, #1
 773         ror     curbit, work
 774         tst     overdone, curbit
 775         beq     LSYM(Lgot_result)
 776         lsr     work, divisor, #1
 777         add     dividend, work                  @ add back divisor lsr 1
 778   .endif
 779 LSYM(Lgot_result):
 780 .endm
 781 /* ------------------------------------------------------------------------ */
 782 /*              Start of the Real Functions                                 */
 783 /* ------------------------------------------------------------------------ */
 784 #ifdef L_udivsi3
 785
 786         FUNC_START udivsi3
 787         FUNC_ALIAS aeabi_uidiv udivsi3
     /* __udivsi3 (also exported as __aeabi_uidiv): unsigned 32-bit
        division.  In: r0 = dividend, r1 = divisor.  Out: r0 =
        quotient.  A zero divisor branches to Ldiv0, which
        DIV_FUNC_END emits after the function body.  */
 788
 789 #ifdef __thumb__
 790
 791         cmp     divisor, #0
 792         beq     LSYM(Ldiv0)
 793         mov     curbit, #1
 794         mov     result, #0
 795
 796         push    { work }
 797         cmp     dividend, divisor
 798         blo     LSYM(Lgot_result)               @ dividend < divisor: result stays 0
 799
 800         THUMB_DIV_MOD_BODY 0
 801
 802         mov     r0, result
 803         pop     { work }
 804         RET
 805
 806 #else /* ARM version.  */
 807
 808         subs    r2, r1, #1                      @ r2 = divisor - 1
 809         RETc(eq)                                @ divisor is 1: r0 is already the answer
 810         bcc     LSYM(Ldiv0)                     @ borrow: divisor was 0
 811         cmp     r0, r1
 812         bls     11f                             @ dividend <= divisor
 813         tst     r1, r2
 814         beq     12f                             @ divisor is a power of 2
 815
 816         ARM_DIV_BODY r0, r1, r2, r3
 817
 818         mov     r0, r2
 819         RET
 820
 821 11:     moveq   r0, #1                          @ dividend == divisor
 822         movne   r0, #0                          @ dividend < divisor
 823         RET
 824
 825 12:     ARM_DIV2_ORDER r1, r2
 826
 827         mov     r0, r0, lsr r2
 828         RET
 829
 830 #endif /* ARM version */
 831
 832         DIV_FUNC_END udivsi3
 833
 834 FUNC_START aeabi_uidivmod
     /* __aeabi_uidivmod: unsigned divide returning both results.
        In: r0 = dividend, r1 = divisor.  Out: r0 = quotient (from
        __udivsi3), r1 = dividend - divisor * quotient.  The original
        operands are saved on the stack across the call.  */
 835 #ifdef __thumb__
 836         push    {r0, r1, lr}
 837         bl      SYM(__udivsi3)
 838         POP     {r1, r2, r3}                    @ r1 = dividend, r2 = divisor, r3 = return address
 839         mul     r2, r0                          @ r2 = divisor * quotient
 840         sub     r1, r1, r2
 841         bx      r3
 842 #else
 843         stmfd   sp!, { r0, r1, lr }
 844         bl      SYM(__udivsi3)
 845         ldmfd   sp!, { r1, r2, lr }             @ r1 = dividend, r2 = divisor
 846         mul     r3, r2, r0                      @ r3 = divisor * quotient
 847         sub     r1, r1, r3
 848         RET
 849 #endif
 850         FUNC_END aeabi_uidivmod
 851
 852 #endif /* L_udivsi3 */
 853 /* ------------------------------------------------------------------------ */
 854 #ifdef L_umodsi3
 855
 856         FUNC_START umodsi3
     /* __umodsi3: unsigned 32-bit remainder.  In: r0 = dividend,
        r1 = divisor.  Out: r0 = remainder.  A zero divisor branches
        to Ldiv0, which DIV_FUNC_END emits after the function body.  */
 857
 858 #ifdef __thumb__
 859
 860         cmp     divisor, #0
 861         beq     LSYM(Ldiv0)
 862         mov     curbit, #1
 863         cmp     dividend, divisor
 864         bhs     LSYM(Lover10)
 865         RET                                     @ dividend < divisor: r0 is the remainder
 866
 867 LSYM(Lover10):
 868         push    { work }
 869
 870         THUMB_DIV_MOD_BODY 1
 871
 872         pop     { work }
 873         RET
 874
 875 #else  /* ARM version.  */
 876
 877         subs    r2, r1, #1                      @ compare divisor with 1
 878         bcc     LSYM(Ldiv0)
 879         cmpne   r0, r1                          @ compare dividend with divisor
 880         moveq   r0, #0                          @ divisor is 1 or equals dividend
 881         tsthi   r1, r2                          @ see if divisor is power of 2
 882         andeq   r0, r0, r2                      @ mask with divisor - 1
 883         RETc(ls)                                @ done unless the full body is needed
 884
 885         ARM_MOD_BODY r0, r1, r2, r3
 886
 887         RET
 888
 889 #endif /* ARM version.  */
 890
 891         DIV_FUNC_END umodsi3
 892
 893 #endif /* L_umodsi3 */
 894 /* ------------------------------------------------------------------------ */
 895 #ifdef L_divsi3
 896
 897         FUNC_START divsi3
 898         FUNC_ALIAS aeabi_idiv divsi3
     /* __divsi3 (also exported as __aeabi_idiv): signed 32-bit
        division.  In: r0 = dividend, r1 = divisor.  Out: r0 =
        quotient.  The exclusive-or of the operands is kept in ip as
        the sign of the result, the division is done on magnitudes,
        and the quotient is negated at the end if ip is negative.  A
        zero divisor branches to Ldiv0, which DIV_FUNC_END emits
        after the function body.  */
 899
 900 #ifdef __thumb__
 901         cmp     divisor, #0
 902         beq     LSYM(Ldiv0)
 903
 904         push    { work }
 905         mov     work, dividend
 906         eor     work, divisor           @ Save the sign of the result.
 907         mov     ip, work
 908         mov     curbit, #1
 909         mov     result, #0
 910         cmp     divisor, #0
 911         bpl     LSYM(Lover10)
 912         neg     divisor, divisor        @ Loops below use unsigned.
 913 LSYM(Lover10):
 914         cmp     dividend, #0
 915         bpl     LSYM(Lover11)
 916         neg     dividend, dividend
 917 LSYM(Lover11):
 918         cmp     dividend, divisor
 919         blo     LSYM(Lgot_result)       @ Magnitude is smaller: result stays 0.
 920
 921         THUMB_DIV_MOD_BODY 0
 922
 923         mov     r0, result
 924         mov     work, ip
 925         cmp     work, #0
 926         bpl     LSYM(Lover12)
 927         neg     r0, r0                  @ Operand signs differed.
 928 LSYM(Lover12):
 929         pop     { work }
 930         RET
 931
 932 #else /* ARM version.  */
 933
 934         cmp     r1, #0
 935         eor     ip, r0, r1                      @ save the sign of the result.
 936         beq     LSYM(Ldiv0)
 937         rsbmi   r1, r1, #0                      @ loops below use unsigned.
 938         subs    r2, r1, #1                      @ division by 1 or -1 ?
 939         beq     10f
 940         movs    r3, r0
 941         rsbmi   r3, r0, #0                      @ positive dividend value
 942         cmp     r3, r1
 943         bls     11f
 944         tst     r1, r2                          @ divisor is power of 2 ?
 945         beq     12f
 946
 947         ARM_DIV_BODY r3, r1, r0, r2
 948
 949         cmp     ip, #0
 950         rsbmi   r0, r0, #0
 951         RET
 952
 953 10:     teq     ip, r0                          @ same sign ?
 954         rsbmi   r0, r0, #0
 955         RET
 956
 957 11:     movlo   r0, #0                          @ magnitude below divisor: 0
 958         moveq   r0, ip, asr #31                 @ equal magnitudes: 0 or -1 from the sign
 959         orreq   r0, r0, #1                      @ ... which becomes +1 or -1
 960         RET
 961
 962 12:     ARM_DIV2_ORDER r1, r2
 963
 964         cmp     ip, #0
 965         mov     r0, r3, lsr r2
 966         rsbmi   r0, r0, #0
 967         RET
 968
 969 #endif /* ARM version */
 970
 971         DIV_FUNC_END divsi3
 972 /* Signed divide with remainder: calls SYM(__divsi3), then computes remainder = dividend - quotient * divisor.  The quotient stays in r0 and the remainder is left in r1.  */
 973 FUNC_START aeabi_idivmod
 974 #ifdef __thumb__
 975         push    {r0, r1, lr}            @ Save dividend, divisor and return address.
 976         bl      SYM(__divsi3)
 977         POP     {r1, r2, r3}
 978         mul     r2, r0                  @ r2 = saved divisor * quotient (r1 = saved dividend).
 979         sub     r1, r1, r2              @ r1 = remainder.
 980         bx      r3                      @ r3 received the saved lr value.
 981 #else
 982         stmfd   sp!, { r0, r1, lr }     @ Save dividend, divisor and return address.
 983         bl      SYM(__divsi3)
 984         ldmfd   sp!, { r1, r2, lr }     @ r1 = dividend, r2 = divisor, lr restored.
 985         mul     r3, r2, r0              @ r3 = divisor * quotient.
 986         sub     r1, r1, r3              @ r1 = remainder.
 987         RET
 988 #endif
 989         FUNC_END aeabi_idivmod
 990
 991 #endif /* L_divsi3 */
 992 /* ------------------------------------------------------------------------ */
 993 #ifdef L_modsi3
 994 /* Signed 32-bit remainder (modsi3).  Both paths work on non-negative operands and then negate the result when the original dividend was negative.  The ARM path takes the dividend in r0 and the divisor in r1 and leaves the remainder in r0.  */
 995         FUNC_START modsi3
 996
 997 #ifdef __thumb__
 998
 999         mov     curbit, #1
1000         cmp     divisor, #0
1001         beq     LSYM(Ldiv0)
1002         bpl     LSYM(Lover10)
1003         neg     divisor, divisor                @ Loops below use unsigned.
1004 LSYM(Lover10):
1005         push    { work }
1006         @ Need to save the sign of the dividend, unfortunately, we need
1007         @ work later on.  Must do this after saving the original value of
1008         @ the work register, because we will pop this value off first.
1009         push    { dividend }
1010         cmp     dividend, #0
1011         bpl     LSYM(Lover11)
1012         neg     dividend, dividend              @ Negate a negative dividend.
1013 LSYM(Lover11):
1014         cmp     dividend, divisor
1015         blo     LSYM(Lgot_result)               @ Unsigned dividend < divisor.
1016 /* NOTE(review): Lgot_result is not defined in this function text; presumably THUMB_DIV_MOD_BODY provides it -- confirm.  */
1017         THUMB_DIV_MOD_BODY 1
1018
1019         pop     { work }                        @ work = original signed dividend.
1020         cmp     work, #0
1021         bpl     LSYM(Lover12)
1022         neg     dividend, dividend              @ Original dividend was negative: negate the result.
1023 LSYM(Lover12):
1024         pop     { work }                        @ Restore the work register saved on entry.
1025         RET
1026
1027 #else /* ARM version.  */
1028
1029         cmp     r1, #0
1030         beq     LSYM(Ldiv0)
1031         rsbmi   r1, r1, #0                      @ loops below use unsigned.
1032         movs    ip, r0                          @ preserve sign of dividend
1033         rsbmi   r0, r0, #0                      @ if negative make positive
1034         subs    r2, r1, #1                      @ compare divisor with 1
1035         cmpne   r0, r1                          @ compare dividend with divisor
1036         moveq   r0, #0                          @ divisor is 1 or dividend == divisor: remainder 0
1037         tsthi   r1, r2                          @ see if divisor is power of 2
1038         andeq   r0, r0, r2                      @ power of 2: mask with divisor - 1 (no effect when r0 is already 0)
1039         bls     10f                             @ fall through only if dividend > divisor and tst was non-zero
1040
1041         ARM_MOD_BODY r0, r1, r2, r3
1042
1043 10:     cmp     ip, #0
1044         rsbmi   r0, r0, #0                      @ original dividend was negative: negate the remainder
1045         RET
1046
1047 #endif /* ARM version */
1048
1049         DIV_FUNC_END modsi3
1050
1051 #endif /* L_modsi3 */
1052 /* ------------------------------------------------------------------------ */
1053 #ifdef L_dvmd_tls
1054 /* div0 for the L_dvmd_tls configuration, also exported as aeabi_idiv0 and aeabi_ldiv0: the body is a bare return.  */
1055         FUNC_START div0
1056         FUNC_ALIAS aeabi_idiv0 div0
1057         FUNC_ALIAS aeabi_ldiv0 div0
1058
1059         RET
1060
1061         FUNC_END aeabi_ldiv0
1062         FUNC_END aeabi_idiv0
1063         FUNC_END div0
1064
1065 #endif /* L_dvmd_tls */
1066 /* ------------------------------------------------------------------------ */
1067 #ifdef L_dvmd_lnx
1068 @ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls.
1069
1070 /* Constant taken from <asm/signal.h>.  */
1071 #define SIGFPE  8
1072
1073         ARM_FUNC_START div0
1074 /* Calls raise with SIGFPE as its argument, then returns to the caller of div0.  NOTE(review): r1 is pushed next to lr, presumably only to keep the stack 8-byte aligned -- confirm.  */
1075         do_push {r1, lr}
1076         mov     r0, #SIGFPE             @ Argument for raise.
1077         bl      SYM(raise) __PLT__
1078         RETLDM  r1
1079
1080         FUNC_END div0
1081
1082 #endif /* L_dvmd_lnx */
1083 /* ------------------------------------------------------------------------ */
1084 /* Dword shift operations.  */
1085 /* All the following Dword shift variants rely on the fact that
1086         shft xxx, Reg
1087    is in fact done as
1088         shft xxx, (Reg & 255)
1089    so for Reg value in (32...63) and (-1...-31) we will get zero (in the
1090    case of logical shifts) or the sign (for asr).  */
1091
1092 #ifdef __ARMEB__ /* Big-endian: the high word of the 64-bit operand is in r0.  */
1093 #define al      r1 /* low word */
1094 #define ah      r0 /* high word */
1095 #else /* Otherwise the low word is in r0.  */
1096 #define al      r0 /* low word */
1097 #define ah      r1 /* high word */
1098 #endif
1099
1100 /* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
1101 #ifndef __symbian__
1102
1103 #ifdef L_lshrdi3
1104 /* 64-bit logical shift right (lshrdi3, aliased as aeabi_llsr): the value is in ah:al, the shift count n is in r2, and the result is left in ah:al.  Relies on the register-shift behaviour described in the comment above.  */
1105         FUNC_START lshrdi3
1106         FUNC_ALIAS aeabi_llsr lshrdi3
1107
1108 #ifdef __thumb__
1109         lsr     al, r2                  @ Low word >> n.
1110         mov     r3, ah                  @ Keep the original high word.
1111         lsr     ah, r2                  @ High word >> n.
1112         mov     ip, r3                  @ Second copy of the original high word.
1113         sub     r2, #32                 @ r2 = n - 32.
1114         lsr     r3, r2                  @ High word >> (n - 32): the part used when n >= 32.
1115         orr     al, r3
1116         neg     r2, r2                  @ r2 = 32 - n.
1117         mov     r3, ip
1118         lsl     r3, r2                  @ High word << (32 - n): bits carried into the low word when n < 32.
1119         orr     al, r3
1120         RET
1121 #else
1122         subs    r3, r2, #32             @ r3 = n - 32; mi when n < 32.
1123         rsb     ip, r2, #32             @ ip = 32 - n.
1124         movmi   al, al, lsr r2          @ n < 32: low word >> n ...
1125         movpl   al, ah, lsr r3          @ n >= 32: low word = high word >> (n - 32).
1126         orrmi   al, al, ah, lsl ip      @ ... plus the bits carried down from the high word.
1127         mov     ah, ah, lsr r2          @ High word >> n.
1128         RET
1129 #endif
1130         FUNC_END aeabi_llsr
1131         FUNC_END lshrdi3
1132
1133 #endif /* L_lshrdi3 */
1134
1135 #ifdef L_ashrdi3
1136 /* 64-bit arithmetic shift right (ashrdi3, aliased as aeabi_lasr): the value is in ah:al, the shift count n is in r2, and the result is left in ah:al.  */
1137         FUNC_START ashrdi3
1138         FUNC_ALIAS aeabi_lasr ashrdi3
1139
1140 #ifdef __thumb__
1141         lsr     al, r2                  @ Low word >> n (logical).
1142         mov     r3, ah                  @ Keep the original high word.
1143         asr     ah, r2                  @ High word >> n (arithmetic).
1144         sub     r2, #32                 @ r2 = n - 32; bmi below tests the result.
1145         @ If r2 is negative at this point the following step would OR
1146         @ the sign bit into all of AL.  That's not what we want...
1147         bmi     1f
1148         mov     ip, r3
1149         asr     r3, r2                  @ n >= 32: high word >> (n - 32) goes into the low word.
1150         orr     al, r3
1151         mov     r3, ip                  @ Reload the original high word.
1152 1:
1153         neg     r2, r2                  @ r2 = 32 - n.
1154         lsl     r3, r2                  @ High word << (32 - n): bits carried into the low word.
1155         orr     al, r3
1156         RET
1157 #else
1158         subs    r3, r2, #32             @ r3 = n - 32; mi when n < 32.
1159         rsb     ip, r2, #32             @ ip = 32 - n.
1160         movmi   al, al, lsr r2          @ n < 32: low word >> n ...
1161         movpl   al, ah, asr r3          @ n >= 32: low word = high word >> (n - 32), arithmetic.
1162         orrmi   al, al, ah, lsl ip      @ ... plus the bits carried down from the high word.
1163         mov     ah, ah, asr r2          @ High word >> n (arithmetic).
1164         RET
1165 #endif
1166
1167         FUNC_END aeabi_lasr
1168         FUNC_END ashrdi3
1169
1170 #endif /* L_ashrdi3 */
1171
1172 #ifdef L_ashldi3
1173 /* 64-bit shift left (ashldi3, aliased as aeabi_llsl): the value is in ah:al, the shift count n is in r2, and the result is left in ah:al.  */
1174         FUNC_START ashldi3
1175         FUNC_ALIAS aeabi_llsl ashldi3
1176
1177 #ifdef __thumb__
1178         lsl     ah, r2                  @ High word << n.
1179         mov     r3, al                  @ Keep the original low word.
1180         lsl     al, r2                  @ Low word << n.
1181         mov     ip, r3                  @ Second copy of the original low word.
1182         sub     r2, #32                 @ r2 = n - 32.
1183         lsl     r3, r2                  @ Low word << (n - 32): the part used when n >= 32.
1184         orr     ah, r3
1185         neg     r2, r2                  @ r2 = 32 - n.
1186         mov     r3, ip
1187         lsr     r3, r2                  @ Low word >> (32 - n): bits carried into the high word when n < 32.
1188         orr     ah, r3
1189         RET
1190 #else
1191         subs    r3, r2, #32             @ r3 = n - 32; mi when n < 32.
1192         rsb     ip, r2, #32             @ ip = 32 - n.
1193         movmi   ah, ah, lsl r2          @ n < 32: high word << n ...
1194         movpl   ah, al, lsl r3          @ n >= 32: high word = low word << (n - 32).
1195         orrmi   ah, ah, al, lsr ip      @ ... plus the bits carried up from the low word.
1196         mov     al, al, lsl r2          @ Low word << n.
1197         RET
1198 #endif
1199         FUNC_END aeabi_llsl
1200         FUNC_END ashldi3
1201
1202 #endif /* L_ashldi3 */
1203
1204 #endif /* !__symbian__ */
1205
1206 /* ------------------------------------------------------------------------ */
1207 /* These next two sections are here despite the fact that they contain Thumb
1208    assembler because their presence allows interworked code to be linked even
1209    when the GCC library is this one.  */
1210
1211 /* Do not build the interworking functions when the target architecture does
1212    not support Thumb instructions.  (This can be a multilib option).  */
1213 #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1214       || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1215       || __ARM_ARCH__ >= 6
1216
1217 #if defined L_call_via_rX
1218
1219 /* These labels & instructions are used by the Arm/Thumb interworking code.
1220    The address of function to be called is loaded into a register and then
1221    one of these labels is called via a BL instruction.  This puts the
1222    return address into the link register with the bottom bit set, and the
1223    code here switches to the correct mode before executing the function.  */
1224
1225         .text
1226         .align 0
1227         .force_thumb
1228 /* call_via REG emits the Thumb function _call_via_REG: a BX through REG followed by a NOP.  */
1229 .macro call_via register
1230         THUMB_FUNC_START _call_via_\register
1231
1232         bx      \register               @ Branch to the address held in the register argument.
1233         nop                             @ NOTE(review): presumably padding -- confirm.
1234
1235         SIZE    (_call_via_\register)
1236 .endm
1237 /* One stub per register, r0 through lr.  */
1238         call_via r0
1239         call_via r1
1240         call_via r2
1241         call_via r3
1242         call_via r4
1243         call_via r5
1244         call_via r6
1245         call_via r7
1246         call_via r8
1247         call_via r9
1248         call_via sl
1249         call_via fp
1250         call_via ip
1251         call_via sp
1252         call_via lr
1253
1254 #endif /* L_call_via_rX */
1255
1256 /* Don't bother with the old interworking routines for Thumb-2.  */
1257 /* ??? Maybe only omit these on v7m.  */
1258 #ifndef __thumb2__
1259
1260 #if defined L_interwork_call_via_rX
1261
1262 /* These labels & instructions are used by the Arm/Thumb interworking code,
1263    when the target address is in an unknown instruction set.  The address
1264    of function to be called is loaded into a register and then one of these
1265    labels is called via a BL instruction.  This puts the return address
1266    into the link register with the bottom bit set, and the code here
1267    switches to the correct mode before executing the function.  Unfortunately
1268    the target code cannot be relied upon to return via a BX instruction, so
1269    instead we have to store the return address on the stack and allow the
1270    called function to return here instead.  Upon return we recover the real
1271    return address and use a BX to get back to Thumb mode.
1272
1273    There are three variations of this code.  The first,
1274    _interwork_call_via_rN(), will push the return address onto the
1275    stack and pop it in _arm_return().  It should only be used if all
1276    arguments are passed in registers.
1277
1278    The second, _interwork_r7_call_via_rN(), instead stores the return
1279    address at [r7, #-4].  It is the caller's responsibility to ensure
1280    that this address is valid and contains no useful data.
1281
1282    The third, _interwork_r11_call_via_rN(), works in the same way but
1283    uses r11 instead of r7.  It is useful if the caller does not really
1284    need a frame pointer.  */
1285
1286         .text
1287         .align 0
1288 /* ARM-state return stubs.  The interworking entry points below point lr at one of these, so the called function returns here; each stub then fetches the saved return address and branches to it.  */
1289         .code   32
1290         .globl _arm_return
1291 LSYM(Lstart_arm_return):
1292         cfi_start       LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1293         cfi_push        0, 0xe, -0x8, 0x8
1294         nop     @ This nop is for the benefit of debuggers, so that
1295                 @ backtraces will use the correct unwind information.
1296 _arm_return:
1297         RETLDM  unwind=LSYM(Lstart_arm_return)
1298         cfi_end LSYM(Lend_arm_return)
1299
1300         .globl _arm_return_r7
1301 _arm_return_r7:
1302         ldr     lr, [r7, #-4]           @ Slot written by the _interwork_r7_call_via_rN stubs.
1303         bx      lr                      @ BX so the return can change instruction set.
1304
1305         .globl _arm_return_r11
1306 _arm_return_r11:
1307         ldr     lr, [r11, #-4]          @ Slot written by the _interwork_r11_call_via_rN stubs.
1308         bx      lr                      @ BX so the return can change instruction set.
1309
1310 .macro interwork_with_frame frame, register, name, return
1311         .code   16
1312 /* Emits the Thumb entry point given by the name argument; it calls through the register argument and keeps the return address at [frame, #-4].  NOTE(review): the "return" parameter is never referenced in this body.  */
1313         THUMB_FUNC_START \name
1314
1315         bx      pc                      @ Continue in the ARM code that follows the nop.
1316         nop
1317
1318         .code   32
1319         tst     \register, #1           @ Bit 0 clear means the target is ARM code.
1320         streq   lr, [\frame, #-4]       @ ARM target: save the return address in the frame slot ...
1321         adreq   lr, _arm_return_\frame  @ ... and have the target return to the matching stub.
1322         bx      \register
1323
1324         SIZE    (\name)
1325 .endm
1326
1327 .macro interwork register
1328         .code   16
1329 /* Emits _interwork_call_via_REG plus its r7-frame and r11-frame variants for the register argument.  */
1330         THUMB_FUNC_START _interwork_call_via_\register
1331
1332         bx      pc                      @ Continue in the ARM code that follows the nop.
1333         nop
1334
1335         .code   32
1336         .globl LSYM(Lchange_\register)
1337 LSYM(Lchange_\register):
1338         tst     \register, #1           @ Bit 0 clear means the target is ARM code.
1339         streq   lr, [sp, #-8]!          @ ARM target: push the return address (sp drops by 8) ...
1340         adreq   lr, _arm_return         @ ... and have the target return to _arm_return.
1341         bx      \register
1342
1343         SIZE    (_interwork_call_via_\register)
1344
1345         interwork_with_frame r7,\register,_interwork_r7_call_via_\register
1346         interwork_with_frame r11,\register,_interwork_r11_call_via_\register
1347 .endm
1348 /* Instantiate the stubs for every register except lr, which is handled separately after this list.  */
1349         interwork r0
1350         interwork r1
1351         interwork r2
1352         interwork r3
1353         interwork r4
1354         interwork r5
1355         interwork r6
1356         interwork r7
1357         interwork r8
1358         interwork r9
1359         interwork sl
1360         interwork fp
1361         interwork ip
1362         interwork sp
1363
1364         /* The LR case has to be handled a little differently: lr holds the target address, so it is copied to ip before lr is overwritten with the address of _arm_return.  */
1365         .code 16
1366
1367         THUMB_FUNC_START _interwork_call_via_lr
1368
1369         bx      pc                      @ Continue in the ARM code that follows the nop.
1370         nop
1371
1372         .code 32
1373         .globl .Lchange_lr
1374 .Lchange_lr:
1375         tst     lr, #1                  @ Bit 0 clear means the target is ARM code.
1376         stmeqdb r13!, {lr, pc}          @ ARM target: push lr and pc (8 bytes).
1377         mov     ip, lr                  @ Target address moves to ip (flags untouched).
1378         adreq   lr, _arm_return         @ ARM target: return through _arm_return.
1379         bx      ip
1380
1381         SIZE    (_interwork_call_via_lr)
1382
1383 #endif /* L_interwork_call_via_rX */
1384 #endif /* !__thumb2__ */
1385 #endif /* Arch supports thumb.  */
1386
1387 #ifndef __symbian__ /* The files below are textually included except when __symbian__ is defined.  */
1388 #include "ieee754-df.S"
1389 #include "ieee754-sf.S"
1390 #include "bpabi.S"
1391 #endif /* !__symbian__ */