libgcc/config/avr/lib1funcs.S

   1 /*  -*- Mode: Asm -*-  */
   2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4    Contributed by Denis Chertykov <chertykov@gmail.com>
   5
   6 This file is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 This file is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 Under Section 7 of GPL version 3, you are granted additional
  17 permissions described in the GCC Runtime Library Exception, version
  18 3.1, as published by the Free Software Foundation.
  19
  20 You should have received a copy of the GNU General Public License and
  21 a copy of the GCC Runtime Library Exception along with this program;
  22 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23 <http://www.gnu.org/licenses/>.  */
  24
  25 #define __zero_reg__ r1
  26 #define __tmp_reg__ r0
  27 #define __SREG__ 0x3f
  28 #define __SP_H__ 0x3e
  29 #define __SP_L__ 0x3d
  30 #define __RAMPZ__ 0x3B
  31 #define __EIND__  0x3C
  32
  33 /* Most of the functions here are called directly from avr.md
  34    patterns, instead of using the standard libcall mechanisms.
  35    This can make better code because GCC knows exactly which
  36    of the call-used registers (not all of them) are clobbered.  */
  37
  38 /* FIXME:  At present, there is no SORT directive in the linker
  39            script so that we must not assume that different modules
  40            in the same input section like .libgcc.text.mul will be
  41            located close together.  Therefore, we cannot use
  42            RCALL/RJMP to call a function like __udivmodhi4 from
  43            __divmodhi4 and have to use lengthy XCALL/XJMP even
  44            though they are in the same input section and all same
  45            input sections together are small enough to reach every
  46            location with a RCALL/RJMP instruction.  */
  47
  48         .macro  mov_l  r_dest, r_src
  49 #if defined (__AVR_HAVE_MOVW__)
  50         movw    \r_dest, \r_src
  51 #else
  52         mov     \r_dest, \r_src
  53 #endif
  54         .endm
  55
  56         .macro  mov_h  r_dest, r_src
  57 #if defined (__AVR_HAVE_MOVW__)
  58         ; empty
  59 #else
  60         mov     \r_dest, \r_src
  61 #endif
  62         .endm
  63
  64 .macro  wmov  r_dest, r_src
  65 #if defined (__AVR_HAVE_MOVW__)
  66     movw \r_dest,   \r_src
  67 #else
  68     mov \r_dest,    \r_src
  69     mov \r_dest+1,  \r_src+1
  70 #endif
  71 .endm
  72
  73 #if defined (__AVR_HAVE_JMP_CALL__)
  74 #define XCALL call
  75 #define XJMP  jmp
  76 #else
  77 #define XCALL rcall
  78 #define XJMP  rjmp
  79 #endif
  80
  81 .macro DEFUN name
  82 .global \name
  83 .func \name
  84 \name:
  85 .endm
  86
  87 .macro ENDF name
  88 .size \name, .-\name
  89 .endfunc
  90 .endm
  91
  92 \f
  93 .section .text.libgcc.mul, "ax", @progbits
  94
  95 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  96 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
  97 #if !defined (__AVR_HAVE_MUL__)
  98 /*******************************************************
  99     Multiplication  8 x 8  without MUL
 100 *******************************************************/
 101 #if defined (L_mulqi3)
 102
 103 #define r_arg2  r22             /* multiplicand */
 104 #define r_arg1  r24             /* multiplier */
 105 #define r_res   __tmp_reg__     /* result */
 106
 107 DEFUN __mulqi3
 108         clr     r_res           ; clear result
 109 __mulqi3_loop:
 110         sbrc    r_arg1,0
 111         add     r_res,r_arg2
 112         add     r_arg2,r_arg2   ; shift multiplicand
 113         breq    __mulqi3_exit   ; while multiplicand != 0
 114         lsr     r_arg1          ;
 115         brne    __mulqi3_loop   ; exit if multiplier = 0
 116 __mulqi3_exit:
 117         mov     r_arg1,r_res    ; result to return register
 118         ret
 119 ENDF __mulqi3
 120
 121 #undef r_arg2
 122 #undef r_arg1
 123 #undef r_res
 124
 125 #endif  /* defined (L_mulqi3) */
 126
 127 #if defined (L_mulqihi3)
 128 DEFUN __mulqihi3
 129         clr     r25
 130         sbrc    r24, 7
 131         dec     r25
 132         clr     r23
 133         sbrc    r22, 7
 134         dec     r22
 135         XJMP    __mulhi3
 136 ENDF __mulqihi3:
 137 #endif /* defined (L_mulqihi3) */
 138
 139 #if defined (L_umulqihi3)
 140 DEFUN __umulqihi3
 141         clr     r25
 142         clr     r23
 143         XJMP    __mulhi3
 144 ENDF __umulqihi3
 145 #endif /* defined (L_umulqihi3) */
 146
 147 /*******************************************************
 148     Multiplication  16 x 16  without MUL
 149 *******************************************************/
 150 #if defined (L_mulhi3)
 151 #define r_arg1L r24             /* multiplier Low */
 152 #define r_arg1H r25             /* multiplier High */
 153 #define r_arg2L r22             /* multiplicand Low */
 154 #define r_arg2H r23             /* multiplicand High */
 155 #define r_resL  __tmp_reg__     /* result Low */
 156 #define r_resH  r21             /* result High */
 157
 158 DEFUN __mulhi3
 159         clr     r_resH          ; clear result
 160         clr     r_resL          ; clear result
 161 __mulhi3_loop:
 162         sbrs    r_arg1L,0
 163         rjmp    __mulhi3_skip1
 164         add     r_resL,r_arg2L  ; result + multiplicand
 165         adc     r_resH,r_arg2H
 166 __mulhi3_skip1:
 167         add     r_arg2L,r_arg2L ; shift multiplicand
 168         adc     r_arg2H,r_arg2H
 169
 170         cp      r_arg2L,__zero_reg__
 171         cpc     r_arg2H,__zero_reg__
 172         breq    __mulhi3_exit   ; while multiplicand != 0
 173
 174         lsr     r_arg1H         ; gets LSB of multiplier
 175         ror     r_arg1L
 176         sbiw    r_arg1L,0
 177         brne    __mulhi3_loop   ; exit if multiplier = 0
 178 __mulhi3_exit:
 179         mov     r_arg1H,r_resH  ; result to return register
 180         mov     r_arg1L,r_resL
 181         ret
 182 ENDF __mulhi3
 183
 184 #undef r_arg1L
 185 #undef r_arg1H
 186 #undef r_arg2L
 187 #undef r_arg2H
 188 #undef r_resL
 189 #undef r_resH
 190
 191 #endif /* defined (L_mulhi3) */
 192
 193 /*******************************************************
 194     Widening Multiplication  32 = 16 x 16  without MUL
 195 *******************************************************/
 196
 197 #if defined (L_mulhisi3)
 198 DEFUN __mulhisi3
 199 ;;; FIXME: This is dead code (noone calls it)
 200     mov_l   r18, r24
 201     mov_h   r19, r25
 202     clr     r24
 203     sbrc    r23, 7
 204     dec     r24
 205     mov     r25, r24
 206     clr     r20
 207     sbrc    r19, 7
 208     dec     r20
 209     mov     r21, r20
 210     XJMP    __mulsi3
 211 ENDF __mulhisi3
 212 #endif /* defined (L_mulhisi3) */
 213
 214 #if defined (L_umulhisi3)
 215 DEFUN __umulhisi3
 216 ;;; FIXME: This is dead code (noone calls it)
 217     mov_l   r18, r24
 218     mov_h   r19, r25
 219     clr     r24
 220     clr     r25
 221     mov_l   r20, r24
 222     mov_h   r21, r25
 223     XJMP    __mulsi3
 224 ENDF __umulhisi3
 225 #endif /* defined (L_umulhisi3) */
 226
 227 #if defined (L_mulsi3)
 228 /*******************************************************
 229     Multiplication  32 x 32  without MUL
 230 *******************************************************/
 231 #define r_arg1L  r22            /* multiplier Low */
 232 #define r_arg1H  r23
 233 #define r_arg1HL r24
 234 #define r_arg1HH r25            /* multiplier High */
 235
 236 #define r_arg2L  r18            /* multiplicand Low */
 237 #define r_arg2H  r19
 238 #define r_arg2HL r20
 239 #define r_arg2HH r21            /* multiplicand High */
 240
 241 #define r_resL   r26            /* result Low */
 242 #define r_resH   r27
 243 #define r_resHL  r30
 244 #define r_resHH  r31            /* result High */
 245
 246 DEFUN __mulsi3
 247         clr     r_resHH         ; clear result
 248         clr     r_resHL         ; clear result
 249         clr     r_resH          ; clear result
 250         clr     r_resL          ; clear result
 251 __mulsi3_loop:
 252         sbrs    r_arg1L,0
 253         rjmp    __mulsi3_skip1
 254         add     r_resL,r_arg2L          ; result + multiplicand
 255         adc     r_resH,r_arg2H
 256         adc     r_resHL,r_arg2HL
 257         adc     r_resHH,r_arg2HH
 258 __mulsi3_skip1:
 259         add     r_arg2L,r_arg2L         ; shift multiplicand
 260         adc     r_arg2H,r_arg2H
 261         adc     r_arg2HL,r_arg2HL
 262         adc     r_arg2HH,r_arg2HH
 263
 264         lsr     r_arg1HH        ; gets LSB of multiplier
 265         ror     r_arg1HL
 266         ror     r_arg1H
 267         ror     r_arg1L
 268         brne    __mulsi3_loop
 269         sbiw    r_arg1HL,0
 270         cpc     r_arg1H,r_arg1L
 271         brne    __mulsi3_loop           ; exit if multiplier = 0
 272 __mulsi3_exit:
 273         mov_h   r_arg1HH,r_resHH        ; result to return register
 274         mov_l   r_arg1HL,r_resHL
 275         mov_h   r_arg1H,r_resH
 276         mov_l   r_arg1L,r_resL
 277         ret
 278 ENDF __mulsi3
 279
 280 #undef r_arg1L
 281 #undef r_arg1H
 282 #undef r_arg1HL
 283 #undef r_arg1HH
 284
 285 #undef r_arg2L
 286 #undef r_arg2H
 287 #undef r_arg2HL
 288 #undef r_arg2HH
 289
 290 #undef r_resL
 291 #undef r_resH
 292 #undef r_resHL
 293 #undef r_resHH
 294
 295 #endif /* defined (L_mulsi3) */
 296
 297 #endif /* !defined (__AVR_HAVE_MUL__) */
 298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 299 \f
 300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 301 #if defined (__AVR_HAVE_MUL__)
 302 #define A0 26
 303 #define B0 18
 304 #define C0 22
 305
 306 #define A1 A0+1
 307
 308 #define B1 B0+1
 309 #define B2 B0+2
 310 #define B3 B0+3
 311
 312 #define C1 C0+1
 313 #define C2 C0+2
 314 #define C3 C0+3
 315
 316 /*******************************************************
 317     Widening Multiplication  32 = 16 x 16
 318 *******************************************************/
 319
 320 #if defined (L_mulhisi3)
 321 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
 322 ;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
 323 ;;; Clobbers: __tmp_reg__
 324 DEFUN __mulhisi3
 325     XCALL   __umulhisi3
 326     ;; Sign-extend B
 327     tst     B1
 328     brpl    1f
 329     sub     C2, A0
 330     sbc     C3, A1
 331 1:  ;; Sign-extend A
 332     XJMP __usmulhisi3_tail
 333 ENDF __mulhisi3
 334 #endif /* L_mulhisi3 */
 335
 336 #if defined (L_usmulhisi3)
 337 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
 338 ;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
 339 ;;; Clobbers: __tmp_reg__
 340 DEFUN __usmulhisi3
 341     XCALL   __umulhisi3
 342     ;; FALLTHRU
 343 ENDF __usmulhisi3
 344
 345 DEFUN __usmulhisi3_tail
 346     ;; Sign-extend A
 347     sbrs    A1, 7
 348     ret
 349     sub     C2, B0
 350     sbc     C3, B1
 351     ret
 352 ENDF __usmulhisi3_tail
 353 #endif /* L_usmulhisi3 */
 354
 355 #if defined (L_umulhisi3)
 356 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
 357 ;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
 358 ;;; Clobbers: __tmp_reg__
 359 DEFUN __umulhisi3
 360     mul     A0, B0
 361     movw    C0, r0
 362     mul     A1, B1
 363     movw    C2, r0
 364     mul     A0, B1
 365     rcall   1f
 366     mul     A1, B0
 367 1:  add     C1, r0
 368     adc     C2, r1
 369     clr     __zero_reg__
 370     adc     C3, __zero_reg__
 371     ret
 372 ENDF __umulhisi3
 373 #endif /* L_umulhisi3 */
 374
 375 /*******************************************************
 376     Widening Multiplication  32 = 16 x 32
 377 *******************************************************/
 378
 379 #if defined (L_mulshisi3)
 380 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
 381 ;;; (C3:C0) = (signed long) A1:A0   * B3:B0
 382 ;;; Clobbers: __tmp_reg__
 383 DEFUN __mulshisi3
 384 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
 385     ;; Some cores have problem skipping 2-word instruction
 386     tst     A1
 387     brmi    __mulohisi3
 388 #else
 389     sbrs    A1, 7
 390 #endif /* __AVR_HAVE_JMP_CALL__ */
 391     XJMP    __muluhisi3
 392     ;; FALLTHRU
 393 ENDF __mulshisi3
 394
 395 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
 396 ;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
 397 ;;; Clobbers: __tmp_reg__
 398 DEFUN __mulohisi3
 399     XCALL   __muluhisi3
 400     ;; One-extend R27:R26 (A1:A0)
 401     sub     C2, B0
 402     sbc     C3, B1
 403     ret
 404 ENDF __mulohisi3
 405 #endif /* L_mulshisi3 */
 406
 407 #if defined (L_muluhisi3)
 408 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
 409 ;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
 410 ;;; Clobbers: __tmp_reg__
 411 DEFUN __muluhisi3
 412     XCALL   __umulhisi3
 413     mul     A0, B3
 414     add     C3, r0
 415     mul     A1, B2
 416     add     C3, r0
 417     mul     A0, B2
 418     add     C2, r0
 419     adc     C3, r1
 420     clr     __zero_reg__
 421     ret
 422 ENDF __muluhisi3
 423 #endif /* L_muluhisi3 */
 424
 425 /*******************************************************
 426     Multiplication  32 x 32
 427 *******************************************************/
 428
 429 #if defined (L_mulsi3)
 430 ;;; R25:R22 = R25:R22 * R21:R18
 431 ;;; (C3:C0) = C3:C0   * B3:B0
 432 ;;; Clobbers: R26, R27, __tmp_reg__
 433 DEFUN __mulsi3
 434     movw    A0, C0
 435     push    C2
 436     push    C3
 437     XCALL   __muluhisi3
 438     pop     A1
 439     pop     A0
 440     ;; A1:A0 now contains the high word of A
 441     mul     A0, B0
 442     add     C2, r0
 443     adc     C3, r1
 444     mul     A0, B1
 445     add     C3, r0
 446     mul     A1, B0
 447     add     C3, r0
 448     clr     __zero_reg__
 449     ret
 450 ENDF __mulsi3
 451 #endif /* L_mulsi3 */
 452
 453 #undef A0
 454 #undef A1
 455
 456 #undef B0
 457 #undef B1
 458 #undef B2
 459 #undef B3
 460
 461 #undef C0
 462 #undef C1
 463 #undef C2
 464 #undef C3
 465
 466 #endif /* __AVR_HAVE_MUL__ */
 467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 468
 469 \f
 470 .section .text.libgcc.div, "ax", @progbits
 471
 472 /*******************************************************
 473        Division 8 / 8 => (result + remainder)
 474 *******************************************************/
 475 #define r_rem   r25     /* remainder */
 476 #define r_arg1  r24     /* dividend, quotient */
 477 #define r_arg2  r22     /* divisor */
 478 #define r_cnt   r23     /* loop count */
 479
 480 #if defined (L_udivmodqi4)
 481 DEFUN __udivmodqi4
 482         sub     r_rem,r_rem     ; clear remainder and carry
 483         ldi     r_cnt,9         ; init loop counter
 484         rjmp    __udivmodqi4_ep ; jump to entry point
 485 __udivmodqi4_loop:
 486         rol     r_rem           ; shift dividend into remainder
 487         cp      r_rem,r_arg2    ; compare remainder & divisor
 488         brcs    __udivmodqi4_ep ; remainder <= divisor
 489         sub     r_rem,r_arg2    ; restore remainder
 490 __udivmodqi4_ep:
 491         rol     r_arg1          ; shift dividend (with CARRY)
 492         dec     r_cnt           ; decrement loop counter
 493         brne    __udivmodqi4_loop
 494         com     r_arg1          ; complement result
 495                                 ; because C flag was complemented in loop
 496         ret
 497 ENDF __udivmodqi4
 498 #endif /* defined (L_udivmodqi4) */
 499
 500 #if defined (L_divmodqi4)
 501 DEFUN __divmodqi4
 502         bst     r_arg1,7        ; store sign of dividend
 503         mov     __tmp_reg__,r_arg1
 504         eor     __tmp_reg__,r_arg2; r0.7 is sign of result
 505         sbrc    r_arg1,7
 506         neg     r_arg1          ; dividend negative : negate
 507         sbrc    r_arg2,7
 508         neg     r_arg2          ; divisor negative : negate
 509         XCALL   __udivmodqi4    ; do the unsigned div/mod
 510         brtc    __divmodqi4_1
 511         neg     r_rem           ; correct remainder sign
 512 __divmodqi4_1:
 513         sbrc    __tmp_reg__,7
 514         neg     r_arg1          ; correct result sign
 515 __divmodqi4_exit:
 516         ret
 517 ENDF __divmodqi4
 518 #endif /* defined (L_divmodqi4) */
 519
 520 #undef r_rem
 521 #undef r_arg1
 522 #undef r_arg2
 523 #undef r_cnt
 524
 525
 526 /*******************************************************
 527        Division 16 / 16 => (result + remainder)
 528 *******************************************************/
 529 #define r_remL  r26     /* remainder Low */
 530 #define r_remH  r27     /* remainder High */
 531
 532 /* return: remainder */
 533 #define r_arg1L r24     /* dividend Low */
 534 #define r_arg1H r25     /* dividend High */
 535
 536 /* return: quotient */
 537 #define r_arg2L r22     /* divisor Low */
 538 #define r_arg2H r23     /* divisor High */
 539
 540 #define r_cnt   r21     /* loop count */
 541
 542 #if defined (L_udivmodhi4)
 543 DEFUN __udivmodhi4
 544         sub     r_remL,r_remL
 545         sub     r_remH,r_remH   ; clear remainder and carry
 546         ldi     r_cnt,17        ; init loop counter
 547         rjmp    __udivmodhi4_ep ; jump to entry point
 548 __udivmodhi4_loop:
 549         rol     r_remL          ; shift dividend into remainder
 550         rol     r_remH
 551         cp      r_remL,r_arg2L  ; compare remainder & divisor
 552         cpc     r_remH,r_arg2H
 553         brcs    __udivmodhi4_ep ; remainder < divisor
 554         sub     r_remL,r_arg2L  ; restore remainder
 555         sbc     r_remH,r_arg2H
 556 __udivmodhi4_ep:
 557         rol     r_arg1L         ; shift dividend (with CARRY)
 558         rol     r_arg1H
 559         dec     r_cnt           ; decrement loop counter
 560         brne    __udivmodhi4_loop
 561         com     r_arg1L
 562         com     r_arg1H
 563 ; div/mod results to return registers, as for the div() function
 564         mov_l   r_arg2L, r_arg1L        ; quotient
 565         mov_h   r_arg2H, r_arg1H
 566         mov_l   r_arg1L, r_remL         ; remainder
 567         mov_h   r_arg1H, r_remH
 568         ret
 569 ENDF __udivmodhi4
 570 #endif /* defined (L_udivmodhi4) */
 571
 572 #if defined (L_divmodhi4)
 573 DEFUN __divmodhi4
 574     .global _div
 575 _div:
 576     bst     r_arg1H,7           ; store sign of dividend
 577     mov     __tmp_reg__,r_arg2H
 578     brtc    0f
 579     com     __tmp_reg__         ; r0.7 is sign of result
 580     rcall   __divmodhi4_neg1    ; dividend negative: negate
 581 0:
 582     sbrc    r_arg2H,7
 583     rcall   __divmodhi4_neg2    ; divisor negative: negate
 584     XCALL   __udivmodhi4        ; do the unsigned div/mod
 585     sbrc    __tmp_reg__,7
 586     rcall   __divmodhi4_neg2    ; correct remainder sign
 587     brtc    __divmodhi4_exit
 588 __divmodhi4_neg1:
 589     ;; correct dividend/remainder sign
 590     com     r_arg1H
 591     neg     r_arg1L
 592     sbci    r_arg1H,0xff
 593     ret
 594 __divmodhi4_neg2:
 595     ;; correct divisor/result sign
 596     com     r_arg2H
 597     neg     r_arg2L
 598     sbci    r_arg2H,0xff
 599 __divmodhi4_exit:
 600     ret
 601 ENDF __divmodhi4
 602 #endif /* defined (L_divmodhi4) */
 603
 604 #undef r_remH
 605 #undef r_remL
 606
 607 #undef r_arg1H
 608 #undef r_arg1L
 609
 610 #undef r_arg2H
 611 #undef r_arg2L
 612
 613 #undef r_cnt
 614
 615 /*******************************************************
 616        Division 24 / 24 => (result + remainder)
 617 *******************************************************/
 618
 619 ;; A[0..2]: In: Dividend; Out: Quotient
 620 #define A0  22
 621 #define A1  A0+1
 622 #define A2  A0+2
 623
 624 ;; B[0..2]: In: Divisor;   Out: Remainder
 625 #define B0  18
 626 #define B1  B0+1
 627 #define B2  B0+2
 628
 629 ;; C[0..2]: Expand remainder
 630 #define C0  __zero_reg__
 631 #define C1  26
 632 #define C2  25
 633
 634 ;; Loop counter
 635 #define r_cnt   21
 636
 637 #if defined (L_udivmodpsi4)
 638 ;; R24:R22 = R24:R22  udiv  R20:R18
 639 ;; R20:R18 = R24:R22  umod  R20:R18
 640 ;; Clobbers: R21, R25, R26
 641
 642 DEFUN __udivmodpsi4
 643     ; init loop counter
 644     ldi     r_cnt, 24+1
 645     ; Clear remainder and carry.  C0 is already 0
 646     clr     C1
 647     sub     C2, C2
 648     ; jump to entry point
 649     rjmp    __udivmodpsi4_start
 650 __udivmodpsi4_loop:
 651     ; shift dividend into remainder
 652     rol     C0
 653     rol     C1
 654     rol     C2
 655     ; compare remainder & divisor
 656     cp      C0, B0
 657     cpc     C1, B1
 658     cpc     C2, B2
 659     brcs    __udivmodpsi4_start ; remainder <= divisor
 660     sub     C0, B0              ; restore remainder
 661     sbc     C1, B1
 662     sbc     C2, B2
 663 __udivmodpsi4_start:
 664     ; shift dividend (with CARRY)
 665     rol     A0
 666     rol     A1
 667     rol     A2
 668     ; decrement loop counter
 669     dec     r_cnt
 670     brne    __udivmodpsi4_loop
 671     com     A0
 672     com     A1
 673     com     A2
 674     ; div/mod results to return registers
 675     ; remainder
 676     mov     B0, C0
 677     mov     B1, C1
 678     mov     B2, C2
 679     clr     __zero_reg__ ; C0
 680     ret
 681 ENDF __udivmodpsi4
 682 #endif /* defined (L_udivmodpsi4) */
 683
 684 #if defined (L_divmodpsi4)
 685 ;; R24:R22 = R24:R22  div  R20:R18
 686 ;; R20:R18 = R24:R22  mod  R20:R18
 687 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
 688
 689 DEFUN __divmodpsi4
 690     ; R0.7 will contain the sign of the result:
 691     ; R0.7 = A.sign ^ B.sign
 692     mov __tmp_reg__, B2
 693     ; T-flag = sign of dividend
 694     bst     A2, 7
 695     brtc    0f
 696     com     __tmp_reg__
 697     ; Adjust dividend's sign
 698     rcall   __divmodpsi4_negA
 699 0:
 700     ; Adjust divisor's sign
 701     sbrc    B2, 7
 702     rcall   __divmodpsi4_negB
 703
 704     ; Do the unsigned div/mod
 705     XCALL   __udivmodpsi4
 706
 707     ; Adjust quotient's sign
 708     sbrc    __tmp_reg__, 7
 709     rcall   __divmodpsi4_negA
 710
 711     ; Adjust remainder's sign
 712     brtc    __divmodpsi4_end
 713
 714 __divmodpsi4_negB:
 715     ; Correct divisor/remainder sign
 716     com     B2
 717     com     B1
 718     neg     B0
 719     sbci    B1, -1
 720     sbci    B2, -1
 721     ret
 722
 723     ; Correct dividend/quotient sign
 724 __divmodpsi4_negA:
 725     com     A2
 726     com     A1
 727     neg     A0
 728     sbci    A1, -1
 729     sbci    A2, -1
 730 __divmodpsi4_end:
 731     ret
 732
 733 ENDF __divmodpsi4
 734 #endif /* defined (L_divmodpsi4) */
 735
 736 #undef A0
 737 #undef A1
 738 #undef A2
 739
 740 #undef B0
 741 #undef B1
 742 #undef B2
 743
 744 #undef C0
 745 #undef C1
 746 #undef C2
 747
 748 #undef r_cnt
 749
 750 /*******************************************************
 751        Division 32 / 32 => (result + remainder)
 752 *******************************************************/
 753 #define r_remHH r31     /* remainder High */
 754 #define r_remHL r30
 755 #define r_remH  r27
 756 #define r_remL  r26     /* remainder Low */
 757
 758 /* return: remainder */
 759 #define r_arg1HH r25    /* dividend High */
 760 #define r_arg1HL r24
 761 #define r_arg1H  r23
 762 #define r_arg1L  r22    /* dividend Low */
 763
 764 /* return: quotient */
 765 #define r_arg2HH r21    /* divisor High */
 766 #define r_arg2HL r20
 767 #define r_arg2H  r19
 768 #define r_arg2L  r18    /* divisor Low */
 769
 770 #define r_cnt __zero_reg__  /* loop count (0 after the loop!) */
 771
 772 #if defined (L_udivmodsi4)
 773 DEFUN __udivmodsi4
 774         ldi     r_remL, 33      ; init loop counter
 775         mov     r_cnt, r_remL
 776         sub     r_remL,r_remL
 777         sub     r_remH,r_remH   ; clear remainder and carry
 778         mov_l   r_remHL, r_remL
 779         mov_h   r_remHH, r_remH
 780         rjmp    __udivmodsi4_ep ; jump to entry point
 781 __udivmodsi4_loop:
 782         rol     r_remL          ; shift dividend into remainder
 783         rol     r_remH
 784         rol     r_remHL
 785         rol     r_remHH
 786         cp      r_remL,r_arg2L  ; compare remainder & divisor
 787         cpc     r_remH,r_arg2H
 788         cpc     r_remHL,r_arg2HL
 789         cpc     r_remHH,r_arg2HH
 790         brcs    __udivmodsi4_ep ; remainder <= divisor
 791         sub     r_remL,r_arg2L  ; restore remainder
 792         sbc     r_remH,r_arg2H
 793         sbc     r_remHL,r_arg2HL
 794         sbc     r_remHH,r_arg2HH
 795 __udivmodsi4_ep:
 796         rol     r_arg1L         ; shift dividend (with CARRY)
 797         rol     r_arg1H
 798         rol     r_arg1HL
 799         rol     r_arg1HH
 800         dec     r_cnt           ; decrement loop counter
 801         brne    __udivmodsi4_loop
 802                                 ; __zero_reg__ now restored (r_cnt == 0)
 803         com     r_arg1L
 804         com     r_arg1H
 805         com     r_arg1HL
 806         com     r_arg1HH
 807 ; div/mod results to return registers, as for the ldiv() function
 808         mov_l   r_arg2L,  r_arg1L       ; quotient
 809         mov_h   r_arg2H,  r_arg1H
 810         mov_l   r_arg2HL, r_arg1HL
 811         mov_h   r_arg2HH, r_arg1HH
 812         mov_l   r_arg1L,  r_remL        ; remainder
 813         mov_h   r_arg1H,  r_remH
 814         mov_l   r_arg1HL, r_remHL
 815         mov_h   r_arg1HH, r_remHH
 816         ret
 817 ENDF __udivmodsi4
 818 #endif /* defined (L_udivmodsi4) */
 819
 820 #if defined (L_divmodsi4)
 821 DEFUN __divmodsi4
 822     mov     __tmp_reg__,r_arg2HH
 823     bst     r_arg1HH,7          ; store sign of dividend
 824     brtc    0f
 825     com     __tmp_reg__         ; r0.7 is sign of result
 826     rcall   __divmodsi4_neg1    ; dividend negative: negate
 827 0:
 828     sbrc    r_arg2HH,7
 829     rcall   __divmodsi4_neg2    ; divisor negative: negate
 830     XCALL   __udivmodsi4        ; do the unsigned div/mod
 831     sbrc    __tmp_reg__, 7      ; correct quotient sign
 832     rcall   __divmodsi4_neg2
 833     brtc    __divmodsi4_exit    ; correct remainder sign
 834 __divmodsi4_neg1:
 835     ;; correct dividend/remainder sign
 836     com     r_arg1HH
 837     com     r_arg1HL
 838     com     r_arg1H
 839     neg     r_arg1L
 840     sbci    r_arg1H, 0xff
 841     sbci    r_arg1HL,0xff
 842     sbci    r_arg1HH,0xff
 843     ret
 844 __divmodsi4_neg2:
 845     ;; correct divisor/quotient sign
 846     com     r_arg2HH
 847     com     r_arg2HL
 848     com     r_arg2H
 849     neg     r_arg2L
 850     sbci    r_arg2H,0xff
 851     sbci    r_arg2HL,0xff
 852     sbci    r_arg2HH,0xff
 853 __divmodsi4_exit:
 854     ret
 855 ENDF __divmodsi4
 856 #endif /* defined (L_divmodsi4) */
 857
 858
 859 /*******************************************************
 860        Division 64 / 64
 861        Modulo   64 % 64
 862 *******************************************************/
 863
 864 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
 865 ;; at least 16k of Program Memory.  For smaller Devices, depend
 866 ;; on MOVW.
 867
 868 #if defined (__AVR_HAVE_JMP_CALL__)
 869 #   define SPEED_DIV 8
 870 #elif defined (__AVR_HAVE_MOVW__)
 871 #   define SPEED_DIV 16
 872 #else
 873 #   define SPEED_DIV 0
 874 #endif
 875
 876 ;; A[0..7]: In: Dividend;
 877 ;; Out: Quotient  (T = 0)
 878 ;; Out: Remainder (T = 1)
 879 #define A0  18
 880 #define A1  A0+1
 881 #define A2  A0+2
 882 #define A3  A0+3
 883 #define A4  A0+4
 884 #define A5  A0+5
 885 #define A6  A0+6
 886 #define A7  A0+7
 887
 888 ;; B[0..7]: In: Divisor;   Out: Clobber
 889 #define B0  10
 890 #define B1  B0+1
 891 #define B2  B0+2
 892 #define B3  B0+3
 893 #define B4  B0+4
 894 #define B5  B0+5
 895 #define B6  B0+6
 896 #define B7  B0+7
 897
 898 ;; C[0..7]: Expand remainder;  Out: Remainder (unused)
 899 #define C0  8
 900 #define C1  C0+1
 901 #define C2  30
 902 #define C3  C2+1
 903 #define C4  28
 904 #define C5  C4+1
 905 #define C6  26
 906 #define C7  C6+1
 907
 908 ;; Holds Signs during Division Routine
 909 #define SS      __tmp_reg__
 910
 911 ;; Bit-Counter in Division Routine
 912 #define R_cnt   __zero_reg__
 913
 914 ;; Scratch Register for Negation
 915 #define NN      r31
 916
 917 #if defined (L_udivdi3)
 918
 919 ;; R25:R18 = R24:R18  umod  R17:R10
 920 ;; Ordinary ABI-Function
 921
 922 DEFUN __umoddi3
 923     set
 924     rjmp __udivdi3_umoddi3
 925 ENDF __umoddi3
 926
 927 ;; R25:R18 = R24:R18  udiv  R17:R10
 928 ;; Ordinary ABI-Function
 929
 930 DEFUN __udivdi3
 931     clt
 932 ENDF __udivdi3
 933
 934 DEFUN __udivdi3_umoddi3
 935     push    C0
 936     push    C1
 937     push    C4
 938     push    C5
 939     XCALL   __udivmod64
 940     pop     C5
 941     pop     C4
 942     pop     C1
 943     pop     C0
 944     ret
 945 ENDF __udivdi3_umoddi3
 946 #endif /* L_udivdi3 */
 947
 948 #if defined (L_udivmod64)
 949
 950 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
 951 ;; No Registers saved/restored; the Callers will take Care.
 952 ;; Preserves B[] and T-flag
 953 ;; T = 0: Compute Quotient  in A[]
 954 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
 955
 956 DEFUN __udivmod64
 957
 958     ;; Clear Remainder (C6, C7 will follow)
 959     clr     C0
 960     clr     C1
 961     wmov    C2, C0
 962     wmov    C4, C0
 963     ldi     C7, 64
 964
 965 #if SPEED_DIV == 0 || SPEED_DIV == 16
 966     ;; Initialize Loop-Counter
 967     mov     R_cnt, C7
 968     wmov    C6, C0
 969 #endif /* SPEED_DIV */
 970
 971 #if SPEED_DIV == 8
 972
 973     push    A7
 974     clr     C6
 975
 976 1:  ;; Compare shifted Devidend against Divisor
 977     ;; If -- even after Shifting -- it is smaller...
 978     CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
 979     cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
 980     brcc    2f
 981
 982     ;; ...then we can subtract it.  Thus, it is legal to shift left
 983                $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
 984     mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
 985     mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
 986     mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0
 987
 988     ;; 8 Bits are done
 989     subi    C7, 8
 990     brne    1b
 991
 992     ;; Shifted 64 Bits:  A7 has traveled to C7
 993     pop     C7
 994     ;; Divisor is greater than Dividend. We have:
 995     ;; A[] % B[] = A[]
 996     ;; A[] / B[] = 0
 997     ;; Thus, we can return immediately
 998     rjmp    5f
 999
1000 2:  ;; Initialze Bit-Counter with Number of Bits still to be performed
1001     mov     R_cnt, C7
1002
1003     ;; Push of A7 is not needed because C7 is still 0
1004     pop     C7
1005     clr     C7
1006
1007 #elif  SPEED_DIV == 16
1008
1009     ;; Compare shifted Dividend against Divisor
1010     cp      A7, B3
1011     cpc     C0, B4
1012     cpc     C1, B5
1013     cpc     C2, B6
1014     cpc     C3, B7
1015     brcc    2f
1016
1017     ;; Divisor is greater than shifted Dividen: We can shift the Dividend
1018     ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1019     wmov  C2,A6  $  wmov C0,A4
1020     wmov  A6,A2  $  wmov A4,A0
1021     wmov  A2,C6  $  wmov A0,C4
1022
1023     ;; Set Bit Counter to 32
1024     lsr     R_cnt
1025 2:
1026 #elif SPEED_DIV
1027 #error SPEED_DIV = ?
1028 #endif /* SPEED_DIV */
1029
1030 ;; The very Division + Remainder Routine
1031
1032 3:  ;; Left-shift Dividend...
1033     lsl A0     $  rol A1     $  rol A2     $  rol A3
1034     rol A4     $  rol A5     $  rol A6     $  rol A7
1035
1036     ;; ...into Remainder
1037     rol C0     $  rol C1     $  rol C2     $  rol C3
1038     rol C4     $  rol C5     $  rol C6     $  rol C7
1039
1040     ;; Compare Remainder and Divisor
1041     CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
1042     cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7
1043
1044     brcs 4f
1045
1046     ;; Divisor fits into Remainder:  Subtract it from Remainder...
1047     SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
1048     sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7
1049
1050     ;; ...and set according Bit in the upcoming Quotient
1051     ;; The Bit will travel to its final Position
1052     ori A0, 1
1053
1054 4:  ;; This Bit is done
1055     dec     R_cnt
1056     brne    3b
1057     ;; __zero_reg__ is 0 again
1058
1059     ;; T = 0: We are fine with the Quotient in A[]
1060     ;; T = 1: Copy Remainder to A[]
1061 5:  brtc    6f
1062     wmov    A0, C0
1063     wmov    A2, C2
1064     wmov    A4, C4
1065     wmov    A6, C6
1066     ;; Move the Sign of the Result to SS.7
1067     lsl     SS
1068
1069 6:  ret
1070
1071 ENDF __udivmod64
1072 #endif /* L_udivmod64 */
1073
1074
1075 #if defined (L_divdi3)
1076
1077 ;; R25:R18 = R24:R18  mod  R17:R10
1078 ;; Ordinary ABI-Function
1079
1080 DEFUN __moddi3
1081     set
1082     rjmp    __divdi3_moddi3
1083 ENDF __moddi3
1084
1085 ;; R25:R18 = R24:R18  div  R17:R10
1086 ;; Ordinary ABI-Function
1087
1088 DEFUN __divdi3
1089     clt
1090 ENDF __divdi3
1091
1092 DEFUN  __divdi3_moddi3
1093 #if SPEED_DIV
1094     mov     r31, A7
1095     or      r31, B7
1096     brmi    0f
1097     ;; Both Signs are 0:  the following Complexitiy is not needed
1098     XJMP    __udivdi3_umoddi3
1099 #endif /* SPEED_DIV */
1100
1101 0:  ;; The Prologue
1102     ;; Save Z = 12 Registers:  Y, 17...8
1103     ;; No Frame needed (X = 0)
1104     clr r26
1105     clr r27
1106     ldi r30, lo8(gs(1f))
1107     ldi r31, hi8(gs(1f))
1108     XJMP __prologue_saves__ + ((18 - 12) * 2)
1109
1110 1:  ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
1111     ;; SS.6 will contain the Sign of the Remainder (A.sign)
1112     mov     SS, A7
1113     asr     SS
1114     ;; Adjust Dividend's Sign as needed
1115 #if SPEED_DIV
1116     ;; Compiling for Speed we know that at least one Sign must be < 0
1117     ;; Thus, if A[] >= 0 then we know B[] < 0
1118     brpl    22f
1119 #else
1120     brpl    21f
1121 #endif /* SPEED_DIV */
1122
1123     XCALL   __negdi2
1124
1125     ;; Adjust Divisor's Sign and SS.7 as needed
1126 21: tst     B7
1127     brpl    3f
1128 22: ldi     NN, 1 << 7
1129     eor     SS, NN
1130
1131     ldi NN, -1
1132     com B4     $  com B5     $  com B6     $  com B7
1133                $  com B1     $  com B2     $  com B3
1134     NEG B0
1135                $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
1136     sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN
1137
1138 3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1139     XCALL   __udivmod64
1140
1141     ;; Adjust Result's Sign
1142 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1143     tst     SS
1144     brpl    4f
1145 #else
1146     sbrc    SS, 7
1147 #endif /* __AVR_HAVE_JMP_CALL__ */
1148     XCALL   __negdi2
1149
1150 4:  ;; Epilogue: Restore the Z = 12 Registers and return
1151     in r28, __SP_L__
1152 #if defined (__AVR_HAVE_8BIT_SP__)
1153 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1154 ;;        so this lines are dead code.  To make it work, devices without
1155 ;;        SP_H must get their own multilib(s).
1156     clr r29
1157 #else
1158     in r29, __SP_H__
1159 #endif /* #SP = 8/16 */
1160     ldi r30, 12
1161     XJMP __epilogue_restores__ + ((18 - 12) * 2)
1162
1163 ENDF __divdi3_moddi3
1164
1165 #undef R_cnt
1166 #undef SS
1167 #undef NN
1168
1169 #endif /* L_divdi3 */
1170
1171 #if defined (L_negdi2)
1172 DEFUN __negdi2
1173
1174     com  A4    $  com  A5    $  com  A6    $  com  A7
1175                $  com  A1    $  com  A2    $  com  A3
1176     NEG  A0
1177                $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
1178     sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
1179     ret
1180
1181 ENDF __negdi2
1182 #endif /* L_negdi2 */
1183
1184 #undef C7
1185 #undef C6
1186 #undef C5
1187 #undef C4
1188 #undef C3
1189 #undef C2
1190 #undef C1
1191 #undef C0
1192
1193 #undef B7
1194 #undef B6
1195 #undef B5
1196 #undef B4
1197 #undef B3
1198 #undef B2
1199 #undef B1
1200 #undef B0
1201
1202 #undef A7
1203 #undef A6
1204 #undef A5
1205 #undef A4
1206 #undef A3
1207 #undef A2
1208 #undef A1
1209 #undef A0
1210
1211 \f
1212 .section .text.libgcc.prologue, "ax", @progbits
1213
1214 /**********************************
1215  * This is a prologue subroutine
1216  **********************************/
1217 #if defined (L_prologue)
1218
1219 ;; This function does not clobber T-flag; 64-bit division relies on it
1220 DEFUN __prologue_saves__
1221         push r2
1222         push r3
1223         push r4
1224         push r5
1225         push r6
1226         push r7
1227         push r8
1228         push r9
1229         push r10
1230         push r11
1231         push r12
1232         push r13
1233         push r14
1234         push r15
1235         push r16
1236         push r17
1237         push r28
1238         push r29
1239 #if defined (__AVR_HAVE_8BIT_SP__)
1240 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1241 ;;        so this lines are dead code.  To make it work, devices without
1242 ;;        SP_H must get their own multilib(s).
1243         in      r28,__SP_L__
1244         sub     r28,r26
1245         out     __SP_L__,r28
1246         clr     r29
1247 #else
1248         in      r28,__SP_L__
1249         in      r29,__SP_H__
1250         sub     r28,r26
1251         sbc     r29,r27
1252         in      __tmp_reg__,__SREG__
1253         cli
1254         out     __SP_H__,r29
1255         out     __SREG__,__tmp_reg__
1256         out     __SP_L__,r28
1257 #endif /* #SP = 8/16 */
1258
1259 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1260         eijmp
1261 #else
1262         ijmp
1263 #endif
1264
1265 ENDF __prologue_saves__
1266 #endif /* defined (L_prologue) */
1267
1268 /*
1269  * This is an epilogue subroutine
1270  */
1271 #if defined (L_epilogue)
1272
1273 DEFUN __epilogue_restores__
1274         ldd     r2,Y+18
1275         ldd     r3,Y+17
1276         ldd     r4,Y+16
1277         ldd     r5,Y+15
1278         ldd     r6,Y+14
1279         ldd     r7,Y+13
1280         ldd     r8,Y+12
1281         ldd     r9,Y+11
1282         ldd     r10,Y+10
1283         ldd     r11,Y+9
1284         ldd     r12,Y+8
1285         ldd     r13,Y+7
1286         ldd     r14,Y+6
1287         ldd     r15,Y+5
1288         ldd     r16,Y+4
1289         ldd     r17,Y+3
1290         ldd     r26,Y+2
1291 #if defined (__AVR_HAVE_8BIT_SP__)
1292 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1293 ;;        so this lines are dead code.  To make it work, devices without
1294 ;;        SP_H must get their own multilib(s).
1295         ldd     r29,Y+1
1296         add     r28,r30
1297         out     __SP_L__,r28
1298         mov     r28, r26
1299 #else
1300         ldd     r27,Y+1
1301         add     r28,r30
1302         adc     r29,__zero_reg__
1303         in      __tmp_reg__,__SREG__
1304         cli
1305         out     __SP_H__,r29
1306         out     __SREG__,__tmp_reg__
1307         out     __SP_L__,r28
1308         mov_l   r28, r26
1309         mov_h   r29, r27
1310 #endif /* #SP = 8/16 */
1311         ret
1312 ENDF __epilogue_restores__
1313 #endif /* defined (L_epilogue) */
1314
1315 #ifdef L_exit
1316         .section .fini9,"ax",@progbits
1317 DEFUN _exit
1318         .weak   exit
1319 exit:
1320 ENDF _exit
1321
1322         /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
1323
1324         .section .fini0,"ax",@progbits
1325         cli
1326 __stop_program:
1327         rjmp    __stop_program
1328 #endif /* defined (L_exit) */
1329
1330 #ifdef L_cleanup
1331         .weak   _cleanup
1332         .func   _cleanup
1333 _cleanup:
1334         ret
1335 .endfunc
1336 #endif /* defined (L_cleanup) */
1337
1338 \f
1339 .section .text.libgcc, "ax", @progbits
1340
1341 #ifdef L_tablejump
1342 DEFUN __tablejump2__
1343         lsl     r30
1344         rol     r31
1345     ;; FALLTHRU
1346 ENDF __tablejump2__
1347
1348 DEFUN __tablejump__
1349 #if defined (__AVR_HAVE_LPMX__)
1350         lpm __tmp_reg__, Z+
1351         lpm r31, Z
1352         mov r30, __tmp_reg__
1353 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1354         eijmp
1355 #else
1356         ijmp
1357 #endif
1358
1359 #else /* !HAVE_LPMX */
1360         lpm
1361         adiw r30, 1
1362         push r0
1363         lpm
1364         push r0
1365 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1366         in   __tmp_reg__, __EIND__
1367         push __tmp_reg__
1368 #endif
1369         ret
1370 #endif /* !HAVE_LPMX */
1371 ENDF __tablejump__
1372 #endif /* defined (L_tablejump) */
1373
1374 #ifdef L_copy_data
1375         .section .init4,"ax",@progbits
1376 DEFUN __do_copy_data
1377 #if defined(__AVR_HAVE_ELPMX__)
1378         ldi     r17, hi8(__data_end)
1379         ldi     r26, lo8(__data_start)
1380         ldi     r27, hi8(__data_start)
1381         ldi     r30, lo8(__data_load_start)
1382         ldi     r31, hi8(__data_load_start)
1383         ldi     r16, hh8(__data_load_start)
1384         out     __RAMPZ__, r16
1385         rjmp    .L__do_copy_data_start
1386 .L__do_copy_data_loop:
1387         elpm    r0, Z+
1388         st      X+, r0
1389 .L__do_copy_data_start:
1390         cpi     r26, lo8(__data_end)
1391         cpc     r27, r17
1392         brne    .L__do_copy_data_loop
1393 #elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
1394         ldi     r17, hi8(__data_end)
1395         ldi     r26, lo8(__data_start)
1396         ldi     r27, hi8(__data_start)
1397         ldi     r30, lo8(__data_load_start)
1398         ldi     r31, hi8(__data_load_start)
1399         ldi     r16, hh8(__data_load_start - 0x10000)
1400 .L__do_copy_data_carry:
1401         inc     r16
1402         out     __RAMPZ__, r16
1403         rjmp    .L__do_copy_data_start
1404 .L__do_copy_data_loop:
1405         elpm
1406         st      X+, r0
1407         adiw    r30, 1
1408         brcs    .L__do_copy_data_carry
1409 .L__do_copy_data_start:
1410         cpi     r26, lo8(__data_end)
1411         cpc     r27, r17
1412         brne    .L__do_copy_data_loop
1413 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
1414         ldi     r17, hi8(__data_end)
1415         ldi     r26, lo8(__data_start)
1416         ldi     r27, hi8(__data_start)
1417         ldi     r30, lo8(__data_load_start)
1418         ldi     r31, hi8(__data_load_start)
1419         rjmp    .L__do_copy_data_start
1420 .L__do_copy_data_loop:
1421 #if defined (__AVR_HAVE_LPMX__)
1422         lpm     r0, Z+
1423 #else
1424         lpm
1425         adiw    r30, 1
1426 #endif
1427         st      X+, r0
1428 .L__do_copy_data_start:
1429         cpi     r26, lo8(__data_end)
1430         cpc     r27, r17
1431         brne    .L__do_copy_data_loop
1432 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
1433 ENDF __do_copy_data
1434 #endif /* L_copy_data */
1435
1436 /* __do_clear_bss is only necessary if there is anything in .bss section.  */
1437
1438 #ifdef L_clear_bss
1439         .section .init4,"ax",@progbits
1440 DEFUN __do_clear_bss
1441         ldi     r17, hi8(__bss_end)
1442         ldi     r26, lo8(__bss_start)
1443         ldi     r27, hi8(__bss_start)
1444         rjmp    .do_clear_bss_start
1445 .do_clear_bss_loop:
1446         st      X+, __zero_reg__
1447 .do_clear_bss_start:
1448         cpi     r26, lo8(__bss_end)
1449         cpc     r27, r17
1450         brne    .do_clear_bss_loop
1451 ENDF __do_clear_bss
1452 #endif /* L_clear_bss */
1453
1454 /* __do_global_ctors and __do_global_dtors are only necessary
1455    if there are any constructors/destructors.  */
1456
1457 #ifdef L_ctors
1458         .section .init6,"ax",@progbits
1459 DEFUN __do_global_ctors
1460 #if defined(__AVR_HAVE_RAMPZ__)
1461         ldi     r17, hi8(__ctors_start)
1462         ldi     r28, lo8(__ctors_end)
1463         ldi     r29, hi8(__ctors_end)
1464         ldi     r16, hh8(__ctors_end)
1465         rjmp    .L__do_global_ctors_start
1466 .L__do_global_ctors_loop:
1467         sbiw    r28, 2
1468         sbc     r16, __zero_reg__
1469         mov_h   r31, r29
1470         mov_l   r30, r28
1471         out     __RAMPZ__, r16
1472         XCALL   __tablejump_elpm__
1473 .L__do_global_ctors_start:
1474         cpi     r28, lo8(__ctors_start)
1475         cpc     r29, r17
1476         ldi     r24, hh8(__ctors_start)
1477         cpc     r16, r24
1478         brne    .L__do_global_ctors_loop
1479 #else
1480         ldi     r17, hi8(__ctors_start)
1481         ldi     r28, lo8(__ctors_end)
1482         ldi     r29, hi8(__ctors_end)
1483         rjmp    .L__do_global_ctors_start
1484 .L__do_global_ctors_loop:
1485         sbiw    r28, 2
1486         mov_h   r31, r29
1487         mov_l   r30, r28
1488         XCALL   __tablejump__
1489 .L__do_global_ctors_start:
1490         cpi     r28, lo8(__ctors_start)
1491         cpc     r29, r17
1492         brne    .L__do_global_ctors_loop
1493 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1494 ENDF __do_global_ctors
1495 #endif /* L_ctors */
1496
1497 #ifdef L_dtors
1498         .section .fini6,"ax",@progbits
1499 DEFUN __do_global_dtors
1500 #if defined(__AVR_HAVE_RAMPZ__)
1501         ldi     r17, hi8(__dtors_end)
1502         ldi     r28, lo8(__dtors_start)
1503         ldi     r29, hi8(__dtors_start)
1504         ldi     r16, hh8(__dtors_start)
1505         rjmp    .L__do_global_dtors_start
1506 .L__do_global_dtors_loop:
1507         sbiw    r28, 2
1508         sbc     r16, __zero_reg__
1509         mov_h   r31, r29
1510         mov_l   r30, r28
1511         out     __RAMPZ__, r16
1512         XCALL   __tablejump_elpm__
1513 .L__do_global_dtors_start:
1514         cpi     r28, lo8(__dtors_end)
1515         cpc     r29, r17
1516         ldi     r24, hh8(__dtors_end)
1517         cpc     r16, r24
1518         brne    .L__do_global_dtors_loop
1519 #else
1520         ldi     r17, hi8(__dtors_end)
1521         ldi     r28, lo8(__dtors_start)
1522         ldi     r29, hi8(__dtors_start)
1523         rjmp    .L__do_global_dtors_start
1524 .L__do_global_dtors_loop:
1525         mov_h   r31, r29
1526         mov_l   r30, r28
1527         XCALL   __tablejump__
1528         adiw    r28, 2
1529 .L__do_global_dtors_start:
1530         cpi     r28, lo8(__dtors_end)
1531         cpc     r29, r17
1532         brne    .L__do_global_dtors_loop
1533 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1534 ENDF __do_global_dtors
1535 #endif /* L_dtors */
1536
1537 .section .text.libgcc, "ax", @progbits
1538
1539 #ifdef L_tablejump_elpm
1540 DEFUN __tablejump_elpm__
1541 #if defined (__AVR_HAVE_ELPM__)
1542 #if defined (__AVR_HAVE_LPMX__)
1543         elpm    __tmp_reg__, Z+
1544         elpm    r31, Z
1545         mov     r30, __tmp_reg__
1546 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1547         eijmp
1548 #else
1549         ijmp
1550 #endif
1551
1552 #else
1553         elpm
1554         adiw    r30, 1
1555         push    r0
1556         elpm
1557         push    r0
1558 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1559         in      __tmp_reg__, __EIND__
1560         push    __tmp_reg__
1561 #endif
1562         ret
1563 #endif
1564 #endif /* defined (__AVR_HAVE_ELPM__) */
1565 ENDF __tablejump_elpm__
1566 #endif /* defined (L_tablejump_elpm) */
1567
1568 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1569 ;; Loading n bytes from Flash; n = 3,4
1570 ;; R22... = Flash[Z]
1571 ;; Clobbers: __tmp_reg__
1572
1573 #if (defined (L_load_3)        \
1574      || defined (L_load_4))    \
1575     && !defined (__AVR_HAVE_LPMX__)
1576
1577 ;; Destination
1578 #define D0  22
1579 #define D1  D0+1
1580 #define D2  D0+2
1581 #define D3  D0+3
1582
1583 .macro  .load dest, n
1584     lpm
1585     mov     \dest, r0
1586 .if \dest != D0+\n-1
1587     adiw    r30, 1
1588 .else
1589     sbiw    r30, \n-1
1590 .endif
1591 .endm
1592
1593 #if defined (L_load_3)
1594 DEFUN __load_3
1595     push  D3
1596     XCALL __load_4
1597     pop   D3
1598     ret
1599 ENDF __load_3
1600 #endif /* L_load_3 */
1601
1602 #if defined (L_load_4)
1603 DEFUN __load_4
1604     .load D0, 4
1605     .load D1, 4
1606     .load D2, 4
1607     .load D3, 4
1608     ret
1609 ENDF __load_4
1610 #endif /* L_load_4 */
1611
1612 #endif /* L_load_3 || L_load_3 */
1613
1614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1615 ;; Loading n bytes from Flash; n = 2,3,4
1616 ;; R22... = Flash[R21:Z]
1617 ;; Clobbers: __tmp_reg__, R21, R30, R31
1618
1619 #if (defined (L_xload_2)            \
1620      || defined (L_xload_3)         \
1621      || defined (L_xload_4))        \
1622     && defined (__AVR_HAVE_ELPM__)  \
1623     && !defined (__AVR_HAVE_ELPMX__)
1624
1625 #if !defined (__AVR_HAVE_RAMPZ__)
1626 #error Need RAMPZ
1627 #endif /* have RAMPZ */
1628
1629 ;; Destination
1630 #define D0  22
1631 #define D1  D0+1
1632 #define D2  D0+2
1633 #define D3  D0+3
1634
1635 ;; Register containing bits 16+ of the address
1636
1637 #define HHI8  21
1638
1639 .macro  .xload dest, n
1640     elpm
1641     mov     \dest, r0
1642 .if \dest != D0+\n-1
1643     adiw    r30, 1
1644     adc     HHI8, __zero_reg__
1645     out     __RAMPZ__, HHI8
1646 .endif
1647 .endm
1648
1649 #if defined (L_xload_2)
1650 DEFUN __xload_2
1651     out     __RAMPZ__, HHI8
1652     .xload D0, 2
1653     .xload D1, 2
1654     ret
1655 ENDF __xload_2
1656 #endif /* L_xload_2 */
1657
1658 #if defined (L_xload_3)
1659 DEFUN __xload_3
1660     out     __RAMPZ__, HHI8
1661     .xload D0, 3
1662     .xload D1, 3
1663     .xload D2, 3
1664     ret
1665 ENDF __xload_3
1666 #endif /* L_xload_3 */
1667
1668 #if defined (L_xload_4)
1669 DEFUN __xload_4
1670     out     __RAMPZ__, HHI8
1671     .xload D0, 4
1672     .xload D1, 4
1673     .xload D2, 4
1674     .xload D3, 4
1675     ret
1676 ENDF __xload_4
1677 #endif /* L_xload_4 */
1678
1679 #endif /* L_xload_{2|3|4} && ELPM */
1680
1681 \f
1682 .section .text.libgcc.builtins, "ax", @progbits
1683
1684 /**********************************
1685  * Find first set Bit (ffs)
1686  **********************************/
1687
1688 #if defined (L_ffssi2)
1689 ;; find first set bit
1690 ;; r25:r24 = ffs32 (r25:r22)
1691 ;; clobbers: r22, r26
1692 DEFUN __ffssi2
1693     clr  r26
1694     tst  r22
1695     brne 1f
1696     subi r26, -8
1697     or   r22, r23
1698     brne 1f
1699     subi r26, -8
1700     or   r22, r24
1701     brne 1f
1702     subi r26, -8
1703     or   r22, r25
1704     brne 1f
1705     ret
1706 1:  mov  r24, r22
1707     XJMP __loop_ffsqi2
1708 ENDF __ffssi2
1709 #endif /* defined (L_ffssi2) */
1710
1711 #if defined (L_ffshi2)
1712 ;; find first set bit
1713 ;; r25:r24 = ffs16 (r25:r24)
1714 ;; clobbers: r26
1715 DEFUN __ffshi2
1716     clr  r26
1717 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1718     ;; Some cores have problem skipping 2-word instruction
1719     tst  r24
1720     breq 2f
1721 #else
1722     cpse r24, __zero_reg__
1723 #endif /* __AVR_HAVE_JMP_CALL__ */
1724 1:  XJMP __loop_ffsqi2
1725 2:  ldi  r26, 8
1726     or   r24, r25
1727     brne 1b
1728     ret
1729 ENDF __ffshi2
1730 #endif /* defined (L_ffshi2) */
1731
1732 #if defined (L_loop_ffsqi2)
1733 ;; Helper for ffshi2, ffssi2
1734 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
1735 ;; r24 must be != 0
1736 ;; clobbers: r26
1737 DEFUN __loop_ffsqi2
1738     inc  r26
1739     lsr  r24
1740     brcc __loop_ffsqi2
1741     mov  r24, r26
1742     clr  r25
1743     ret
1744 ENDF __loop_ffsqi2
1745 #endif /* defined (L_loop_ffsqi2) */
1746
1747 \f
1748 /**********************************
1749  * Count trailing Zeros (ctz)
1750  **********************************/
1751
1752 #if defined (L_ctzsi2)
1753 ;; count trailing zeros
1754 ;; r25:r24 = ctz32 (r25:r22)
1755 ;; clobbers: r26, r22
1756 ;; ctz(0) = 255
1757 ;; Note that ctz(0) in undefined for GCC
1758 DEFUN __ctzsi2
1759     XCALL __ffssi2
1760     dec  r24
1761     ret
1762 ENDF __ctzsi2
1763 #endif /* defined (L_ctzsi2) */
1764
1765 #if defined (L_ctzhi2)
1766 ;; count trailing zeros
1767 ;; r25:r24 = ctz16 (r25:r24)
1768 ;; clobbers: r26
1769 ;; ctz(0) = 255
1770 ;; Note that ctz(0) in undefined for GCC
1771 DEFUN __ctzhi2
1772     XCALL __ffshi2
1773     dec  r24
1774     ret
1775 ENDF __ctzhi2
1776 #endif /* defined (L_ctzhi2) */
1777
1778 \f
1779 /**********************************
1780  * Count leading Zeros (clz)
1781  **********************************/
1782
1783 #if defined (L_clzdi2)
1784 ;; count leading zeros
1785 ;; r25:r24 = clz64 (r25:r18)
1786 ;; clobbers: r22, r23, r26
1787 DEFUN __clzdi2
1788     XCALL __clzsi2
1789     sbrs r24, 5
1790     ret
1791     mov_l r22, r18
1792     mov_h r23, r19
1793     mov_l r24, r20
1794     mov_h r25, r21
1795     XCALL __clzsi2
1796     subi r24, -32
1797     ret
1798 ENDF __clzdi2
1799 #endif /* defined (L_clzdi2) */
1800
1801 #if defined (L_clzsi2)
1802 ;; count leading zeros
1803 ;; r25:r24 = clz32 (r25:r22)
1804 ;; clobbers: r26
1805 DEFUN __clzsi2
1806     XCALL __clzhi2
1807     sbrs r24, 4
1808     ret
1809     mov_l r24, r22
1810     mov_h r25, r23
1811     XCALL __clzhi2
1812     subi r24, -16
1813     ret
1814 ENDF __clzsi2
1815 #endif /* defined (L_clzsi2) */
1816
1817 #if defined (L_clzhi2)
1818 ;; count leading zeros
1819 ;; r25:r24 = clz16 (r25:r24)
1820 ;; clobbers: r26
1821 DEFUN __clzhi2
1822     clr  r26
1823     tst  r25
1824     brne 1f
1825     subi r26, -8
1826     or   r25, r24
1827     brne 1f
1828     ldi  r24, 16
1829     ret
1830 1:  cpi  r25, 16
1831     brsh 3f
1832     subi r26, -3
1833     swap r25
1834 2:  inc  r26
1835 3:  lsl  r25
1836     brcc 2b
1837     mov  r24, r26
1838     clr  r25
1839     ret
1840 ENDF __clzhi2
1841 #endif /* defined (L_clzhi2) */
1842
1843 \f
1844 /**********************************
1845  * Parity
1846  **********************************/
1847
1848 #if defined (L_paritydi2)
1849 ;; r25:r24 = parity64 (r25:r18)
1850 ;; clobbers: __tmp_reg__
1851 DEFUN __paritydi2
1852     eor  r24, r18
1853     eor  r24, r19
1854     eor  r24, r20
1855     eor  r24, r21
1856     XJMP __paritysi2
1857 ENDF __paritydi2
1858 #endif /* defined (L_paritydi2) */
1859
1860 #if defined (L_paritysi2)
1861 ;; r25:r24 = parity32 (r25:r22)
1862 ;; clobbers: __tmp_reg__
1863 DEFUN __paritysi2
1864     eor  r24, r22
1865     eor  r24, r23
1866     XJMP __parityhi2
1867 ENDF __paritysi2
1868 #endif /* defined (L_paritysi2) */
1869
1870 #if defined (L_parityhi2)
1871 ;; r25:r24 = parity16 (r25:r24)
1872 ;; clobbers: __tmp_reg__
1873 DEFUN __parityhi2
1874     eor  r24, r25
1875 ;; FALLTHRU
1876 ENDF __parityhi2
1877
1878 ;; r25:r24 = parity8 (r24)
1879 ;; clobbers: __tmp_reg__
1880 DEFUN __parityqi2
1881     ;; parity is in r24[0..7]
1882     mov  __tmp_reg__, r24
1883     swap __tmp_reg__
1884     eor  r24, __tmp_reg__
1885     ;; parity is in r24[0..3]
1886     subi r24, -4
1887     andi r24, -5
1888     subi r24, -6
1889     ;; parity is in r24[0,3]
1890     sbrc r24, 3
1891     inc  r24
1892     ;; parity is in r24[0]
1893     andi r24, 1
1894     clr  r25
1895     ret
1896 ENDF __parityqi2
1897 #endif /* defined (L_parityhi2) */
1898
1899 \f
1900 /**********************************
1901  * Population Count
1902  **********************************/
1903
1904 #if defined (L_popcounthi2)
1905 ;; population count
1906 ;; r25:r24 = popcount16 (r25:r24)
1907 ;; clobbers: __tmp_reg__
1908 DEFUN __popcounthi2
1909     XCALL __popcountqi2
1910     push r24
1911     mov  r24, r25
1912     XCALL __popcountqi2
1913     clr  r25
1914     ;; FALLTHRU
1915 ENDF __popcounthi2
1916
1917 DEFUN __popcounthi2_tail
1918     pop   __tmp_reg__
1919     add   r24, __tmp_reg__
1920     ret
1921 ENDF __popcounthi2_tail
1922 #endif /* defined (L_popcounthi2) */
1923
1924 #if defined (L_popcountsi2)
1925 ;; population count
1926 ;; r25:r24 = popcount32 (r25:r22)
1927 ;; clobbers: __tmp_reg__
1928 DEFUN __popcountsi2
1929     XCALL __popcounthi2
1930     push  r24
1931     mov_l r24, r22
1932     mov_h r25, r23
1933     XCALL __popcounthi2
1934     XJMP  __popcounthi2_tail
1935 ENDF __popcountsi2
1936 #endif /* defined (L_popcountsi2) */
1937
1938 #if defined (L_popcountdi2)
1939 ;; population count
1940 ;; r25:r24 = popcount64 (r25:r18)
1941 ;; clobbers: r22, r23, __tmp_reg__
1942 DEFUN __popcountdi2
1943     XCALL __popcountsi2
1944     push  r24
1945     mov_l r22, r18
1946     mov_h r23, r19
1947     mov_l r24, r20
1948     mov_h r25, r21
1949     XCALL __popcountsi2
1950     XJMP  __popcounthi2_tail
1951 ENDF __popcountdi2
1952 #endif /* defined (L_popcountdi2) */
1953
1954 #if defined (L_popcountqi2)
1955 ;; population count
1956 ;; r24 = popcount8 (r24)
1957 ;; clobbers: __tmp_reg__
1958 DEFUN __popcountqi2
1959     mov  __tmp_reg__, r24
1960     andi r24, 1
1961     lsr  __tmp_reg__
1962     lsr  __tmp_reg__
1963     adc  r24, __zero_reg__
1964     lsr  __tmp_reg__
1965     adc  r24, __zero_reg__
1966     lsr  __tmp_reg__
1967     adc  r24, __zero_reg__
1968     lsr  __tmp_reg__
1969     adc  r24, __zero_reg__
1970     lsr  __tmp_reg__
1971     adc  r24, __zero_reg__
1972     lsr  __tmp_reg__
1973     adc  r24, __tmp_reg__
1974     ret
1975 ENDF __popcountqi2
1976 #endif /* defined (L_popcountqi2) */
1977
1978 \f
1979 /**********************************
1980  * Swap bytes
1981  **********************************/
1982
1983 ;; swap two registers with different register number
1984 .macro bswap a, b
1985     eor \a, \b
1986     eor \b, \a
1987     eor \a, \b
1988 .endm
1989
1990 #if defined (L_bswapsi2)
1991 ;; swap bytes
1992 ;; r25:r22 = bswap32 (r25:r22)
1993 DEFUN __bswapsi2
1994     bswap r22, r25
1995     bswap r23, r24
1996     ret
1997 ENDF __bswapsi2
1998 #endif /* defined (L_bswapsi2) */
1999
2000 #if defined (L_bswapdi2)
2001 ;; swap bytes
2002 ;; r25:r18 = bswap64 (r25:r18)
2003 DEFUN __bswapdi2
2004     bswap r18, r25
2005     bswap r19, r24
2006     bswap r20, r23
2007     bswap r21, r22
2008     ret
2009 ENDF __bswapdi2
2010 #endif /* defined (L_bswapdi2) */
2011
2012 \f
2013 /**********************************
2014  * 64-bit shifts
2015  **********************************/
2016
2017 #if defined (L_ashrdi3)
2018 ;; Arithmetic shift right
2019 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2020 DEFUN __ashrdi3
2021     push r16
2022     andi r16, 63
2023     breq 2f
2024 1:  asr  r25
2025     ror  r24
2026     ror  r23
2027     ror  r22
2028     ror  r21
2029     ror  r20
2030     ror  r19
2031     ror  r18
2032     dec  r16
2033     brne 1b
2034 2:  pop  r16
2035     ret
2036 ENDF __ashrdi3
2037 #endif /* defined (L_ashrdi3) */
2038
2039 #if defined (L_lshrdi3)
2040 ;; Logic shift right
2041 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2042 DEFUN __lshrdi3
2043     push r16
2044     andi r16, 63
2045     breq 2f
2046 1:  lsr  r25
2047     ror  r24
2048     ror  r23
2049     ror  r22
2050     ror  r21
2051     ror  r20
2052     ror  r19
2053     ror  r18
2054     dec  r16
2055     brne 1b
2056 2:  pop  r16
2057     ret
2058 ENDF __lshrdi3
2059 #endif /* defined (L_lshrdi3) */
2060
2061 #if defined (L_ashldi3)
2062 ;; Shift left
2063 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2064 DEFUN __ashldi3
2065     push r16
2066     andi r16, 63
2067     breq 2f
2068 1:  lsl  r18
2069     rol  r19
2070     rol  r20
2071     rol  r21
2072     rol  r22
2073     rol  r23
2074     rol  r24
2075     rol  r25
2076     dec  r16
2077     brne 1b
2078 2:  pop  r16
2079     ret
2080 ENDF __ashldi3
2081 #endif /* defined (L_ashldi3) */
2082
2083 \f
2084 .section .text.libgcc.fmul, "ax", @progbits
2085
2086 /***********************************************************/
2087 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2088 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2089 /***********************************************************/
2090
2091 #define A1 24
2092 #define B1 25
2093 #define C0 22
2094 #define C1 23
2095 #define A0 __tmp_reg__
2096
2097 #ifdef L_fmuls
2098 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
2099 ;;; Clobbers: r24, r25, __tmp_reg__
2100 DEFUN __fmuls
2101     ;; A0.7 = negate result?
2102     mov  A0, A1
2103     eor  A0, B1
2104     ;; B1 = |B1|
2105     sbrc B1, 7
2106     neg  B1
2107     XJMP __fmulsu_exit
2108 ENDF __fmuls
2109 #endif /* L_fmuls */
2110
2111 #ifdef L_fmulsu
2112 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
2113 ;;; Clobbers: r24, r25, __tmp_reg__
2114 DEFUN __fmulsu
2115     ;; A0.7 = negate result?
2116     mov  A0, A1
2117 ;; FALLTHRU
2118 ENDF __fmulsu
2119
2120 ;; Helper for __fmuls and __fmulsu
2121 DEFUN __fmulsu_exit
2122     ;; A1 = |A1|
2123     sbrc A1, 7
2124     neg  A1
2125 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2126     ;; Some cores have problem skipping 2-word instruction
2127     tst  A0
2128     brmi 1f
2129 #else
2130     sbrs A0, 7
2131 #endif /* __AVR_HAVE_JMP_CALL__ */
2132     XJMP  __fmul
2133 1:  XCALL __fmul
2134     ;; C = -C iff A0.7 = 1
2135     com  C1
2136     neg  C0
2137     sbci C1, -1
2138     ret
2139 ENDF __fmulsu_exit
2140 #endif /* L_fmulsu */
2141
2142
2143 #ifdef L_fmul
2144 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
2145 ;;; Clobbers: r24, r25, __tmp_reg__
2146 DEFUN __fmul
2147     ; clear result
2148     clr   C0
2149     clr   C1
2150     clr   A0
2151 1:  tst   B1
2152     ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
2153 2:  brpl  3f
2154     ;; C += A
2155     add   C0, A0
2156     adc   C1, A1
2157 3:  ;; A >>= 1
2158     lsr   A1
2159     ror   A0
2160     ;; B <<= 1
2161     lsl   B1
2162     brne  2b
2163     ret
2164 ENDF __fmul
2165 #endif /* L_fmul */
2166
2167 #undef A0
2168 #undef A1
2169 #undef B1
2170 #undef C0
2171 #undef C1