1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Denis Chertykov <chertykov@gmail.com>
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
;; Fixed registers of the avr-gcc ABI: r1 is the always-zero register,
;; r0 is the scratch/temporary register.
25 #define __zero_reg__ r1
26 #define __tmp_reg__ r0
;; I/O address (0x3B) of the RAMPZ extended-address register.
30 #define __RAMPZ__ 0x3B
33 /* Most of the functions here are called directly from avr.md
34 patterns, instead of using the standard libcall mechanisms.
35 This can make better code because GCC knows exactly which
36 of the call-used registers (not all of them) are clobbered. */
38 /* FIXME: At present, there is no SORT directive in the linker
39 script so that we must not assume that different modules
40 in the same input section like .libgcc.text.mul will be
41 located close together. Therefore, we cannot use
42 RCALL/RJMP to call a function like __udivmodhi4 from
43 __divmodhi4 and have to use lengthy XCALL/XJMP even
44 though they are in the same input section and all same
45 input sections together are small enough to reach every
46 location with a RCALL/RJMP instruction. */
48 .macro mov_l r_dest, r_src
49 #if defined (__AVR_HAVE_MOVW__)
56 .macro mov_h r_dest, r_src
57 #if defined (__AVR_HAVE_MOVW__)
64 .macro wmov r_dest, r_src
65 #if defined (__AVR_HAVE_MOVW__)
69 mov \r_dest+1, \r_src+1
73 #if defined (__AVR_HAVE_JMP_CALL__)
93 .section .text.libgcc.mul, "ax", @progbits
95 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
96 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
97 #if !defined (__AVR_HAVE_MUL__)
98 /*******************************************************
99 Multiplication 8 x 8 without MUL
100 *******************************************************/
101 #if defined (L_mulqi3)
;; __mulqi3: 8 x 8 -> 8 bit product by shift-and-add, for cores without MUL.
103 #define r_arg2 r22 /* multiplicand */
104 #define r_arg1 r24 /* multiplier */
105 #define r_res __tmp_reg__ /* result */
108 clr r_res ; clear result accumulator
112 add r_arg2,r_arg2 ; shift multiplicand left for the next bit
113 breq __mulqi3_exit ; while multiplicand != 0
115 brne __mulqi3_loop ; exit if multiplier = 0
117 mov r_arg1,r_res ; result to return register (r24)
125 #endif /* defined (L_mulqi3) */
127 #if defined (L_mulqihi3)
137 #endif /* defined (L_mulqihi3) */
139 #if defined (L_umulqihi3)
145 #endif /* defined (L_umulqihi3) */
147 /*******************************************************
148 Multiplication 16 x 16 without MUL
149 *******************************************************/
150 #if defined (L_mulhi3)
;; __mulhi3: 16 x 16 -> 16 bit product by shift-and-add, for cores without MUL.
151 #define r_arg1L r24 /* multiplier Low */
152 #define r_arg1H r25 /* multiplier High */
153 #define r_arg2L r22 /* multiplicand Low */
154 #define r_arg2H r23 /* multiplicand High */
155 #define r_resL __tmp_reg__ /* result Low */
156 #define r_resH r21 /* result High */
159 clr r_resH ; clear result high byte
160 clr r_resL ; clear result low byte
164 add r_resL,r_arg2L ; result += multiplicand (low byte)
167 add r_arg2L,r_arg2L ; shift multiplicand left (16-bit)
170 cp r_arg2L,__zero_reg__ ; 16-bit test of multiplicand against zero
171 cpc r_arg2H,__zero_reg__
172 breq __mulhi3_exit ; while multiplicand != 0
174 lsr r_arg1H ; gets LSB of multiplier
177 brne __mulhi3_loop ; exit if multiplier = 0
179 mov r_arg1H,r_resH ; result to return registers r25:r24
191 #endif /* defined (L_mulhi3) */
193 /*******************************************************
194 Widening Multiplication 32 = 16 x 16 without MUL
195 *******************************************************/
197 #if defined (L_mulhisi3)
199 ;;; FIXME: This is dead code (no one calls it)
212 #endif /* defined (L_mulhisi3) */
214 #if defined (L_umulhisi3)
216 ;;; FIXME: This is dead code (no one calls it)
225 #endif /* defined (L_umulhisi3) */
227 #if defined (L_mulsi3)
228 /*******************************************************
229 Multiplication 32 x 32 without MUL
230 *******************************************************/
;; Shift-and-add 32 x 32 -> 32 multiply for cores without MUL.
;; NOTE(review): the #defines for the middle bytes (r_arg1H/HL, r_arg2H/HL,
;; r_resH/HL) are not visible in this listing -- confirm against full source.
231 #define r_arg1L r22 /* multiplier Low */
234 #define r_arg1HH r25 /* multiplier High */
236 #define r_arg2L r18 /* multiplicand Low */
239 #define r_arg2HH r21 /* multiplicand High */
241 #define r_resL r26 /* result Low */
244 #define r_resHH r31 /* result High */
247 clr r_resHH ; clear all four result bytes
248 clr r_resHL ; clear result
249 clr r_resH ; clear result
250 clr r_resL ; clear result
254 add r_resL,r_arg2L ; result += multiplicand (low byte)
259 add r_arg2L,r_arg2L ; shift multiplicand left (32-bit)
261 adc r_arg2HL,r_arg2HL
262 adc r_arg2HH,r_arg2HH
264 lsr r_arg1HH ; gets LSB of multiplier
271 brne __mulsi3_loop ; exit if multiplier = 0
273 mov_h r_arg1HH,r_resHH ; result to return registers r25:r22
274 mov_l r_arg1HL,r_resHL
297 #endif /* !defined (__AVR_HAVE_MUL__) */
298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
301 #if defined (__AVR_HAVE_MUL__)
316 /*******************************************************
317 Widening Multiplication 32 = 16 x 16
318 *******************************************************/
320 #if defined (L_mulhisi3)
321 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
322 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
323 ;;; Clobbers: __tmp_reg__
332 XJMP __usmulhisi3_tail
334 #endif /* L_mulhisi3 */
336 #if defined (L_usmulhisi3)
337 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
338 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
339 ;;; Clobbers: __tmp_reg__
345 DEFUN __usmulhisi3_tail
352 ENDF __usmulhisi3_tail
353 #endif /* L_usmulhisi3 */
355 #if defined (L_umulhisi3)
356 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
357 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
358 ;;; Clobbers: __tmp_reg__
373 #endif /* L_umulhisi3 */
375 /*******************************************************
376 Widening Multiplication 32 = 16 x 32
377 *******************************************************/
379 #if defined (L_mulshisi3)
380 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
381 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
382 ;;; Clobbers: __tmp_reg__
384 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
385 ;; Some cores have a problem skipping a 2-word instruction
390 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
395 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
396 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
397 ;;; Clobbers: __tmp_reg__
400 ;; One-extend R27:R26 (A1:A0)
405 #endif /* L_mulshisi3 */
407 #if defined (L_muluhisi3)
408 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
409 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
410 ;;; Clobbers: __tmp_reg__
423 #endif /* L_muluhisi3 */
425 /*******************************************************
426 Multiplication 32 x 32
427 *******************************************************/
429 #if defined (L_mulsi3)
430 ;;; R25:R22 = R25:R22 * R21:R18
431 ;;; (C3:C0) = C3:C0 * B3:B0
432 ;;; Clobbers: R26, R27, __tmp_reg__
440 ;; A1:A0 now contains the high word of A
451 #endif /* L_mulsi3 */
466 #endif /* __AVR_HAVE_MUL__ */
467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
470 .section .text.libgcc.div, "ax", @progbits
472 /*******************************************************
473 Division 8 / 8 => (result + remainder)
474 *******************************************************/
;; 8-bit division: register conventions shared by the qi div/mod routines.
475 #define r_rem r25 /* remainder */
476 #define r_arg1 r24 /* dividend, quotient */
477 #define r_arg2 r22 /* divisor */
478 #define r_cnt r23 /* loop count */
480 #if defined (L_udivmodqi4)
;; __udivmodqi4: unsigned 8-bit div/mod by shift-and-subtract.
482 sub r_rem,r_rem ; clear remainder and carry
483 ldi r_cnt,9 ; init loop counter (8 bits + 1)
484 rjmp __udivmodqi4_ep ; jump to entry point
486 rol r_rem ; shift dividend into remainder
487 cp r_rem,r_arg2 ; compare remainder & divisor
488 brcs __udivmodqi4_ep ; skip subtraction if remainder < divisor
489 sub r_rem,r_arg2 ; remainder >= divisor: subtract divisor
491 rol r_arg1 ; shift dividend (with CARRY)
492 dec r_cnt ; decrement loop counter
493 brne __udivmodqi4_loop
494 com r_arg1 ; complement result
495 ; because C flag was complemented in loop
498 #endif /* defined (L_udivmodqi4) */
500 #if defined (L_divmodqi4)
;; __divmodqi4: signed 8-bit div/mod; negates negative operands, calls
;; __udivmodqi4, then fixes up the signs of remainder and quotient.
;; NOTE(review): the conditional skip/branch instructions guarding these
;; NEGs are not visible in this listing -- confirm against full source.
502 bst r_arg1,7 ; store sign of dividend in T
503 mov __tmp_reg__,r_arg1
504 eor __tmp_reg__,r_arg2; r0.7 is sign of result
506 neg r_arg1 ; dividend negative : negate
508 neg r_arg2 ; divisor negative : negate
509 XCALL __udivmodqi4 ; do the unsigned div/mod
511 neg r_rem ; correct remainder sign
514 neg r_arg1 ; correct result sign
518 #endif /* defined (L_divmodqi4) */
526 /*******************************************************
527 Division 16 / 16 => (result + remainder)
528 *******************************************************/
;; 16-bit division: register conventions shared by the hi div/mod routines.
529 #define r_remL r26 /* remainder Low */
530 #define r_remH r27 /* remainder High */
532 /* return: remainder */
533 #define r_arg1L r24 /* dividend Low */
534 #define r_arg1H r25 /* dividend High */
536 /* return: quotient */
537 #define r_arg2L r22 /* divisor Low */
538 #define r_arg2H r23 /* divisor High */
540 #define r_cnt r21 /* loop count */
542 #if defined (L_udivmodhi4)
;; __udivmodhi4: unsigned 16-bit div/mod by shift-and-subtract.
545 sub r_remH,r_remH ; clear remainder and carry
546 ldi r_cnt,17 ; init loop counter (16 bits + 1)
547 rjmp __udivmodhi4_ep ; jump to entry point
549 rol r_remL ; shift dividend into remainder
551 cp r_remL,r_arg2L ; compare remainder & divisor (low byte)
553 brcs __udivmodhi4_ep ; skip subtraction if remainder < divisor
554 sub r_remL,r_arg2L ; remainder >= divisor: subtract divisor
557 rol r_arg1L ; shift dividend (with CARRY)
559 dec r_cnt ; decrement loop counter
560 brne __udivmodhi4_loop
563 ; div/mod results to return registers, as for the div() function
564 mov_l r_arg2L, r_arg1L ; quotient
565 mov_h r_arg2H, r_arg1H
566 mov_l r_arg1L, r_remL ; remainder
567 mov_h r_arg1H, r_remH
570 #endif /* defined (L_udivmodhi4) */
572 #if defined (L_divmodhi4)
;; __divmodhi4: signed 16-bit div/mod wrapper around __udivmodhi4.
576 bst r_arg1H,7 ; store sign of dividend in T
577 mov __tmp_reg__,r_arg2H
579 com __tmp_reg__ ; r0.7 is sign of result
580 rcall __divmodhi4_neg1 ; dividend negative: negate
583 rcall __divmodhi4_neg2 ; divisor negative: negate
584 XCALL __udivmodhi4 ; do the unsigned div/mod
586 rcall __divmodhi4_neg2 ; correct remainder sign
587 brtc __divmodhi4_exit ; T clear: dividend was non-negative
589 ;; correct dividend/remainder sign
595 ;; correct divisor/result sign
602 #endif /* defined (L_divmodhi4) */
615 /*******************************************************
616 Division 24 / 24 => (result + remainder)
617 *******************************************************/
619 ;; A[0..2]: In: Dividend; Out: Quotient
624 ;; B[0..2]: In: Divisor; Out: Remainder
629 ;; C[0..2]: Expand remainder
630 #define C0 __zero_reg__
637 #if defined (L_udivmodpsi4)
638 ;; R24:R22 = R24:R22 udiv R20:R18
639 ;; R20:R18 = R24:R22 umod R20:R18
640 ;; Clobbers: R21, R25, R26
645 ; Clear remainder and carry. C0 is already 0
648 ; jump to entry point
649 rjmp __udivmodpsi4_start
651 ; shift dividend into remainder
655 ; compare remainder & divisor
659 brcs __udivmodpsi4_start ; remainder <= divisor
660 sub C0, B0 ; restore remainder
664 ; shift dividend (with CARRY)
668 ; decrement loop counter
670 brne __udivmodpsi4_loop
674 ; div/mod results to return registers
679 clr __zero_reg__ ; C0
682 #endif /* defined (L_udivmodpsi4) */
684 #if defined (L_divmodpsi4)
685 ;; R24:R22 = R24:R22 div R20:R18
686 ;; R20:R18 = R24:R22 mod R20:R18
687 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
690 ; R0.7 will contain the sign of the result:
691 ; R0.7 = A.sign ^ B.sign
693 ; T-flag = sign of dividend
697 ; Adjust dividend's sign
698 rcall __divmodpsi4_negA
700 ; Adjust divisor's sign
702 rcall __divmodpsi4_negB
704 ; Do the unsigned div/mod
707 ; Adjust quotient's sign
709 rcall __divmodpsi4_negA
711 ; Adjust remainder's sign
712 brtc __divmodpsi4_end
715 ; Correct divisor/remainder sign
723 ; Correct dividend/quotient sign
734 #endif /* defined (L_divmodpsi4) */
750 /*******************************************************
751 Division 32 / 32 => (result + remainder)
752 *******************************************************/
;; 32-bit division: register conventions shared by the si div/mod routines.
753 #define r_remHH r31 /* remainder High */
756 #define r_remL r26 /* remainder Low */
758 /* return: remainder */
759 #define r_arg1HH r25 /* dividend High */
762 #define r_arg1L r22 /* dividend Low */
764 /* return: quotient */
765 #define r_arg2HH r21 /* divisor High */
768 #define r_arg2L r18 /* divisor Low */
770 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
772 #if defined (L_udivmodsi4)
;; __udivmodsi4: unsigned 32-bit div/mod by shift-and-subtract.
774 ldi r_remL, 33 ; init loop counter (32 bits + 1)
777 sub r_remH,r_remH ; clear remainder and carry
778 mov_l r_remHL, r_remL
779 mov_h r_remHH, r_remH
780 rjmp __udivmodsi4_ep ; jump to entry point
782 rol r_remL ; shift dividend into remainder
786 cp r_remL,r_arg2L ; compare remainder & divisor (low byte)
790 brcs __udivmodsi4_ep ; skip subtraction if remainder < divisor
791 sub r_remL,r_arg2L ; remainder >= divisor: subtract divisor
796 rol r_arg1L ; shift dividend (with CARRY)
800 dec r_cnt ; decrement loop counter
801 brne __udivmodsi4_loop
802 ; __zero_reg__ now restored (r_cnt == 0)
807 ; div/mod results to return registers, as for the ldiv() function
808 mov_l r_arg2L, r_arg1L ; quotient
809 mov_h r_arg2H, r_arg1H
810 mov_l r_arg2HL, r_arg1HL
811 mov_h r_arg2HH, r_arg1HH
812 mov_l r_arg1L, r_remL ; remainder
813 mov_h r_arg1H, r_remH
814 mov_l r_arg1HL, r_remHL
815 mov_h r_arg1HH, r_remHH
818 #endif /* defined (L_udivmodsi4) */
820 #if defined (L_divmodsi4)
;; __divmodsi4: signed 32-bit div/mod wrapper around __udivmodsi4.
822 mov __tmp_reg__,r_arg2HH
823 bst r_arg1HH,7 ; store sign of dividend in T
825 com __tmp_reg__ ; r0.7 is sign of result
826 rcall __divmodsi4_neg1 ; dividend negative: negate
829 rcall __divmodsi4_neg2 ; divisor negative: negate
830 XCALL __udivmodsi4 ; do the unsigned div/mod
831 sbrc __tmp_reg__, 7 ; correct quotient sign
832 rcall __divmodsi4_neg2
833 brtc __divmodsi4_exit ; correct remainder sign
835 ;; correct dividend/remainder sign
845 ;; correct divisor/quotient sign
856 #endif /* defined (L_divmodsi4) */
859 /*******************************************************
862 *******************************************************/
864 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
865 ;; at least 16k of Program Memory. For smaller Devices, depend
868 #if defined (__AVR_HAVE_JMP_CALL__)
870 #elif defined (__AVR_HAVE_MOVW__)
871 # define SPEED_DIV 16
876 ;; A[0..7]: In: Dividend;
877 ;; Out: Quotient (T = 0)
878 ;; Out: Remainder (T = 1)
888 ;; B[0..7]: In: Divisor; Out: Clobber
898 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
908 ;; Holds Signs during Division Routine
909 #define SS __tmp_reg__
911 ;; Bit-Counter in Division Routine
912 #define R_cnt __zero_reg__
914 ;; Scratch Register for Negation
917 #if defined (L_udivdi3)
919 ;; R25:R18 = R24:R18 umod R17:R10
920 ;; Ordinary ABI-Function
924 rjmp __udivdi3_umoddi3
927 ;; R25:R18 = R24:R18 udiv R17:R10
928 ;; Ordinary ABI-Function
934 DEFUN __udivdi3_umoddi3
945 ENDF __udivdi3_umoddi3
946 #endif /* L_udivdi3 */
948 #if defined (L_udivmod64)
950 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
951 ;; No Registers saved/restored; the Callers will take Care.
952 ;; Preserves B[] and T-flag
953 ;; T = 0: Compute Quotient in A[]
954 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
958 ;; Clear Remainder (C6, C7 will follow)
965 #if SPEED_DIV == 0 || SPEED_DIV == 16
966 ;; Initialize Loop-Counter
969 #endif /* SPEED_DIV */
976 1: ;; Compare shifted Dividend against Divisor
977 ;; If -- even after Shifting -- it is smaller...
978 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
979 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
982 ;; ...then we can subtract it. Thus, it is legal to shift left
983 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
984 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
985 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
986 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
992 ;; Shifted 64 Bits: A7 has traveled to C7
994 ;; Divisor is greater than Dividend. We have:
997 ;; Thus, we can return immediately
1000 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1003 ;; Push of A7 is not needed because C7 is still 0
1007 #elif SPEED_DIV == 16
1009 ;; Compare shifted Dividend against Divisor
1017 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1018 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1019 wmov C2,A6 $ wmov C0,A4
1020 wmov A6,A2 $ wmov A4,A0
1021 wmov A2,C6 $ wmov A0,C4
1023 ;; Set Bit Counter to 32
1027 #error SPEED_DIV = ?
1028 #endif /* SPEED_DIV */
1030 ;; The very Division + Remainder Routine
1032 3: ;; Left-shift Dividend...
1033 lsl A0 $ rol A1 $ rol A2 $ rol A3
1034 rol A4 $ rol A5 $ rol A6 $ rol A7
1036 ;; ...into Remainder
1037 rol C0 $ rol C1 $ rol C2 $ rol C3
1038 rol C4 $ rol C5 $ rol C6 $ rol C7
1040 ;; Compare Remainder and Divisor
1041 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1042 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1046 ;; Divisor fits into Remainder: Subtract it from Remainder...
1047 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1048 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1050 ;; ...and set the corresponding Bit in the upcoming Quotient
1051 ;; The Bit will travel to its final Position
1054 4: ;; This Bit is done
1057 ;; __zero_reg__ is 0 again
1059 ;; T = 0: We are fine with the Quotient in A[]
1060 ;; T = 1: Copy Remainder to A[]
1066 ;; Move the Sign of the Result to SS.7
1072 #endif /* L_udivmod64 */
1075 #if defined (L_divdi3)
1077 ;; R25:R18 = R24:R18 mod R17:R10
1078 ;; Ordinary ABI-Function
1082 rjmp __divdi3_moddi3
1085 ;; R25:R18 = R24:R18 div R17:R10
1086 ;; Ordinary ABI-Function
1092 DEFUN __divdi3_moddi3
1097 ;; Both Signs are 0: the following Complexity is not needed
1098 XJMP __udivdi3_umoddi3
1099 #endif /* SPEED_DIV */
1102 ;; Save Z = 12 Registers: Y, 17...8
1103 ;; No Frame needed (X = 0)
1106 ldi r30, lo8(gs(1f))
1107 ldi r31, hi8(gs(1f))
1108 XJMP __prologue_saves__ + ((18 - 12) * 2)
1110 1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1111 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1114 ;; Adjust Dividend's Sign as needed
1116 ;; Compiling for Speed we know that at least one Sign must be < 0
1117 ;; Thus, if A[] >= 0 then we know B[] < 0
1121 #endif /* SPEED_DIV */
1125 ;; Adjust Divisor's Sign and SS.7 as needed
1132 com B4 $ com B5 $ com B6 $ com B7
1133 $ com B1 $ com B2 $ com B3
1135 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1136 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1138 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1141 ;; Adjust Result's Sign
1142 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1147 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1150 4: ;; Epilogue: Restore the Z = 12 Registers and return
1152 #if defined (__AVR_HAVE_8BIT_SP__)
1153 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1154 ;; so these lines are dead code. To make it work, devices without
1155 ;; SP_H must get their own multilib(s).
1159 #endif /* #SP = 8/16 */
1161 XJMP __epilogue_restores__ + ((18 - 12) * 2)
1163 ENDF __divdi3_moddi3
1169 #endif /* L_divdi3 */
1171 #if defined (L_negdi2)
;; __negdi2: two's-complement negation of a 64-bit value: COM the upper
;; bytes, then propagate the +1 with the SBCI -1 chain.
;; NOTE(review): the low-byte handling that seeds the carry is not visible
;; in this listing -- confirm against full source.
1174 com A4 $ com A5 $ com A6 $ com A7
1175 $ com A1 $ com A2 $ com A3
1177 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
1178 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
1182 #endif /* L_negdi2 */
1212 .section .text.libgcc.prologue, "ax", @progbits
1214 /**********************************
1215 * This is a prologue subroutine
1216 **********************************/
1217 #if defined (L_prologue)
1219 ;; This function does not clobber T-flag; 64-bit division relies on it
1220 DEFUN __prologue_saves__
1239 #if defined (__AVR_HAVE_8BIT_SP__)
1240 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1241 ;; so these lines are dead code. To make it work, devices without
1242 ;; SP_H must get their own multilib(s).
1252 in __tmp_reg__,__SREG__ ; save SREG around the SP update
1255 out __SREG__,__tmp_reg__ ; restore SREG
1257 #endif /* #SP = 8/16 */
1259 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1265 ENDF __prologue_saves__
1266 #endif /* defined (L_prologue) */
1269 * This is an epilogue subroutine
1271 #if defined (L_epilogue)
1273 DEFUN __epilogue_restores__
1291 #if defined (__AVR_HAVE_8BIT_SP__)
1292 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1293 ;; so these lines are dead code. To make it work, devices without
1294 ;; SP_H must get their own multilib(s).
1302 adc r29,__zero_reg__ ; propagate carry into r29 (Y high byte)
1303 in __tmp_reg__,__SREG__ ; save SREG around the SP update
1306 out __SREG__,__tmp_reg__ ; restore SREG
1310 #endif /* #SP = 8/16 */
1312 ENDF __epilogue_restores__
1313 #endif /* defined (L_epilogue) */
1316 .section .fini9,"ax",@progbits
1322 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
1324 .section .fini0,"ax",@progbits
1328 #endif /* defined (L_exit) */
1336 #endif /* defined (L_cleanup) */
1339 .section .text.libgcc, "ax", @progbits
1342 DEFUN __tablejump2__
1349 #if defined (__AVR_HAVE_LPMX__)
1352 mov r30, __tmp_reg__
1353 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1359 #else /* !HAVE_LPMX */
1365 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1366 in __tmp_reg__, __EIND__
1370 #endif /* !HAVE_LPMX */
1372 #endif /* defined (L_tablejump) */
1375 .section .init4,"ax",@progbits
1376 DEFUN __do_copy_data
1377 #if defined(__AVR_HAVE_ELPMX__)
1378 ldi r17, hi8(__data_end)
1379 ldi r26, lo8(__data_start)
1380 ldi r27, hi8(__data_start)
1381 ldi r30, lo8(__data_load_start)
1382 ldi r31, hi8(__data_load_start)
1383 ldi r16, hh8(__data_load_start)
1385 rjmp .L__do_copy_data_start
1386 .L__do_copy_data_loop:
1389 .L__do_copy_data_start:
1390 cpi r26, lo8(__data_end)
1392 brne .L__do_copy_data_loop
1393 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
1394 ldi r17, hi8(__data_end)
1395 ldi r26, lo8(__data_start)
1396 ldi r27, hi8(__data_start)
1397 ldi r30, lo8(__data_load_start)
1398 ldi r31, hi8(__data_load_start)
1399 ldi r16, hh8(__data_load_start - 0x10000)
1400 .L__do_copy_data_carry:
1403 rjmp .L__do_copy_data_start
1404 .L__do_copy_data_loop:
1408 brcs .L__do_copy_data_carry
1409 .L__do_copy_data_start:
1410 cpi r26, lo8(__data_end)
1412 brne .L__do_copy_data_loop
1413 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
1414 ldi r17, hi8(__data_end)
1415 ldi r26, lo8(__data_start)
1416 ldi r27, hi8(__data_start)
1417 ldi r30, lo8(__data_load_start)
1418 ldi r31, hi8(__data_load_start)
1419 rjmp .L__do_copy_data_start
1420 .L__do_copy_data_loop:
1421 #if defined (__AVR_HAVE_LPMX__)
1428 .L__do_copy_data_start:
1429 cpi r26, lo8(__data_end)
1431 brne .L__do_copy_data_loop
1432 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
1434 #endif /* L_copy_data */
1436 /* __do_clear_bss is only necessary if there is anything in .bss section. */
1439 .section .init4,"ax",@progbits
1440 DEFUN __do_clear_bss
;; Startup helper that walks .bss with the X pointer (r27:r26) from
;; __bss_start; r17 holds hi8(__bss_end) for the loop end test.
;; NOTE(review): the loop body (the store of __zero_reg__) is not visible
;; in this listing -- confirm against full source.
1441 ldi r17, hi8(__bss_end)
1442 ldi r26, lo8(__bss_start)
1443 ldi r27, hi8(__bss_start)
1444 rjmp .do_clear_bss_start
1447 .do_clear_bss_start:
1448 cpi r26, lo8(__bss_end)
1450 brne .do_clear_bss_loop
1452 #endif /* L_clear_bss */
1454 /* __do_global_ctors and __do_global_dtors are only necessary
1455 if there are any constructors/destructors. */
1458 .section .init6,"ax",@progbits
1459 DEFUN __do_global_ctors
1460 #if defined(__AVR_HAVE_RAMPZ__)
1461 ldi r17, hi8(__ctors_start)
1462 ldi r28, lo8(__ctors_end)
1463 ldi r29, hi8(__ctors_end)
1464 ldi r16, hh8(__ctors_end)
1465 rjmp .L__do_global_ctors_start
1466 .L__do_global_ctors_loop:
1468 sbc r16, __zero_reg__
1472 XCALL __tablejump_elpm__
1473 .L__do_global_ctors_start:
1474 cpi r28, lo8(__ctors_start)
1476 ldi r24, hh8(__ctors_start)
1478 brne .L__do_global_ctors_loop
1480 ldi r17, hi8(__ctors_start)
1481 ldi r28, lo8(__ctors_end)
1482 ldi r29, hi8(__ctors_end)
1483 rjmp .L__do_global_ctors_start
1484 .L__do_global_ctors_loop:
1489 .L__do_global_ctors_start:
1490 cpi r28, lo8(__ctors_start)
1492 brne .L__do_global_ctors_loop
1493 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1494 ENDF __do_global_ctors
1495 #endif /* L_ctors */
1498 .section .fini6,"ax",@progbits
1499 DEFUN __do_global_dtors
1500 #if defined(__AVR_HAVE_RAMPZ__)
1501 ldi r17, hi8(__dtors_end)
1502 ldi r28, lo8(__dtors_start)
1503 ldi r29, hi8(__dtors_start)
1504 ldi r16, hh8(__dtors_start)
1505 rjmp .L__do_global_dtors_start
1506 .L__do_global_dtors_loop:
1508 sbc r16, __zero_reg__
1512 XCALL __tablejump_elpm__
1513 .L__do_global_dtors_start:
1514 cpi r28, lo8(__dtors_end)
1516 ldi r24, hh8(__dtors_end)
1518 brne .L__do_global_dtors_loop
1520 ldi r17, hi8(__dtors_end)
1521 ldi r28, lo8(__dtors_start)
1522 ldi r29, hi8(__dtors_start)
1523 rjmp .L__do_global_dtors_start
1524 .L__do_global_dtors_loop:
1529 .L__do_global_dtors_start:
1530 cpi r28, lo8(__dtors_end)
1532 brne .L__do_global_dtors_loop
1533 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1534 ENDF __do_global_dtors
1535 #endif /* L_dtors */
1537 .section .text.libgcc, "ax", @progbits
1539 #ifdef L_tablejump_elpm
1540 DEFUN __tablejump_elpm__
1541 #if defined (__AVR_HAVE_ELPM__)
1542 #if defined (__AVR_HAVE_LPMX__)
1543 elpm __tmp_reg__, Z+
1545 mov r30, __tmp_reg__
1546 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1558 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1559 in __tmp_reg__, __EIND__
1564 #endif /* defined (__AVR_HAVE_ELPM__) */
1565 ENDF __tablejump_elpm__
1566 #endif /* defined (L_tablejump_elpm) */
1568 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1569 ;; Loading n bytes from Flash; n = 3,4
1570 ;; R22... = Flash[Z]
1571 ;; Clobbers: __tmp_reg__
1573 #if (defined (L_load_3) \
1574 || defined (L_load_4)) \
1575 && !defined (__AVR_HAVE_LPMX__)
1583 .macro .load dest, n
1586 .if \dest != D0+\n-1
1593 #if defined (L_load_3)
1600 #endif /* L_load_3 */
1602 #if defined (L_load_4)
1610 #endif /* L_load_4 */
1612 #endif /* L_load_3 || L_load_3 */
1614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1615 ;; Loading n bytes from Flash; n = 2,3,4
1616 ;; R22... = Flash[R21:Z]
1617 ;; Clobbers: __tmp_reg__, R21, R30, R31
1619 #if (defined (L_xload_2) \
1620 || defined (L_xload_3) \
1621 || defined (L_xload_4)) \
1622 && defined (__AVR_HAVE_ELPM__) \
1623 && !defined (__AVR_HAVE_ELPMX__)
1625 #if !defined (__AVR_HAVE_RAMPZ__)
1627 #endif /* have RAMPZ */
1635 ;; Register containing bits 16+ of the address
1639 .macro .xload dest, n
1642 .if \dest != D0+\n-1
1644 adc HHI8, __zero_reg__
1649 #if defined (L_xload_2)
1656 #endif /* L_xload_2 */
1658 #if defined (L_xload_3)
1666 #endif /* L_xload_3 */
1668 #if defined (L_xload_4)
1677 #endif /* L_xload_4 */
1679 #endif /* L_xload_{2|3|4} && ELPM */
1682 .section .text.libgcc.builtins, "ax", @progbits
1684 /**********************************
1685 * Find first set Bit (ffs)
1686 **********************************/
1688 #if defined (L_ffssi2)
1689 ;; find first set bit
1690 ;; r25:r24 = ffs32 (r25:r22)
1691 ;; clobbers: r22, r26
1709 #endif /* defined (L_ffssi2) */
1711 #if defined (L_ffshi2)
1712 ;; find first set bit
1713 ;; r25:r24 = ffs16 (r25:r24)
1717 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1718 ;; Some cores have a problem skipping a 2-word instruction
1722 cpse r24, __zero_reg__
1723 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1724 1: XJMP __loop_ffsqi2
1730 #endif /* defined (L_ffshi2) */
1732 #if defined (L_loop_ffsqi2)
1733 ;; Helper for ffshi2, ffssi2
1734 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
1745 #endif /* defined (L_loop_ffsqi2) */
1748 /**********************************
1749 * Count trailing Zeros (ctz)
1750 **********************************/
1752 #if defined (L_ctzsi2)
1753 ;; count trailing zeros
1754 ;; r25:r24 = ctz32 (r25:r22)
1755 ;; clobbers: r26, r22
1757 ;; Note that ctz(0) is undefined for GCC
1763 #endif /* defined (L_ctzsi2) */
1765 #if defined (L_ctzhi2)
1766 ;; count trailing zeros
1767 ;; r25:r24 = ctz16 (r25:r24)
1770 ;; Note that ctz(0) is undefined for GCC
1776 #endif /* defined (L_ctzhi2) */
1779 /**********************************
1780 * Count leading Zeros (clz)
1781 **********************************/
1783 #if defined (L_clzdi2)
1784 ;; count leading zeros
1785 ;; r25:r24 = clz64 (r25:r18)
1786 ;; clobbers: r22, r23, r26
1799 #endif /* defined (L_clzdi2) */
1801 #if defined (L_clzsi2)
1802 ;; count leading zeros
1803 ;; r25:r24 = clz32 (r25:r22)
1815 #endif /* defined (L_clzsi2) */
1817 #if defined (L_clzhi2)
1818 ;; count leading zeros
1819 ;; r25:r24 = clz16 (r25:r24)
1841 #endif /* defined (L_clzhi2) */
1844 /**********************************
1846 **********************************/
1848 #if defined (L_paritydi2)
1849 ;; r25:r24 = parity64 (r25:r18)
1850 ;; clobbers: __tmp_reg__
1858 #endif /* defined (L_paritydi2) */
1860 #if defined (L_paritysi2)
1861 ;; r25:r24 = parity32 (r25:r22)
1862 ;; clobbers: __tmp_reg__
1868 #endif /* defined (L_paritysi2) */
1870 #if defined (L_parityhi2)
1871 ;; r25:r24 = parity16 (r25:r24)
1872 ;; clobbers: __tmp_reg__
1878 ;; r25:r24 = parity8 (r24)
1879 ;; clobbers: __tmp_reg__
1881 ;; parity is in r24[0..7]
1882 mov __tmp_reg__, r24
1884 eor r24, __tmp_reg__
1885 ;; parity is in r24[0..3]
1889 ;; parity is in r24[0,3]
1892 ;; parity is in r24[0]
1897 #endif /* defined (L_parityhi2) */
1900 /**********************************
1902 **********************************/
1904 #if defined (L_popcounthi2)
1906 ;; r25:r24 = popcount16 (r25:r24)
1907 ;; clobbers: __tmp_reg__
1917 DEFUN __popcounthi2_tail
1919 add r24, __tmp_reg__
1921 ENDF __popcounthi2_tail
1922 #endif /* defined (L_popcounthi2) */
1924 #if defined (L_popcountsi2)
1926 ;; r25:r24 = popcount32 (r25:r22)
1927 ;; clobbers: __tmp_reg__
1934 XJMP __popcounthi2_tail
1936 #endif /* defined (L_popcountsi2) */
1938 #if defined (L_popcountdi2)
1940 ;; r25:r24 = popcount64 (r25:r18)
1941 ;; clobbers: r22, r23, __tmp_reg__
1950 XJMP __popcounthi2_tail
1952 #endif /* defined (L_popcountdi2) */
1954 #if defined (L_popcountqi2)
1956 ;; r24 = popcount8 (r24)
1957 ;; clobbers: __tmp_reg__
1959 mov __tmp_reg__, r24
1963 adc r24, __zero_reg__
1965 adc r24, __zero_reg__
1967 adc r24, __zero_reg__
1969 adc r24, __zero_reg__
1971 adc r24, __zero_reg__
1973 adc r24, __tmp_reg__
1976 #endif /* defined (L_popcountqi2) */
1979 /**********************************
1981 **********************************/
1983 ;; swap two registers with different register number
1990 #if defined (L_bswapsi2)
1992 ;; r25:r22 = bswap32 (r25:r22)
1998 #endif /* defined (L_bswapsi2) */
2000 #if defined (L_bswapdi2)
2002 ;; r25:r18 = bswap64 (r25:r18)
2010 #endif /* defined (L_bswapdi2) */
2013 /**********************************
2015 **********************************/
2017 #if defined (L_ashrdi3)
2018 ;; Arithmetic shift right
2019 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2037 #endif /* defined (L_ashrdi3) */
2039 #if defined (L_lshrdi3)
2040 ;; Logic shift right
2041 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2059 #endif /* defined (L_lshrdi3) */
2061 #if defined (L_ashldi3)
2063 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2081 #endif /* defined (L_ashldi3) */
2084 .section .text.libgcc.fmul, "ax", @progbits
2086 /***********************************************************/
2087 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2088 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2089 /***********************************************************/
2095 #define A0 __tmp_reg__
2098 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
2099 ;;; Clobbers: r24, r25, __tmp_reg__
2101 ;; A0.7 = negate result?
2109 #endif /* L_fmuls */
2112 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
2113 ;;; Clobbers: r24, r25, __tmp_reg__
2115 ;; A0.7 = negate result?
2120 ;; Helper for __fmuls and __fmulsu
2125 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2126 ;; Some cores have a problem skipping a 2-word instruction
2131 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2134 ;; C = -C iff A0.7 = 1
2140 #endif /* L_fmulsu */
2144 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
2145 ;;; Clobbers: r24, r25, __tmp_reg__
2152 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.