gcc/config/h8300/lib1funcs.asm

   1 ;; libgcc1 routines for the Hitachi H8/300 CPU.
   2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
   3
   4 /* Copyright (C) 1994, 2000 Free Software Foundation, Inc.
   5
   6 This file is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 In addition to the permissions in the GNU General Public License, the
  12 Free Software Foundation gives you unlimited permission to link the
  13 compiled version of this file into combinations with other programs,
  14 and to distribute those combinations without any restriction coming
  15 from the use of this file.  (The General Public License restrictions
  16 do apply in other respects; for example, they cover modification of
  17 the file, and distribution when not linked into a combine
  18 executable.)
  19
  20 This file is distributed in the hope that it will be useful, but
  21 WITHOUT ANY WARRANTY; without even the implied warranty of
  22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23 General Public License for more details.
  24
  25 You should have received a copy of the GNU General Public License
  26 along with this program; see the file COPYING.  If not, write to
  27 the Free Software Foundation, 59 Temple Place - Suite 330,
  28 Boston, MA 02111-1307, USA.  */
  29
  30 /* Assembler register definitions.
        A0-A3 name r0-r3; the routines in this file receive their operands
        and leave their results in these registers.  S0-S2 name r4-r6;
        the H8/300 ___mulsi3 and the SImode divide entry points push them
        before use and pop them before returning.  An L or H suffix names
        the low or high byte of the register (r0l, r0h, ...).  */
  31
  32 #define A0 r0
  33 #define A0L r0l
  34 #define A0H r0h
  35
  36 #define A1 r1
  37 #define A1L r1l
  38 #define A1H r1h
  39
  40 #define A2 r2
  41 #define A2L r2l
  42 #define A2H r2h
  43
  44 #define A3 r3
  45 #define A3L r3l
  46 #define A3H r3h
  47
     /* S0-S2: r4-r6 and their byte halves.  */
  48 #define S0 r4
  49 #define S0L r4l
  50 #define S0H r4h
  51
  52 #define S1 r5
  53 #define S1L r5l
  54 #define S1H r5h
  55
  56 #define S2 r6
  57 #define S2L r6l
  58 #define S2H r6h
  59
  60 #ifdef __H8300__
     /* Plain H8/300: the pointer-sized operations are word operations and
        the pointer-width register names (xxP) are the 16-bit registers.  */
  61 #define MOVP    mov.w   /* pointers are 16 bits */
  62 #define ADDP    add.w
  63 #define CMPP    cmp.w
  64 #define PUSHP   push
  65 #define POPP    pop
  66
  67 #define A0P     r0
  68 #define A1P     r1
  69 #define A2P     r2
  70 #define A3P     r3
  71 #define S0P     r4
  72 #define S1P     r5
  73 #define S2P     r6
  74 #endif
  75
  76 #if defined (__H8300H__) || defined (__H8300S__)
     /* H8/300H and H8/300S: the pointer-sized operations are long
        operations and the xxP names are the 32-bit erN registers.  */
  77 #define MOVP    mov.l   /* pointers are 32 bits */
  78 #define ADDP    add.l
  79 #define CMPP    cmp.l
  80 #define PUSHP   push.l
  81 #define POPP    pop.l
  82
  83 #define A0P     er0
  84 #define A1P     er1
  85 #define A2P     er2
  86 #define A3P     er3
  87 #define S0P     er4
  88 #define S1P     er5
  89 #define S2P     er6
  90
     /* A0E-A3E name e0-e3.  The H8/300H divmodsi4 below tests and moves
        them as the top word of the matching 32-bit register.  */
  91 #define A0E     e0
  92 #define A1E     e1
  93 #define A2E     e2
  94 #define A3E     e3
  95 #endif
  96
  97 #ifdef __H8300H__
     /* Tell the assembler this is H8/300H code (presumably this enables
        the er and .l forms used below -- confirm in the gas manual).  */
  98         .h8300h
  99 #endif
 100
 101 #ifdef __H8300S__
     /* Same, for the H8/300S.  */
 102         .h8300s
 103 #endif
 104
 105 #ifdef L_cmpsi2
 106 #ifdef __H8300__
     ;; ___cmpsi2: three-way signed compare of two 32-bit values.
     ;;   in:  a = A0:A1 (A0 is the high word, compared as signed)
     ;;        b = A2:A3
     ;;   out: A0 = 0 if a < b, 1 if a == b, 2 if a > b
     ;; Only assembled when __H8300__ is defined.
 107         .section .text
 108         .align 2
 109         .global ___cmpsi2
 110 ___cmpsi2:
 111         cmp.w   A2,A0           ; high words equal ?
 112         bne     .L2
 113         cmp.w   A3,A1           ; low words equal ?
 114         bne     .L2
 115         mov.w   #1,A0           ; a == b: return 1
 116         rts
 117 .L2:
 118         cmp.w   A0,A2           ; flags from b.high - a.high
 119         bgt     .L4             ; signed: b.high > a.high, so a < b
 120         bne     .L3             ; b.high < a.high, so a > b
 121         cmp.w   A1,A3           ; high words equal: b.low - a.low
 122         bls     .L3             ; unsigned: b.low <= a.low, so a > b
 123 .L4:
 124         sub.w   A0,A0           ; a < b: return 0
 125         rts
 126 .L3:
 127         mov.w   #2,A0           ; a > b: return 2
 128 .L5:                            ; (nothing in this routine branches here)
 129         rts
 130         .end
 131 #endif
 132 #endif /* L_cmpsi2 */
 133
 134 #ifdef L_ucmpsi2
 135 #ifdef __H8300__
     ;; ___ucmpsi2: three-way unsigned compare of two 32-bit values.
     ;;   in:  a = A0:A1 (A0 is the high word)
     ;;        b = A2:A3
     ;;   out: A0 = 0 if a < b, 1 if a == b, 2 if a > b
     ;; Same shape as the signed compare; only the high-word branch
     ;; differs (bhi here instead of bgt).
 136         .section .text
 137         .align 2
 138         .global ___ucmpsi2
 139 ___ucmpsi2:
 140         cmp.w   A2,A0           ; high words equal ?
 141         bne     .L2
 142         cmp.w   A3,A1           ; low words equal ?
 143         bne     .L2
 144         mov.w   #1,A0           ; a == b: return 1
 145         rts
 146 .L2:
 147         cmp.w   A0,A2           ; flags from b.high - a.high
 148         bhi     .L4             ; unsigned: b.high > a.high, so a < b
 149         bne     .L3             ; b.high < a.high, so a > b
 150         cmp.w   A1,A3           ; high words equal: b.low - a.low
 151         bls     .L3             ; unsigned: b.low <= a.low, so a > b
 152 .L4:
 153         sub.w   A0,A0           ; a < b: return 0
 154         rts
 155 .L3:
 156         mov.w   #2,A0           ; a > b: return 2
 157 .L5:                            ; (nothing in this routine branches here)
 158         rts
 159         .end
 160 #endif
 161 #endif /* L_ucmpsi2 */
 162
 163 #ifdef L_divhi3
 164
 165 ;; HImode divides for the H8/300.
 166 ;; We bunch all of this into one object file since there are several
 167 ;; "supporting routines".
 168
 169 ; general purpose normalize routine
 170 ;
 171 ; dividend in A0
 172 ; divisor in A1
 173 ; turns both into +ve numbers, and leaves what the answer sign
 174 ; should be in A2L
 175
 176 #ifdef __H8300__
 177         .section .text
 178         .align 2
 179 divnorm:
     ; in:  A0 = dividend, A1 = divisor (signed 16-bit; ___divhi3 computes
     ;      A0/A1)
     ; out: A0 and A1 each negated if they were negative
     ;      A2L = 1 if exactly one of them was negative, else 0
 180         mov.b   #0x0,A2L        ; start with a positive result
 181         or      A0H,A0H         ; is dividend (A0) >= 0
 182         bge     _lab1
 183         not     A0H             ; no - then make it +ve
 184         not     A0L
 185         adds    #1,A0           ; invert, then add 1 = negate
 186         xor     #0x1,A2L        ; and remember that in A2L
 187 _lab1:  or      A1H,A1H ; look at divisor (A1)
 188         bge     _lab2
 189         not     A1H             ; it is -ve, make it positive
 190         not     A1L
 191         adds    #1,A1
 192         xor     #0x1,A2L; and toggle sign of result
 193 _lab2:  rts
 194 ;; Basically the same as divnorm, except that only the sign of the
 195 ;; dividend (A0) is recorded in A2L; the divisor sign does not affect it.
 196 modnorm:
     ; in:  A0 = dividend, A1 = divisor (signed 16-bit)
     ; out: A0 and A1 each negated if they were negative
     ;      A2L = 1 if the dividend was negative, else 0
 197         mov.b   #0x0,A2L        ; start with a positive result
 198         or      A0H,A0H         ; is dividend (A0) >= 0
 199         bge     _lab7
 200         not     A0H             ; no - then make it +ve
 201         not     A0L
 202         adds    #1,A0           ; invert, then add 1 = negate
 203         xor     #0x1,A2L        ; and remember that in A2L
 204 _lab7:  or      A1H,A1H ; look at divisor (A1)
 205         bge     _lab8
 206         not     A1H             ; it is -ve, make it positive
 207         not     A1L
 208         adds    #1,A1           ; A2L is deliberately left alone here
 209 _lab8:  rts
 210
 211 ; A0=A0/A1 signed
     ; Strips the signs with divnorm, divides unsigned, then falls into
     ; negans to apply the sign recorded in A2L.  negans is also the tail
     ; of ___modhi3.  A3 is left holding the unsigned remainder produced
     ; by ___udivhi3.
 212
 213         .global ___divhi3
 214 ___divhi3:
 215         bsr     divnorm         ; A0, A1 made non-negative; sign in A2L
 216         bsr     ___udivhi3      ; A0 = quotient, A3 = remainder
 217 negans: or      A2L,A2L ; should answer be negative ?
 218         beq     _lab4
 219         not     A0H     ; yes, so make it so
 220         not     A0L
 221         adds    #1,A0
 222 _lab4:  rts
 223
 224 ; A0=A0%A1 signed
     ; The result takes the sign of the dividend: modnorm sets A2L only
     ; when A0 was negative, and negans negates the remainder if A2L is set.
 225
 226         .global ___modhi3
 227 ___modhi3:
 228         bsr     modnorm         ; A0, A1 made non-negative; sign in A2L
 229         bsr     ___udivhi3      ; A0 = quotient, A3 = remainder
 230         mov     A3,A0           ; the remainder is the result
 231         bra     negans          ; share the sign fix-up in ___divhi3
 232
 233 ; A0=A0%A1 unsigned
     ; Thin wrapper: ___udivhi3 leaves the remainder in A3.
 234
 235         .global ___umodhi3
 236 ___umodhi3:
 237         bsr     ___udivhi3      ; A0 = quotient, A3 = remainder
 238         mov     A3,A0           ; return the remainder instead
 239         rts
 240
 241 ; A0=A0/A1 unsigned
 242 ; A3=A0%A1 unsigned
 243 ; A2H trashed
 244 ; D high 8 bits of denom
 245 ; d low 8 bits of denom
 246 ; N high 8 bits of num
 247 ; n low 8 bits of num
 248 ; M high 8 bits of mod
 249 ; m low 8 bits of mod
 250 ; Q high 8 bits of quot
 251 ; q low 8 bits of quot
 252 ; P preserve
 253
 254 ; The h8 only has a 16/8 bit divide, so we look at the incoming and
 255 ; see how to partition up the expression.
 256
 257         .global ___udivhi3
 258 ___udivhi3:
     ; Two strategies, chosen on the high byte of the denominator:
     ;   D == 0: one or two 16/8 divxu steps, high numerator byte first.
     ;   D != 0: 8 rounds of shift and subtract (divlongway).
 259                                 ; A0 A1 A2 A3
 260                                 ; Nn Dd       P
 261         sub.w   A3,A3           ; Nn Dd xP 00
 262         or      A1H,A1H
 263         bne     divlongway
 264         or      A0H,A0H
 265         beq     _lab6
 266
 267 ; we know that D == 0 and N is != 0
 268         mov.b   A0H,A3L         ; Nn Dd xP 0N
 269         divxu   A1L,A3          ;          MQ
 270         mov.b   A3L,A0H         ; Q
 271 ; dealt with N, do n
     ; (A3H still holds the remainder M of the first step, or 0 if that
     ; step was skipped, so this divides M:n by d)
 272 _lab6:  mov.b   A0L,A3L         ;           n
 273         divxu   A1L,A3          ;          mq
 274         mov.b   A3L,A0L         ; Qq
 275         mov.b   A3H,A3L         ;           m
 276         mov.b   #0x0,A3H        ; Qq       0m
 277         rts
 278
 279 ; D != 0 - which means the denominator is at least 256, so the quotient
 280 ;          fits in 8 bits; loop around 8 times to get the result.
     ; A3 is the running remainder (starts as N), A0L feeds numerator bits
     ; out of its top and collects quotient bits at its bottom, and A2H
     ; counts the rounds.
 281
 282 divlongway:
 283         mov.b   A0H,A3L         ; Nn Dd xP 0N
 284         mov.b   #0x0,A0H        ; high byte of answer has to be zero
 285         mov.b   #0x8,A2H        ;       8
 286 div8:   add.b   A0L,A0L         ; n*=2
 287         rotxl   A3L             ; Make remainder bigger
 288         rotxl   A3H
 289         sub.w   A1,A3           ; remainder -= denominator
 290         bhs     setbit          ; set a bit ?
 291         add.w   A1,A3           ;  no : too far , remainder += denominator
 292
 293         dec     A2H
 294         bne     div8            ; next bit
 295         rts
 296
 297 setbit: inc     A0L             ; do insert bit
 298         dec     A2H
 299         bne     div8            ; next bit
 300         rts
 301
 302 #endif /* __H8300__ */
 303 #endif /* L_divhi3 */
 304
 305 #ifdef L_divsi3
 306
 307 ;; 4 byte integer divides for the H8/300.
 308 ;;
 309 ;; We have one routine which does all the work and lots of
 310 ;; little ones which prepare the args and massage the sign.
 311 ;; We bunch all of this into one object file since there are several
 312 ;; "supporting routines".
 313
 314         .section .text
 315         .align 2
 316
 317 ; Put abs SIs into r0/r1 and r2/r3 (er0 and er1 on the H8/300H), and leave
     ; a 1 in r6l if the result should be negative, else a 0.
 318 ; This function is here to keep branch displacements small.
 319
 320 #ifdef __H8300__
 321
 322 divnorm:
         ; in:  numerator A0:A1, denominator A2:A3 (signed 32-bit)
         ; out: both negated if they were negative
         ;      S2L = 1 if exactly one of them was negative, else 0
 323         mov.b   #0,S2L          ; keep the sign in S2
 324         mov.b   A0H,A0H         ; is the numerator -ve
 325         bge     postive
 326
 327         ; negate arg
         ; (invert all four bytes, then add 1 with the carry rippling up
         ; from A1L to A0H)
 328         not     A0H
 329         not     A1H
 330         not     A0L
 331         not     A1L
 332
 333         add     #1,A1L
 334         addx    #0,A1H
 335         addx    #0,A0L
 336         addx    #0,A0H
 337
 338         mov.b   #1,S2L          ; the sign will be -ve
 339 postive:
 340         mov.b   A2H,A2H         ; is the denominator -ve
 341         bge     postive2
 342         not     A2L             ; negate the denominator the same way
 343         not     A2H
 344         not     A3L
 345         not     A3H
 346         add.b   #1,A3L
 347         addx    #0,A3H
 348         addx    #0,A2L
 349         addx    #0,A2H
 350         xor     #1,S2L          ; toggle result sign
 351 postive2:
 352         rts
 353
 354 ;; Basically the same as divnorm, except that only the sign of the
 355 ;; numerator is recorded in S2L; the denominator sign does not affect it.
 356 modnorm:
         ; in:  numerator A0:A1, denominator A2:A3 (signed 32-bit)
         ; out: both negated if they were negative
         ;      S2L = 1 if the numerator was negative, else 0
 357         mov.b   #0,S2L          ; keep the sign in S2
 358         mov.b   A0H,A0H         ; is the numerator -ve
 359         bge     mpostive
 360
 361         ; negate arg
         ; (invert all four bytes, then add 1 with the carry rippling up
         ; from A1L to A0H)
 362         not     A0H
 363         not     A1H
 364         not     A0L
 365         not     A1L
 366
 367         add     #1,A1L
 368         addx    #0,A1H
 369         addx    #0,A0L
 370         addx    #0,A0H
 371
 372         mov.b   #1,S2L          ; the sign will be -ve
 373 mpostive:
 374         mov.b   A2H,A2H         ; is the denominator -ve
 375         bge     mpostive2
 376         not     A2L             ; negate it; S2L is left alone here
 377         not     A2H
 378         not     A3L
 379         not     A3H
 380         add.b   #1,A3L
 381         addx    #0,A3H
 382         addx    #0,A2L
 383         addx    #0,A2H
 384 mpostive2:
 385         rts
 386
 387 #else /* __H8300H__ */
 388
 389 divnorm:
         ; 32-bit register version.
         ; in:  numerator A0P, denominator A1P (signed 32-bit)
         ; out: both negated if they were negative
         ;      S2L = 1 if exactly one of them was negative, else 0
 390         mov.b   #0,S2L          ; keep the sign in S2
 391         mov.l   A0P,A0P         ; is the numerator -ve
 392         bge     postive
 393
 394         neg.l   A0P             ; negate arg
 395         mov.b   #1,S2L          ; the sign will be -ve
 396
 397 postive:
 398         mov.l   A1P,A1P         ; is the denominator -ve
 399         bge     postive2
 400
 401         neg.l   A1P             ; negate arg
 402         xor.b   #1,S2L          ; toggle result sign
 403
 404 postive2:
 405         rts
 406
 407 ;; Basically the same as divnorm, except that only the sign of the
 408 ;; numerator is recorded in S2L; the denominator sign does not affect it.
 409 modnorm:
         ; in:  numerator A0P, denominator A1P (signed 32-bit)
         ; out: both negated if they were negative
         ;      S2L = 1 if the numerator was negative, else 0
 410         mov.b   #0,S2L          ; keep the sign in S2
 411         mov.l   A0P,A0P         ; is the numerator -ve
 412         bge     mpostive
 413
 414         neg.l   A0P             ; negate arg
 415         mov.b   #1,S2L          ; the sign will be -ve
 416
 417 mpostive:
 418         mov.l   A1P,A1P         ; is the denominator -ve
 419         bge     mpostive2
 420
 421         neg.l   A1P             ; negate arg (S2L is left alone here)
 422
 423 mpostive2:
 424         rts
 425
 426 #endif
 427
 428 ; numerator in A0/A1 (A0P on the H8/300H)
 429 ; denominator in A2/A3 (A1P on the H8/300H)
     ; ___modsi3: signed 32-bit remainder.  The result takes the sign of
     ; the numerator, because modnorm sets S2L only for a negative numerator.
 430         .global ___modsi3
 431 ___modsi3:
 432         PUSHP   S2P             ; exitdiv pops these in reverse order
 433         PUSHP   S0P
 434         PUSHP   S1P
 435
 436         bsr     modnorm         ; strip signs, S2L = result sign
 437         bsr     divmodsi4       ; remainder comes back in S0/S1 (S0P)
 438 #ifdef __H8300__
 439         mov     S0,A0           ; return the remainder, not the quotient
 440         mov     S1,A1
 441 #else
 442         mov.l   S0P,A0P
 443 #endif
 444         bra     exitdiv         ; restore registers and apply the sign
 445
 446         .global ___udivsi3
 447 ___udivsi3:
         ; Unsigned 32-bit divide.  Operands and result use the same
         ; registers as divmodsi4; the quotient it returns is the result.
 448         PUSHP   S2P             ; exitdiv pops these in reverse order
 449         PUSHP   S0P
 450         PUSHP   S1P
 451         mov.b   #0,S2L  ; keep sign low
                         ; (S2L = 0 makes exitdiv skip the negation)
 452         bsr     divmodsi4
 453         bra     exitdiv
 454
 455         .global ___umodsi3
 456 ___umodsi3:
         ; Unsigned 32-bit remainder.  Operands use the same registers as
         ; divmodsi4; its remainder output is copied into the result.
 457         PUSHP   S2P             ; exitdiv pops these in reverse order
 458         PUSHP   S0P
 459         PUSHP   S1P
 460         mov.b   #0,S2L  ; keep sign low
                         ; (S2L = 0 makes exitdiv skip the negation)
 461         bsr     divmodsi4
 462 #ifdef __H8300__
 463         mov     S0,A0           ; return the remainder, not the quotient
 464         mov     S1,A1
 465 #else
 466         mov.l   S0P,A0P
 467 #endif
 468         bra     exitdiv
 469
 470         .global ___divsi3
 471 ___divsi3:
         ; Signed 32-bit divide.  Falls through into exitdiv, the common
         ; tail that ___modsi3, ___udivsi3 and ___umodsi3 branch to.
 472         PUSHP   S2P
 473         PUSHP   S0P
 474         PUSHP   S1P
         ; jsr is used here where the other entry points use bsr
         ; (presumably divnorm is out of bsr range from here -- confirm)
 475         jsr     divnorm
 476         jsr     divmodsi4
 477
 478         ; examine what the sign should be
         ; exitdiv expects the result in A0/A1 (A0P), the sign in S2L, and
         ; S2, S0, S1 pushed in that order
 479 exitdiv:
 480         POPP    S1P
 481         POPP    S0P
 482
 483         or      S2L,S2L         ; S2L != 0 means negate the result
 484         beq     reti
 485
 486         ; should be -ve
 487 #ifdef __H8300__
 488         not     A0H             ; invert all four bytes ...
 489         not     A1H
 490         not     A0L
 491         not     A1L
 492
 493         add     #1,A1L          ; ... then add 1, carry rippling upward
 494         addx    #0,A1H
 495         addx    #0,A0L
 496         addx    #0,A0H
 497 #else /* __H8300H__ */
 498         neg.l   A0P
 499 #endif
 500
 501 reti:
 502         POPP    S2P             ; S2 is restored last; it held the sign
 503         rts
 504
 505         ; takes A0/A1 numerator (A0P for 300h)
 506         ; A2/A3 denominator (A1P for 300h)
 507         ; returns A0/A1 quotient (A0P for 300h)
 508         ; S0/S1 remainder (S0P for 300h)
 509         ; trashes S2H (S2L is left alone; exitdiv reads the sign from it)
         ; unsigned divide: the callers strip the signs first
 510
 511 #ifdef __H8300__
 512
 513 divmodsi4:
 514         sub.w   S0,S0           ; zero play area
 515         mov.w   S0,S1
 516         mov.b   A2H,S2H         ; S2H = OR of the top three denominator bytes
 517         or      A2L,S2H
 518         or      A3H,S2H
 519         bne     DenHighZero     ; not all zero (despite the label name)
         ; The denominator fits in A3L.  Do long division one numerator
         ; byte at a time with divxu, starting at the first non-zero byte.
         ; Each NumByteNZero label is the entry point for byte N, where
         ; byte 0 is A0H; the name does not mean that the byte is zero.
 520         mov.b   A0H,A0H
 521         bne     NumByte0Zero
 522         mov.b   A0L,A0L
 523         bne     NumByte1Zero
 524         mov.b   A1H,A1H
 525         bne     NumByte2Zero
 526         bra     NumByte3Zero
 527 NumByte0Zero:
 528         mov.b   A0H,S1L         ; S1 = previous remainder : this byte
 529         divxu   A3L,S1          ; S1H = remainder, S1L = quotient byte
 530         mov.b   S1L,A0H
 531 NumByte1Zero:
 532         mov.b   A0L,S1L
 533         divxu   A3L,S1
 534         mov.b   S1L,A0L
 535 NumByte2Zero:
 536         mov.b   A1H,S1L
 537         divxu   A3L,S1
 538         mov.b   S1L,A1H
 539 NumByte3Zero:
 540         mov.b   A1L,S1L
 541         divxu   A3L,S1
 542         mov.b   S1L,A1L
 543
 544         mov.b   S1H,S1L         ; final remainder into the low byte of S1
 545         mov.b   #0x0,S1H        ; S0 is still zero from the entry
 546         rts
 547
 548 ; have to do the divide by shift and test
     ; The denominator is at least 256 here, so the quotient fits in 24
     ; bits.  The remainder S0:S1 starts as the top numerator byte, the
     ; numerator is moved up one byte, and each round shifts one numerator
     ; bit into the remainder while a quotient bit enters at bit 0 of A1L.
 549 DenHighZero:
 550         mov.b   A0H,S1L
 551         mov.b   A0L,A0H
 552         mov.b   A1H,A0L
 553         mov.b   A1L,A1H
 554
 555         mov.b   #0,A1L
 556         mov.b   #24,S2H ; only do 24 iterations
 557
 558 nextbit:
 559         add.w   A1,A1   ; double the answer guess
 560         rotxl   A0L
 561         rotxl   A0H
 562
 563         rotxl   S1L     ; double remainder
 564         rotxl   S1H
 565         rotxl   S0L
 566         rotxl   S0H
 567         sub.w   A3,S1   ; does it all fit
 568         subx    A2L,S0L
 569         subx    A2H,S0H
 570         bhs     setone  ; no borrow: the denominator fitted
 571
 572         add.w   A3,S1   ; no, restore mistake
 573         addx    A2L,S0L
 574         addx    A2H,S0H
 575
 576         dec     S2H
 577         bne     nextbit
 578         rts
 579
 580 setone:
 581         inc     A1L     ; bit 0 is clear after the shift, so this sets it
 582         dec     S2H
 583         bne     nextbit
 584         rts
 585
 586 #else /* __H8300H__ */
 587
 588 divmodsi4:
         ; 32-bit register version of the unsigned divide.
         ; in:  numerator A0P, denominator A1P
         ; out: quotient A0P, remainder S0P
         ; Writes S2H as the loop counter and A2P as scratch; S2L is
         ; left alone.
 589         sub.l   S0P,S0P         ; zero play area
 590         mov.w   A1E,A1E         ; denominator top word 0?
 591         bne     DenHighZero     ; no (despite the label name)
 592
 593         ; do it the easy way, see page 107 in manual
         ; two 32/16 divxu.w steps, high numerator word first
 594         mov.w   A0E,A2
 595         extu.l  A2P             ; A2P = high word of the numerator
 596         divxu.w A1,A2P          ; A2 = high quotient word, A2E = remainder
 597         mov.w   A2E,A0E         ; A0P = that remainder : low numerator word
 598         divxu.w A1,A0P          ; A0 = low quotient word, A0E = remainder
 599         mov.w   A0E,S0          ; remainder goes out in S0P
 600         mov.w   A2,A0E          ; put the high quotient word in place
 601         extu.l  S0P
 602         rts
 603
         ; The denominator is at least 65536.  S0P starts as the top
         ; numerator byte, A0P is moved up one byte with a zero low byte,
         ; and 24 rounds of shift and subtract follow; quotient bits enter
         ; at bit 0 of A0L.
 604 DenHighZero:
 605         mov.w   A0E,A2
 606         mov.b   A2H,S0L
 607         mov.b   A2L,A2H
 608         mov.b   A0H,A2L
 609         mov.w   A2,A0E
 610         mov.b   A0L,A0H
 611         mov.b   #0,A0L
 612         mov.b   #24,S2H         ; only do 24 iterations
 613
 614 nextbit:
 615         shll.l  A0P             ; double the answer guess
 616         rotxl.l S0P             ; double remainder
 617         sub.l   A1P,S0P         ; does it all fit?
 618         bhs     setone          ; no borrow: the denominator fitted
 619
 620         add.l   A1P,S0P         ; no, restore mistake
 621         dec     S2H
 622         bne     nextbit
 623         rts
 624
 625 setone:
 626         inc     A0L             ; bit 0 is clear after the shift
 627         dec     S2H
 628         bne     nextbit
 629         rts
 630
 631 #endif
 632 #endif /* L_divsi3 */
 633
 634 #ifdef L_mulhi3
 635
 636 ;; HImode multiply.
 637 ; The h8 only has an 8*8->16 multiply.
 638 ; The answer is the same as:
 639 ;
 640 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
 641 ; (we can ignore srca.h * srcb.h because it all falls off the top)
 642 ; A0 in
 643 ; A1 in
 644 ; A0 answer
 645
 646 #ifdef __H8300__
 647         .section .text
 648         .align 2
 649         .global ___mulhi3
 650 ___mulhi3:
         ; 16 x 16 -> 16 bit multiply built from three 8 x 8 mulxu steps.
         ; in:  A0 = srca, A1 = srcb
         ; out: A0 = low 16 bits of the product
         ; A2 and A3 are overwritten.
 651         mov.b   A1L,A2L         ; A2l gets srcb.l
 652         mulxu   A0L,A2          ; A2 gets first sub product
 653
 654         mov.b   A0H,A3L         ; prepare for
 655         mulxu   A1L,A3          ; second sub product
 656
 657         add.b   A3L,A2H         ; sum first two terms
                                 ; (only the low byte of the cross product
                                 ; lands inside the 16-bit result)
 658
 659         mov.b   A1H,A3L         ; third sub product
 660         mulxu   A0L,A3
 661
 662         add.b   A3L,A2H         ; almost there
 663         mov.w   A2,A0           ; that is
 664         rts
 665
 666 #endif
 667 #endif /* L_mulhi3 */
 668
 669 #ifdef L_mulsi3
 670
 671 ;; SImode multiply.
 672 ;;
 673 ;; I think that shift and add may be sufficient for this.  Using the
 674 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
 675 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
 676 ;; quickly on small args.
 677 ;;
 678 ;; A0/A1 src_a
 679 ;; A2/A3 src_b
 680 ;;
 681 ;;  while (a)
 682 ;;    {
 683 ;;      if (a & 1)
 684 ;;        r += b;
 685 ;;      a >>= 1;
 686 ;;      b <<= 1;
 687 ;;    }
 688
 689         .section .text
 690         .align 2
 691
 692 #ifdef __H8300__
 693
 694         .global ___mulsi3
 695 ___mulsi3:
         ; Shift-and-add 32-bit multiply.
         ; in:  a = A0:A1, b = A2:A3
         ; out: A0:A1 = low 32 bits of a * b
         ; The running sum r lives in S0:S1.  S2 is pushed and popped but
         ; not otherwise touched in this routine.
 696         PUSHP   S0P
 697         PUSHP   S1P
 698         PUSHP   S2P
 699
 700         sub.w   S0,S0           ; r = 0
 701         sub.w   S1,S1
 702
 703         ; while (a)
 704 _top:   mov.w   A0,A0
 705         bne     _more
 706         mov.w   A1,A1
 707         beq     _done
 708 _more:  ; if (a & 1)
 709         bld     #0,A1L          ; carry = bit 0 of a
 710         bcc     _nobit
 711         ; r += b
 712         add.w   A3,S1
 713         addx    A2L,S0L
 714         addx    A2H,S0H
 715 _nobit:
 716         ; a >>= 1
 717         shlr    A0H             ; logical shift, carried down byte by byte
 718         rotxr   A0L
 719         rotxr   A1H
 720         rotxr   A1L
 721
 722         ; b <<= 1
 723         add.w   A3,A3           ; doubling, with the carry rippling upward
 724         addx    A2L,A2L
 725         addx    A2H,A2H
 726         bra     _top
 727
 728 _done:
 729         mov.w   S0,A0           ; return r
 730         mov.w   S1,A1
 731         POPP    S2P             ; pops mirror the pushes at the entry
 732         POPP    S1P
 733         POPP    S0P
 734         rts
 735
 736 #else /* __H8300H__ */
 737
 738         .global ___mulsi3
 739 ___mulsi3:
         ; Shift-and-add 32-bit multiply using the 32-bit registers.
         ; in:  a = A0P, b = A1P
         ; out: A0P = low 32 bits of a * b
         ; The running sum r lives in A2P; nothing is pushed on the stack.
 740         sub.l   A2P,A2P         ; r = 0
 741
 742         ; while (a)
 743 _top:   mov.l   A0P,A0P
 744         beq     _done
 745
 746         ; if (a & 1)
 747         bld     #0,A0L          ; carry = bit 0 of a
 748         bcc     _nobit
 749
 750         ; r += b
 751         add.l   A1P,A2P
 752
 753 _nobit:
 754         ; a >>= 1
 755         shlr.l  A0P
 756
 757         ; b <<= 1
 758         shll.l  A1P
 759         bra     _top
 760
 761 _done:
 762         mov.l   A2P,A0P         ; return r
 763         rts
 764
 765 #endif
 766 #endif /* L_mulsi3 */