gcc/config/h8300/lib1funcs.asm

   1 ;; libgcc1 routines for the Hitachi h8/300 cpu.
   2 ;; Contributed by Steve Chamberlain.
   3 ;; sac@cygnus.com
   4
   5 /* Copyright (C) 1994 Free Software Foundation, Inc.
   6
   7 This file is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 2, or (at your option) any
  10 later version.
  11
  12 In addition to the permissions in the GNU General Public License, the
  13 Free Software Foundation gives you unlimited permission to link the
  14 compiled version of this file with other programs, and to distribute
  15 those programs without any restriction coming from the use of this
  16 file.  (The General Public License restrictions do apply in other
  17 respects; for example, they cover modification of the file, and
  18 distribution when not linked into another program.)
  19
  20 This file is distributed in the hope that it will be useful, but
  21 WITHOUT ANY WARRANTY; without even the implied warranty of
  22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23 General Public License for more details.
  24
  25 You should have received a copy of the GNU General Public License
  26 along with this program; see the file COPYING.  If not, write to
  27 the Free Software Foundation, 59 Temple Place - Suite 330,
  28 Boston, MA 02111-1307, USA.  */
  29
  30 /* As a special exception, if you link this library with other files,
  31    some of which are compiled with GCC, to produce an executable,
  32    this library does not by itself cause the resulting executable
  33    to be covered by the GNU General Public License.
  34    This exception does not however invalidate any other reasons why
  35    the executable file might be covered by the GNU General Public License.  */
  36
  37 /* Assembler register definitions.  */
  38
  39 #define A0 r0
  40 #define A0L r0l
  41 #define A0H r0h
  42
  43 #define A1 r1
  44 #define A1L r1l
  45 #define A1H r1h
  46
  47 #define A2 r2
  48 #define A2L r2l
  49 #define A2H r2h
  50
  51 #define A3 r3
  52 #define A3L r3l
  53 #define A3H r3h
  54
  55 #define S0 r4
  56 #define S0L r4l
  57 #define S0H r4h
  58
  59 #define S1 r5
  60 #define S1L r5l
  61 #define S1H r5h
  62
  63 #define S2 r6
  64 #define S2L r6l
  65 #define S2H r6h
  66
  67 #ifdef __H8300__
  68 #define MOVP    mov.w   /* pointers are 16 bits */
  69 #define ADDP    add.w
  70 #define CMPP    cmp.w
  71 #define PUSHP   push
  72 #define POPP    pop
  73
  74 #define A0P     r0
  75 #define A1P     r1
  76 #define A2P     r2
  77 #define A3P     r3
  78 #define S0P     r4
  79 #define S1P     r5
  80 #define S2P     r6
  81 #endif
  82
  83 #ifdef __H8300H__
  84 #define MOVP    mov.l   /* pointers are 32 bits */
  85 #define ADDP    add.l
  86 #define CMPP    cmp.l
  87 #define PUSHP   push.l
  88 #define POPP    pop.l
  89
  90 #define A0P     er0
  91 #define A1P     er1
  92 #define A2P     er2
  93 #define A3P     er3
  94 #define S0P     er4
  95 #define S1P     er5
  96 #define S2P     er6
  97
  98 #define A0E     e0
  99 #define A1E     e1
 100 #define A2E     e2
 101 #define A3E     e3
 102 #endif
 103
 104 #ifdef L_cmpsi2
 105 #ifdef __H8300__
 106         .section .text
 107         .align 2
 108         .global ___cmpsi2
 109 ___cmpsi2:
 110         cmp.w   A2,A0
 111         bne     .L2
 112         cmp.w   A3,A1
 113         bne     .L2
 114         mov.w   #1,A0
 115         rts
 116 .L2:
 117         cmp.w   A0,A2
 118         bgt     .L4
 119         bne     .L3
 120         cmp.w   A1,A3
 121         bls     .L3
 122 .L4:
 123         sub.w   A0,A0
 124         rts
 125 .L3:
 126         mov.w   #2,A0
 127 .L5:
 128         rts
 129         .end
 130 #endif
 131 #endif /* L_cmpsi2 */
 132
 133 #ifdef L_ucmpsi2
 134 #ifdef __H8300__
 135         .section .text
 136         .align 2
 137         .global ___ucmpsi2
 138 ___ucmpsi2:
 139         cmp.w   A2,A0
 140         bne     .L2
 141         cmp.w   A3,A1
 142         bne     .L2
 143         mov.w   #1,A0
 144         rts
 145 .L2:
 146         cmp.w   A0,A2
 147         bhi     .L4
 148         bne     .L3
 149         cmp.w   A1,A3
 150         bls     .L3
 151 .L4:
 152         sub.w   A0,A0
 153         rts
 154 .L3:
 155         mov.w   #2,A0
 156 .L5:
 157         rts
 158         .end
 159 #endif
 160 #endif /* L_ucmpsi2 */
 161
 162 #ifdef L_divhi3
 163
 164 ;; HImode divides for the H8/300.
 165 ;; We bunch all of this into one object file since there are several
 166 ;; "supporting routines".
 167
 168 ; general purpose normalize routine
 169 ;
 170 ; divisor in A0
 171 ; dividend in A1
 172 ; turns both into +ve numbers, and leaves what the answer sign
 173 ; should be in A2L
 174
 175 #ifdef __H8300__
 176         .section .text
 177         .align 2
 178 divnorm:
 179         mov.b   #0x0,A2L
 180         or      A0H,A0H         ; is divisor > 0
 181         bge     _lab1
 182         not     A0H             ; no - then make it +ve
 183         not     A0L
 184         adds    #1,A0
 185         xor     #0x1,A2L        ; and remember that in A2L
 186 _lab1:  or      A1H,A1H ; look at dividend
 187         bge     _lab2
 188         not     A1H             ; it is -ve, make it positive
 189         not     A1L
 190         adds    #1,A1
 191         xor     #0x1,A2L; and toggle sign of result
 192 _lab2:  rts
 193
 194 ; A0=A0/A1 signed
 195
 196         .global ___divhi3
 197 ___divhi3:
 198         bsr     divnorm
 199         bsr     ___udivhi3
 200 negans: or      A2L,A2L ; should answer be negative ?
 201         beq     _lab4
 202         not     A0H     ; yes, so make it so
 203         not     A0L
 204         adds    #1,A0
 205 _lab4:  rts
 206
 207 ; A0=A0%A1 signed
 208
 209         .global ___modhi3
 210 ___modhi3:
 211         bsr     divnorm
 212         bsr     ___udivhi3
 213         mov     A3,A0
 214         bra     negans
 215
 216 ; A0=A0%A1 unsigned
 217
 218         .global ___umodhi3
 219 ___umodhi3:
 220         bsr     ___udivhi3
 221         mov     A3,A0
 222         rts
 223
 224 ; A0=A0/A1 unsigned
 225 ; A3=A0%A1 unsigned
 226 ; A2H trashed
 227 ; D high 8 bits of denom
 228 ; d low 8 bits of denom
 229 ; N high 8 bits of num
 230 ; n low 8 bits of num
 231 ; M high 8 bits of mod
 232 ; m low 8 bits of mod
 233 ; Q high 8 bits of quot
 234 ; q low 8 bits of quot
 235 ; P preserve
 236
 237 ; The h8 only has a 16/8 bit divide, so we look at the incoming and
 238 ; see how to partition up the expression.
 239
 240         .global ___udivhi3
 241 ___udivhi3:
 242                                 ; A0 A1 A2 A3
 243                                 ; Nn Dd       P
 244         sub.w   A3,A3           ; Nn Dd xP 00
 245         or      A1H,A1H
 246         bne     divlongway
 247         or      A0H,A0H
 248         beq     _lab6
 249
 250 ; we know that D == 0 and N is != 0
 251         mov.b   A0H,A3L         ; Nn Dd xP 0N
 252         divxu   A1L,A3          ;          MQ
 253         mov.b   A3L,A0H         ; Q
 254 ; dealt with N, do n
 255 _lab6:  mov.b   A0L,A3L         ;           n
 256         divxu   A1L,A3          ;          mq
 257         mov.b   A3L,A0L         ; Qq
 258         mov.b   A3H,A3L         ;           m
 259         mov.b   #0x0,A3H        ; Qq       0m
 260         rts
 261
 262 ; D != 0 - which means the denominator is
 263 ;          loop around to get the result.
 264
 265 divlongway:
 266         mov.b   A0H,A3L         ; Nn Dd xP 0N
 267         mov.b   #0x0,A0H        ; high byte of answer has to be zero
 268         mov.b   #0x8,A2H        ;       8
 269 div8:   add.b   A0L,A0L         ; n*=2
 270         rotxl   A3L             ; Make remainder bigger
 271         rotxl   A3H
 272         sub.w   A1,A3           ; Q-=N
 273         bhs     setbit          ; set a bit ?
 274         add.w   A1,A3           ;  no : too far , Q+=N
 275
 276         dec     A2H
 277         bne     div8            ; next bit
 278         rts
 279
 280 setbit: inc     A0L             ; do insert bit
 281         dec     A2H
 282         bne     div8            ; next bit
 283         rts
 284
 285 #endif /* __H8300__ */
 286 #endif /* L_divhi3 */
 287
 288 #ifdef L_divsi3
 289
 290 ;; 4 byte integer divides for the H8/300.
 291 ;;
 292 ;; We have one routine which does all the work and lots of
 293 ;; little ones which prepare the args and massage the sign.
 294 ;; We bunch all of this into one object file since there are several
 295 ;; "supporting routines".
 296
 297 #ifdef __H8300H__
 298         .h8300h
 299 #endif
 300
 301         .section .text
 302         .align 2
 303
 304 ; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest.
 305 ; This function is here to keep branch displacements small.
 306
 307 #ifdef __H8300__
 308
 309 divnorm:
 310         mov.b   #0,S2L          ; keep the sign in S2
 311         mov.b   A0H,A0H         ; is the numerator -ve
 312         bge     postive
 313
 314         ; negate arg
 315         not     A0H
 316         not     A1H
 317         not     A0L
 318         not     A1L
 319
 320         add     #1,A1L
 321         addx    #0,A1H
 322         addx    #0,A0H
 323         addx    #0,A0L
 324
 325         mov.b   #1,S2L          ; the sign will be -ve
 326 postive:
 327         mov.b   A2H,A2H         ; is the denominator -ve
 328         bge     postive2
 329         not     A2L
 330         not     A2H
 331         not     A3L
 332         not     A3H
 333         add.b   #1,A3L
 334         addx    #0,A3H
 335         addx    #0,A2L
 336         addx    #0,A2H
 337         xor     #1,S2L          ; toggle result sign
 338 postive2:
 339         rts
 340
 341 #else /* __H8300H__ */
 342
 343 divnorm:
 344         mov.b   #0,S2L          ; keep the sign in S2
 345         mov.l   A0P,A0P         ; is the numerator -ve
 346         bge     postive
 347
 348         neg.l   A0P             ; negate arg
 349         mov.b   #1,S2L          ; the sign will be -ve
 350
 351 postive:
 352         mov.l   A1P,A1P         ; is the denominator -ve
 353         bge     postive2
 354
 355         neg.l   A1P             ; negate arg
 356         xor.b   #1,S2L          ; toggle result sign
 357
 358 postive2:
 359         rts
 360
 361 #endif
 362
 363 ; numerator in A0/A1
 364 ; denominator in A2/A3
 365         .global ___modsi3
 366 ___modsi3:
 367         PUSHP   S2P
 368         PUSHP   S0P
 369         PUSHP   S1P
 370
 371         bsr     divnorm
 372         bsr     divmodsi4
 373 #ifdef __H8300__
 374         mov     S0,A0
 375         mov     S1,A1
 376 #else
 377         mov.l   S0P,A0P
 378 #endif
 379         bra     exitdiv
 380
 381         .global ___udivsi3
 382 ___udivsi3:
 383         PUSHP   S2P
 384         PUSHP   S0P
 385         PUSHP   S1P
 386         mov.b   #0,S2L  ; keep sign low
 387         bsr     divmodsi4
 388         bra     exitdiv
 389
 390         .global ___umodsi3
 391 ___umodsi3:
 392         PUSHP   S2P
 393         PUSHP   S0P
 394         PUSHP   S1P
 395         mov.b   #0,S2L  ; keep sign low
 396         bsr     divmodsi4
 397 #ifdef __H8300__
 398         mov     S0,A0
 399         mov     S1,A1
 400 #else
 401         mov.l   S0P,A0P
 402 #endif
 403         bra     exitdiv
 404
 405         .global ___divsi3
 406 ___divsi3:
 407         PUSHP   S2P
 408         PUSHP   S0P
 409         PUSHP   S1P
 410         jsr     divnorm
 411         jsr     divmodsi4
 412
 413         ; examine what the sign should be
 414 exitdiv:
 415         POPP    S1P
 416         POPP    S0P
 417
 418         or      S2L,S2L
 419         beq     reti
 420
 421         ; should be -ve
 422 #ifdef __H8300__
 423         not     A0H
 424         not     A1H
 425         not     A0L
 426         not     A1L
 427
 428         add     #1,A1L
 429         addx    #0,A1H
 430         addx    #0,A0H
 431         addx    #0,A0L
 432 #else /* __H8300H__ */
 433         neg.l   A0P
 434 #endif
 435
 436 reti:
 437         POPP    S2P
 438         rts
 439
 440         ; takes A0/A1 numerator (A0P for 300h)
 441         ; A2/A3 denominator (A1P for 300h)
 442         ; returns A0/A1 quotient (A0P for 300h)
 443         ; S0/S1 remainder (S0P for 300h)
 444         ; trashes S2
 445
 446 #ifdef __H8300__
 447
 448 divmodsi4:
 449         sub.w   S0,S0           ; zero play area
 450         mov.w   S0,S1
 451         mov.b   A2H,S2H
 452         or      A2L,S2H
 453         or      A3H,S2H
 454         bne     DenHighZero
 455         mov.b   A0H,A0H
 456         bne     NumByte0Zero
 457         mov.b   A0L,A0L
 458         bne     NumByte1Zero
 459         mov.b   A1H,A1H
 460         bne     NumByte2Zero
 461         bra     NumByte3Zero
 462 NumByte0Zero:
 463         mov.b   A0H,S1L
 464         divxu   A3L,S1
 465         mov.b   S1L,A0H
 466 NumByte1Zero:
 467         mov.b   A0L,S1L
 468         divxu   A3L,S1
 469         mov.b   S1L,A0L
 470 NumByte2Zero:
 471         mov.b   A1H,S1L
 472         divxu   A3L,S1
 473         mov.b   S1L,A1H
 474 NumByte3Zero:
 475         mov.b   A1L,S1L
 476         divxu   A3L,S1
 477         mov.b   S1L,A1L
 478
 479         mov.b   S1H,S1L
 480         mov.b   #0x0,S1H
 481         rts
 482
 483 ; have to do the divide by shift and test
 484 DenHighZero:
 485         mov.b   A0H,S1L
 486         mov.b   A0L,A0H
 487         mov.b   A1H,A0L
 488         mov.b   A1L,A1H
 489
 490         mov.b   #0,A1L
 491         mov.b   #24,S2H ; only do 24 iterations
 492
 493 nextbit:
 494         add.w   A1,A1   ; double the answer guess
 495         rotxl   A0L
 496         rotxl   A0H
 497
 498         rotxl   S1L     ; double remainder
 499         rotxl   S1H
 500         rotxl   S0L
 501         rotxl   S0H
 502         sub.w   A3,S1   ; does it all fit
 503         subx    A2L,S0L
 504         subx    A2H,S0H
 505         bhs     setone
 506
 507         add.w   A3,S1   ; no, restore mistake
 508         addx    A2L,S0L
 509         addx    A2H,S0H
 510
 511         dec     S2H
 512         bne     nextbit
 513         rts
 514
 515 setone:
 516         inc     A1L
 517         dec     S2H
 518         bne     nextbit
 519         rts
 520
 521 #else /* __H8300H__ */
 522
 523 divmodsi4:
 524         sub.l   S0P,S0P         ; zero play area
 525         mov.w   A1E,A1E         ; denominator top word 0?
 526         bne     DenHighZero
 527
 528         ; do it the easy way, see page 107 in manual
 529         mov.w   A0E,A2
 530         extu.l  A2P
 531         divxu.w A1,A2P
 532         mov.w   A2E,A0E
 533         divxu.w A1,A0P
 534         mov.w   A0E,S0
 535         mov.w   A2,A0E
 536         extu.l  S0P
 537         rts
 538
 539 DenHighZero:
 540         mov.w   A0E,A2
 541         mov.b   A2H,S0L
 542         mov.b   A2L,A2H
 543         mov.b   A0H,A2L
 544         mov.w   A2,A0E
 545         mov.b   A0L,A0H
 546         mov.b   #0,A0L
 547         mov.b   #24,S2H         ; only do 24 iterations
 548
 549 nextbit:
 550         shll.l  A0P             ; double the answer guess
 551         rotxl.l S0P             ; double remainder
 552         sub.l   A1P,S0P         ; does it all fit?
 553         bhs     setone
 554
 555         add.l   A1P,S0P         ; no, restore mistake
 556         dec     S2H
 557         bne     nextbit
 558         rts
 559
 560 setone:
 561         inc     A0L
 562         dec     S2H
 563         bne     nextbit
 564         rts
 565
 566 #endif
 567 #endif /* L_divsi3 */
 568
 569 #ifdef L_mulhi3
 570
 571 ;; HImode multiply.
 572 ; The h8 only has an 8*8->16 multiply.
 573 ; The answer is the same as:
 574 ;
 575 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
 576 ; (we can ignore A1.h * A0.h cause that will all off the top)
 577 ; A0 in
 578 ; A1 in
 579 ; A0 answer
 580
 581 #ifdef __H8300__
 582         .section .text
 583         .align 2
 584         .global ___mulhi3
 585 ___mulhi3:
 586         mov.b   A1L,A2L         ; A2l gets srcb.l
 587         mulxu   A0L,A2          ; A2 gets first sub product
 588
 589         mov.b   A0H,A3L         ; prepare for
 590         mulxu   A1L,A3          ; second sub product
 591
 592         add.b   A3L,A2H         ; sum first two terms
 593
 594         mov.b   A1H,A3L         ; third sub product
 595         mulxu   A0L,A3
 596
 597         add.b   A3L,A2H         ; almost there
 598         mov.w   A2,A0           ; that is
 599         rts
 600
 601 #endif
 602 #endif /* L_mulhi3 */
 603
 604 #ifdef L_mulsi3
 605
 606 ;; SImode multiply.
 607 ;;
 608 ;; I think that shift and add may be sufficient for this.  Using the
 609 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
 610 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
 611 ;; quickly on small args.
 612 ;;
 613 ;; A0/A1 src_a
 614 ;; A2/A3 src_b
 615 ;;
 616 ;;  while (a)
 617 ;;    {
 618 ;;      if (a & 1)
 619 ;;        r += b;
 620 ;;      a >>= 1;
 621 ;;      b <<= 1;
 622 ;;    }
 623
 624         .section .text
 625         .align 2
 626
 627 #ifdef __H8300__
 628
 629         .global ___mulsi3
 630 ___mulsi3:
 631         PUSHP   S0P
 632         PUSHP   S1P
 633         PUSHP   S2P
 634
 635         sub.w   S0,S0
 636         sub.w   S1,S1
 637
 638         ; while (a)
 639 _top:   mov.w   A0,A0
 640         bne     _more
 641         mov.w   A1,A1
 642         beq     _done
 643 _more:  ; if (a & 1)
 644         bld     #0,A1L
 645         bcc     _nobit
 646         ; r += b
 647         add.w   A3,S1
 648         addx    A2L,S0L
 649         addx    A2H,S0H
 650 _nobit:
 651         ; a >>= 1
 652         shlr    A0H
 653         rotxr   A0L
 654         rotxr   A1H
 655         rotxr   A1L
 656
 657         ; b <<= 1
 658         add.w   A3,A3
 659         addx    A2L,A2L
 660         addx    A2H,A2H
 661         bra     _top
 662
 663 _done:
 664         mov.w   S0,A0
 665         mov.w   S1,A1
 666         POPP    S2P
 667         POPP    S1P
 668         POPP    S0P
 669         rts
 670
 671 #else /* __H8300H__ */
 672
 673         .h8300h
 674
 675         .global ___mulsi3
 676 ___mulsi3:
 677         sub.l   A2P,A2P
 678
 679         ; while (a)
 680 _top:   mov.l   A0P,A0P
 681         beq     _done
 682
 683         ; if (a & 1)
 684         bld     #0,A0L
 685         bcc     _nobit
 686
 687         ; r += b
 688         add.l   A1P,A2P
 689
 690 _nobit:
 691         ; a >>= 1
 692         shlr.l  A0P
 693
 694         ; b <<= 1
 695         shll.l  A1P
 696         bra     _top
 697
 698 _done:
 699         mov.l   A2P,A0P
 700         rts
 701
 702 #endif
 703 #endif /* L_mulsi3 */