gcc/config/h8300/lib1funcs.asm

   1 ;; libgcc1 routines for the Hitachi h8/300 cpu.
   2 ;; Contributed by Steve Chamberlain.
   3 ;; sac@cygnus.com
   4 ;; This file is in the public domain.
   5
   6 /* Assembler register definitions.
        A0-A3 name r0-r3 and S0-S2 name r4-r6.  An L or H suffix picks the
        low (rNl) or high (rNh) byte of that register.  The routines in this
        file use A0-A3 freely, while S0-S2 are pushed before use and popped
        again on exit by the SImode routines.  */
   7
   8 #define A0 r0
   9 #define A0L r0l
  10 #define A0H r0h
  11
  12 #define A1 r1
  13 #define A1L r1l
  14 #define A1H r1h
  15
  16 #define A2 r2
  17 #define A2L r2l
  18 #define A2H r2h
  19
  20 #define A3 r3
  21 #define A3L r3l
  22 #define A3H r3h
  23
  24 #define S0 r4
  25 #define S0L r4l
  26 #define S0H r4h
  27
  28 #define S1 r5
  29 #define S1L r5l
  30 #define S1H r5h
  31
  32 #define S2 r6
  33 #define S2L r6l
  34 #define S2H r6h
  35
  36 #ifdef __H8300__
     /* Plain H8/300: the pointer sized operations are the 16 bit word
        instructions and the xP names are the ordinary 16 bit registers.
        PUSHP and POPP are what the SImode routines use to save and restore
        S0P-S2P.  */
  37 #define MOVP    mov.w   /* pointers are 16 bits */
  38 #define ADDP    add.w
  39 #define CMPP    cmp.w
  40 #define PUSHP   push
  41 #define POPP    pop
  42
  43 #define A0P     r0
  44 #define A1P     r1
  45 #define A2P     r2
  46 #define A3P     r3
  47 #define S0P     r4
  48 #define S1P     r5
  49 #define S2P     r6
  50 #endif
  51
  52 #ifdef __H8300H__
     /* H8/300H: the pointer sized operations are the 32 bit longword
        instructions and the xP names are the 32 bit erN registers.
        The AxE names are e0-e3; the SImode divide below treats AxE as the
        top word of the matching AxP register.  */
  53 #define MOVP    mov.l   /* pointers are 32 bits */
  54 #define ADDP    add.l
  55 #define CMPP    cmp.l
  56 #define PUSHP   push.l
  57 #define POPP    pop.l
  58
  59 #define A0P     er0
  60 #define A1P     er1
  61 #define A2P     er2
  62 #define A3P     er3
  63 #define S0P     er4
  64 #define S1P     er5
  65 #define S2P     er6
  66
  67 #define A0E     e0
  68 #define A1E     e1
  69 #define A2E     e2
  70 #define A3E     e3
  71 #endif
  72
  73 #ifdef L_cmpsi2
  74 #ifdef __H8300__
     ;; ___cmpsi2: three way signed compare of two 32 bit values.
     ;;   in:  a in A0 (high word) and A1 (low word)
     ;;        b in A2 (high word) and A3 (low word)
     ;;   out: A0 = 0 if a < b, 1 if a == b, 2 if a > b
     ;; The high words are compared as signed, the low words as unsigned.
  75         .section .text
  76         .align 2
  77         .global ___cmpsi2
  78 ___cmpsi2:
  79         cmp.w   A2,A0           ; high words the same ?
  80         bne     .L2
  81         cmp.w   A3,A1           ; low words the same ?
  82         bne     .L2
  83         mov.w   #1,A0           ; a == b, return 1
  84         rts
  85 .L2:
  86         cmp.w   A0,A2           ; flags from b.high - a.high
  87         bgt     .L4             ; signed: b.high > a.high, so a < b
  88         bne     .L3             ; high words differ the other way, a > b
  89         cmp.w   A1,A3           ; high words equal, flags from b.low - a.low
  90         bls     .L3             ; unsigned: b.low <= a.low, so a > b
  91 .L4:
  92         sub.w   A0,A0           ; a < b, return 0
  93         rts
  94 .L3:
  95         mov.w   #2,A0           ; a > b, return 2
  96 .L5:
  97         rts
  98         .end
  99 #endif
 100 #endif /* L_cmpsi2 */
 101
 102 #ifdef L_ucmpsi2
 103 #ifdef __H8300__
     ;; ___ucmpsi2: three way unsigned compare of two 32 bit values.
     ;;   in:  a in A0 (high word) and A1 (low word)
     ;;        b in A2 (high word) and A3 (low word)
     ;;   out: A0 = 0 if a < b, 1 if a == b, 2 if a > b
     ;; Both the high and the low words are compared as unsigned.
 104         .section .text
 105         .align 2
 106         .global ___ucmpsi2
 107 ___ucmpsi2:
 108         cmp.w   A2,A0           ; high words the same ?
 109         bne     .L2
 110         cmp.w   A3,A1           ; low words the same ?
 111         bne     .L2
 112         mov.w   #1,A0           ; a == b, return 1
 113         rts
 114 .L2:
 115         cmp.w   A0,A2           ; flags from b.high - a.high
 116         bhi     .L4             ; unsigned: b.high > a.high, so a < b
 117         bne     .L3             ; high words differ the other way, a > b
 118         cmp.w   A1,A3           ; high words equal, flags from b.low - a.low
 119         bls     .L3             ; unsigned: b.low <= a.low, so a > b
 120 .L4:
 121         sub.w   A0,A0           ; a < b, return 0
 122         rts
 123 .L3:
 124         mov.w   #2,A0           ; a > b, return 2
 125 .L5:
 126         rts
 127         .end
 128 #endif
 129 #endif /* L_ucmpsi2 */
 130
 131 #ifdef L_divhi3
 132
 133 ;; HImode divides for the H8/300.
 134 ;; We bunch all of this into one object file since there are several
 135 ;; "supporting routines".
 136
 137 ; general purpose normalize routine
 138 ;
 139 ; dividend in A0
 140 ; divisor in A1
 141 ; turns both into +ve numbers, and leaves what the answer sign
 142 ; should be in A2L (0 = +ve, 1 = -ve, i.e. exactly one operand was -ve)
 143
 144 #ifdef __H8300__
 145         .section .text
 146         .align 2
 147 divnorm:
 148         mov.b   #0x0,A2L
 149         or      A0H,A0H         ; is dividend >= 0
 150         bge     _lab1
 151         not     A0H             ; no - then make it +ve
 152         not     A0L
 153         adds    #1,A0
 154         xor     #0x1,A2L        ; and remember that in A2L
 155 _lab1:  or      A1H,A1H ; look at divisor
 156         bge     _lab2
 157         not     A1H             ; it is -ve, make it positive
 158         not     A1L
 159         adds    #1,A1
 160         xor     #0x1,A2L; and toggle sign of result
 161 _lab2:  rts
 162
 163 ; A0=A0/A1 signed
     ;
     ; divnorm makes both operands +ve and leaves the sign of the quotient
     ; in A2L, ___udivhi3 divides the magnitudes, and negans negates the
     ; value in A0 when A2L is not zero.  negans is also the tail of
     ; ___modhi3, which branches to it with the remainder in A0.
 164
 165         .global ___divhi3
 166 ___divhi3:
 167         bsr     divnorm
 168         bsr     ___udivhi3
 169 negans: or      A2L,A2L ; should answer be negative ?
 170         beq     _lab4
 171         not     A0H     ; yes, so make it so
 172         not     A0L
 173         adds    #1,A0   ; not then +1 is a two's complement negate
 174 _lab4:  rts
 175
 176 ; A0=A0%A1 signed
     ;
     ; The remainder takes the sign of the dividend (A0) alone, so that
     ; (A0/A1)*A1 + A0%A1 == A0 holds together with the truncating
     ; ___divhi3.  The operands are therefore made +ve here instead of
     ; through divnorm, which also folds the sign of the divisor into A2L
     ; and would give 7 % -2 == -1.
     ; A2L is 1 on the way to negans when the remainder has to be negated.
     ; A2H and A3 are used as scratch by ___udivhi3.
 177
 178         .global ___modhi3
 179 ___modhi3:
 180         mov.b   #0x0,A2L        ; assume a +ve remainder
             or      A0H,A0H         ; is the dividend -ve ?
             bge     _lab7
             not     A0H             ; yes - make it +ve
             not     A0L
             adds    #1,A0
             mov.b   #0x1,A2L        ; and remember to negate the remainder
     _lab7:  or      A1H,A1H         ; is the divisor -ve ?
             bge     _lab8
             not     A1H             ; yes - make it +ve, its sign is not used
             not     A1L
             adds    #1,A1
 181 _lab8:  bsr     ___udivhi3      ; A0 = quotient, A3 = remainder
 182         mov     A3,A0           ; the remainder is the answer
 183         bra     negans          ; negate it if A2L says so
 184
 185 ; A0=A0%A1 unsigned
     ; ___udivhi3 leaves the remainder in A3, which is copied to A0 here.
 186
 187         .global ___umodhi3
 188 ___umodhi3:
 189         bsr     ___udivhi3      ; A0 = quotient, A3 = remainder
 190         mov     A3,A0           ; return the remainder instead
 191         rts
 192
 193 ; A0=A0/A1 unsigned
 194 ; A3=A0%A1 unsigned
 195 ; A2H trashed
 196 ; D high 8 bits of denom
 197 ; d low 8 bits of denom
 198 ; N high 8 bits of num
 199 ; n low 8 bits of num
 200 ; M high 8 bits of mod
 201 ; m low 8 bits of mod
 202 ; Q high 8 bits of quot
 203 ; q low 8 bits of quot
 204 ; P preserve
 205
 206 ; The h8 only has a 16/8 bit divide, so we look at the incoming and
 207 ; see how to partition up the expression.
     ; If the denominator fits in 8 bits (D == 0) the work is done with one
     ; or two divxu steps, otherwise by an 8 step shift and subtract loop.
 208
 209         .global ___udivhi3
 210 ___udivhi3:
 211                                 ; A0 A1 A2 A3
 212                                 ; Nn Dd       P
 213         sub.w   A3,A3           ; Nn Dd xP 00
 214         or      A1H,A1H         ; is D zero ?
 215         bne     divlongway      ; no, divxu cannot be used
 216         or      A0H,A0H         ; is N zero ?
 217         beq     _lab6           ; yes, only n needs dividing
 218
 219 ; we know that D == 0 and N is != 0
 220         mov.b   A0H,A3L         ; Nn Dd xP 0N
 221         divxu   A1L,A3          ;          MQ
 222         mov.b   A3L,A0H         ; Q
 223 ; dealt with N, do n (A3H still holds the remainder M of the first step)
 224 _lab6:  mov.b   A0L,A3L         ;           n
 225         divxu   A1L,A3          ;          mq
 226         mov.b   A3L,A0L         ; Qq
 227         mov.b   A3H,A3L         ;           m
 228         mov.b   #0x0,A3H        ; Qq       0m
 229         rts
 230
 231 ; D != 0 - which means the denominator is at least 256, so the quotient
 232 ;          fits in 8 bits; loop around 8 times to get the result.
 233
 234 divlongway:
 235         mov.b   A0H,A3L         ; Nn Dd xP 0N
 236         mov.b   #0x0,A0H        ; high byte of answer has to be zero
 237         mov.b   #0x8,A2H        ;       8 bits of n left to shift in
 238 div8:   add.b   A0L,A0L         ; n*=2, top bit of n goes to carry
 239         rotxl   A3L             ; Make remainder bigger, shifting that bit in
 240         rotxl   A3H
 241         sub.w   A1,A3           ; remainder -= denominator
 242         bhs     setbit          ; it fitted, set a bit ?
 243         add.w   A1,A3           ;  no : too far , remainder += denominator
 244
 245         dec     A2H
 246         bne     div8            ; next bit
 247         rts
 248
 249 setbit: inc     A0L             ; do insert bit (bit 0 is clear after the add.b)
 250         dec     A2H
 251         bne     div8            ; next bit
 252         rts
 253
 254 #endif /* __H8300__ */
 255 #endif /* L_divhi3 */
 256
 257 #ifdef L_divsi3
 258
 259 ;; 4 byte integer divides for the H8/300.
 260 ;;
 261 ;; We have one routine which does all the work and lots of
 262 ;; little ones which prepare the args and massage the sign.
 263 ;; We bunch all of this into one object file since there are several
 264 ;; "supporting routines".
 265
 266 #ifdef __H8300H__
 267         .h8300h
 268 #endif
 269
 270         .section .text
 271         .align 2
 272
 273 ; Put abs SIs into A0/A1 and A2/A3 (A0P and A1P for 300h), and leave a 1
     ; in S2L if the quotient has to be negated, i.e. if exactly one of the
     ; two operands was -ve, else a 0.
 274 ; This function is here to keep branch displacements small.
     ;
     ; On the H8/300 a 32 bit value is negated a byte at a time: all four
     ; bytes are inverted and 1 is added, with the carry rippling from the
     ; least to the most significant byte.  For the numerator that order is
     ; A1L, A1H, A0L, A0H.
 275
 276 #ifdef __H8300__
 277
 278 divnorm:
 279         mov.b   #0,S2L          ; keep the sign in S2
 280         mov.b   A0H,A0H         ; is the numerator -ve
 281         bge     postive
 282
 283         ; negate arg
 284         not     A0H
 285         not     A1H
 286         not     A0L
 287         not     A1L
 288
 289         add     #1,A1L
 290         addx    #0,A1H
 291         addx    #0,A0L          ; low byte of the high word comes first
 292         addx    #0,A0H          ; most significant byte takes the last carry
 293
 294         mov.b   #1,S2L          ; the sign will be -ve
 295 postive:
 296         mov.b   A2H,A2H         ; is the denominator -ve
 297         bge     postive2
 298         not     A2L
 299         not     A2H
 300         not     A3L
 301         not     A3H
 302         add.b   #1,A3L
 303         addx    #0,A3H
 304         addx    #0,A2L
 305         addx    #0,A2H
 306         xor     #1,S2L          ; toggle result sign
 307 postive2:
 308         rts
 309
 310 #else /* __H8300H__ */
 311
 312 divnorm:
 313         mov.b   #0,S2L          ; keep the sign in S2
 314         mov.l   A0P,A0P         ; is the numerator -ve
 315         bge     postive
 316
 317         neg.l   A0P             ; negate arg
 318         mov.b   #1,S2L          ; the sign will be -ve
 319
 320 postive:
 321         mov.l   A1P,A1P         ; is the denominator -ve
 322         bge     postive2
 323
 324         neg.l   A1P             ; negate arg
 325         xor.b   #1,S2L          ; toggle result sign
 326
 327 postive2:
 328         rts
 329
 330 #endif
 331
 332 ; numerator in A0/A1
 333 ; denominator in A2/A3
     ; (A0P and A1P for 300h)
     ;
     ; ___modsi3: signed 32 bit remainder, returned in A0/A1 (A0P for 300h).
     ; The remainder takes the sign of the numerator alone, so that
     ; (n/d)*d + n%d == n holds together with the truncating ___divsi3.
     ; divnorm leaves the sign of the quotient in S2L, which also depends
     ; on the denominator, so the top word of the numerator is kept in S0
     ; across the call and S2L is rebuilt from its sign bit.  S0 has been
     ; pushed by then, is not touched by divnorm, and is cleared again by
     ; divmodsi4 before use.
 334         .global ___modsi3
 335 ___modsi3:
 336         PUSHP   S2P
 337         PUSHP   S0P
 338         PUSHP   S1P
 339
     #ifdef __H8300__
             mov.w   A0,S0           ; top word of the numerator
     #else
             mov.w   A0E,S0          ; top word of the numerator
     #endif
 340         bsr     divnorm         ; make both operands +ve
             mov.b   #0,S2L          ; drop the quotient sign from divnorm
             bld     #7,S0H          ; carry = sign bit of the original numerator
             bst     #0,S2L          ; S2L = 1 if the remainder must be negated
 341         bsr     divmodsi4       ; remainder comes back in S0/S1 (S0P for 300h)
 342 #ifdef __H8300__
 343         mov     S0,A0
 344         mov     S1,A1
 345 #else
 346         mov.l   S0P,A0P
 347 #endif
 348         bra     exitdiv         ; restore S regs, negate if S2L is set
 349
 350         .global ___udivsi3
     ; ___udivsi3: unsigned 32 bit divide.
     ; numerator in A0/A1, denominator in A2/A3 (A0P and A1P for 300h),
     ; quotient left where divmodsi4 puts it, in A0/A1 (A0P for 300h).
     ; S2L is cleared so that exitdiv does not negate the result.
 351 ___udivsi3:
 352         PUSHP   S2P
 353         PUSHP   S0P
 354         PUSHP   S1P
 355         mov.b   #0,S2L  ; keep sign low
 356         bsr     divmodsi4
 357         bra     exitdiv ; pops S1P, S0P and S2P again
 358
 359         .global ___umodsi3
     ; ___umodsi3: unsigned 32 bit remainder.
     ; numerator in A0/A1, denominator in A2/A3 (A0P and A1P for 300h).
     ; divmodsi4 leaves the remainder in S0/S1 (S0P for 300h); it is copied
     ; to A0/A1 (A0P) before exitdiv restores the S registers.
     ; S2L is cleared so that exitdiv does not negate the result.
 360 ___umodsi3:
 361         PUSHP   S2P
 362         PUSHP   S0P
 363         PUSHP   S1P
 364         mov.b   #0,S2L  ; keep sign low
 365         bsr     divmodsi4
 366 #ifdef __H8300__
 367         mov     S0,A0   ; remainder high word
 368         mov     S1,A1   ; remainder low word
 369 #else
 370         mov.l   S0P,A0P ; remainder
 371 #endif
 372         bra     exitdiv ; pops S1P, S0P and S2P again
 373
 374         .global ___divsi3
     ; ___divsi3: signed 32 bit divide.
     ; numerator in A0/A1, denominator in A2/A3 (A0P and A1P for 300h),
     ; quotient returned in A0/A1 (A0P for 300h).
     ; divnorm makes both operands +ve and leaves 1 in S2L when the
     ; quotient has to be negated; divmodsi4 divides the magnitudes.
 375 ___divsi3:
 376         PUSHP   S2P
 377         PUSHP   S0P
 378         PUSHP   S1P
 379         jsr     divnorm
 380         jsr     divmodsi4
 381
 382         ; examine what the sign should be
     ; exitdiv is the common exit of the four SImode entry points.  It
     ; restores S1P and S0P, negates the result in A0/A1 (A0P for 300h) when
     ; S2L is not zero, then restores S2P and returns.
     ; On the H8/300 the negate is done a byte at a time, so the carry of
     ; the +1 has to ripple from the least to the most significant byte:
     ; A1L, A1H, A0L, A0H.
 383 exitdiv:
 384         POPP    S1P
 385         POPP    S0P
 386
 387         or      S2L,S2L
 388         beq     reti
 389
 390         ; should be -ve
 391 #ifdef __H8300__
 392         not     A0H
 393         not     A1H
 394         not     A0L
 395         not     A1L
 396
 397         add     #1,A1L
 398         addx    #0,A1H
 399         addx    #0,A0L          ; low byte of the high word comes first
 400         addx    #0,A0H          ; most significant byte takes the last carry
 401 #else /* __H8300H__ */
 402         neg.l   A0P
 403 #endif
 404
 405 reti:
 406         POPP    S2P
 407         rts
 408
 409         ; takes A0/A1 numerator (A0P for 300h)
 410         ; A2/A3 denominator (A1P for 300h)
 411         ; returns A0/A1 quotient (A0P for 300h)
 412         ; S0/S1 remainder (S0P for 300h)
 413         ; trashes S2 (only S2H is written here, S2L keeps the sign flag)
     ;
     ; Both operands are treated as unsigned.
     ; If the denominator fits in 8 bits the quotient is built a byte at a
     ; time with divxu, starting at the first non zero numerator byte and
     ; carrying each remainder in S1H into the next step.
     ; Otherwise the denominator is at least 256, the quotient fits in 24
     ; bits, and a 24 step shift and subtract loop is used.
     ; Note that the label names read the wrong way round: DenHighZero is
     ; reached when the top three denominator bytes are NOT all zero, and
     ; NumByteNZero is where the divide starts at numerator byte N.
 414
 415 #ifdef __H8300__
 416
 417 divmodsi4:
 418         sub.w   S0,S0           ; zero play area
 419         mov.w   S0,S1
 420         mov.b   A2H,S2H         ; or the top three denominator bytes together
 421         or      A2L,S2H
 422         or      A3H,S2H
 423         bne     DenHighZero     ; some of them set, do it the long way
 424         mov.b   A0H,A0H         ; find the first non zero numerator byte
 425         bne     NumByte0Zero
 426         mov.b   A0L,A0L
 427         bne     NumByte1Zero
 428         mov.b   A1H,A1H
 429         bne     NumByte2Zero
 430         bra     NumByte3Zero
 431 NumByte0Zero:
 432         mov.b   A0H,S1L         ; S1 = previous remainder : next byte
 433         divxu   A3L,S1          ; S1H = remainder, S1L = quotient byte
 434         mov.b   S1L,A0H
 435 NumByte1Zero:
 436         mov.b   A0L,S1L
 437         divxu   A3L,S1
 438         mov.b   S1L,A0L
 439 NumByte2Zero:
 440         mov.b   A1H,S1L
 441         divxu   A3L,S1
 442         mov.b   S1L,A1H
 443 NumByte3Zero:
 444         mov.b   A1L,S1L
 445         divxu   A3L,S1
 446         mov.b   S1L,A1L
 447
 448         mov.b   S1H,S1L         ; final remainder into the low byte of S1
 449         mov.b   #0x0,S1H        ; S0 is still zero, so S0/S1 = remainder
 450         rts
 451
 452 ; have to do the divide by shift and test
 453 DenHighZero:
 454         mov.b   A0H,S1L         ; top numerator byte starts off the remainder
 455         mov.b   A0L,A0H         ; and the other three bytes move up by one
 456         mov.b   A1H,A0L
 457         mov.b   A1L,A1H
 458
 459         mov.b   #0,A1L          ; quotient bits are shifted in from here
 460         mov.b   #24,S2H ; only do 24 iterations
 461
 462 nextbit:
 463         add.w   A1,A1   ; double the answer guess
 464         rotxl   A0L     ; and shift the next numerator bit out to carry
 465         rotxl   A0H
 466
 467         rotxl   S1L     ; double remainder
 468         rotxl   S1H
 469         rotxl   S0L
 470         rotxl   S0H
 471         sub.w   A3,S1   ; does it all fit
 472         subx    A2L,S0L
 473         subx    A2H,S0H
 474         bhs     setone
 475
 476         add.w   A3,S1   ; no, restore mistake
 477         addx    A2L,S0L
 478         addx    A2H,S0H
 479
 480         dec     S2H
 481         bne     nextbit
 482         rts
 483
 484 setone:
 485         inc     A1L     ; it fitted, so this quotient bit is a 1
 486         dec     S2H
 487         bne     nextbit
 488         rts
 489
 490 #else /* __H8300H__ */
     ; H8/300H version: unsigned divide of A0P by A1P.
     ; Quotient comes back in A0P and the remainder in S0P.
     ; S2H is used as the loop counter and A2P as scratch.
 491
 492 divmodsi4:
 493         sub.l   S0P,S0P         ; zero play area
 494         mov.w   A1E,A1E         ; denominator top word 0?
 495         bne     DenHighZero     ; no (despite the label name), long way
 496
 497         ; do it the easy way, see page 107 in manual
 498         mov.w   A0E,A2          ; top word of the numerator
 499         extu.l  A2P             ; zero extended to 32 bits
 500         divxu.w A1,A2P          ; A2 = top word of quotient, A2E = remainder
 501         mov.w   A2E,A0E         ; remainder : low word of the numerator
 502         divxu.w A1,A0P          ; A0 = low word of quotient, A0E = remainder
 503         mov.w   A0E,S0          ; final remainder
 504         mov.w   A2,A0E          ; put the top word of the quotient in place
 505         extu.l  S0P             ; remainder zero extended to 32 bits
 506         rts
 507
 508 DenHighZero:
 509         mov.w   A0E,A2          ; shift the numerator up by one byte;
 510         mov.b   A2H,S0L         ; its top byte starts off the remainder
 511         mov.b   A2L,A2H
 512         mov.b   A0H,A2L
 513         mov.w   A2,A0E
 514         mov.b   A0L,A0H
 515         mov.b   #0,A0L          ; quotient bits are shifted in from here
 516         mov.b   #24,S2H         ; only do 24 iterations
 517
 518 nextbit:
 519         shll.l  A0P             ; double the answer guess
 520         rotxl.l S0P             ; double remainder
 521         sub.l   A1P,S0P         ; does it all fit?
 522         bhs     setone
 523
 524         add.l   A1P,S0P         ; no, restore mistake
 525         dec     S2H
 526         bne     nextbit
 527         rts
 528
 529 setone:
 530         inc     A0L             ; it fitted, so this quotient bit is a 1
 531         dec     S2H
 532         bne     nextbit
 533         rts
 534
 535 #endif
 536 #endif /* L_divsi3 */
 537
 538 #ifdef L_mulhi3
 539
 540 ;; HImode multiply.
 541 ; The h8 only has an 8*8->16 multiply.
 542 ; The answer is the same as:
 543 ;
 544 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
 545 ; (we can ignore A1.h * A0.h because that will all fall off the top)
 546 ; A0 in
 547 ; A1 in
 548 ; A0 answer
     ; A2 and A3 are used as scratch.
     ; Only the low byte of each cross product is added, into the high byte
     ; of the result; everything above that falls off the top as well.
 549
 550 #ifdef __H8300__
 551         .section .text
 552         .align 2
 553         .global ___mulhi3
 554 ___mulhi3:
 555         mov.b   A1L,A2L         ; A2l gets srcb.l
 556         mulxu   A0L,A2          ; A2 gets first sub product
 557
 558         mov.b   A0H,A3L         ; prepare for
 559         mulxu   A1L,A3          ; second sub product
 560
 561         add.b   A3L,A2H         ; sum first two terms
 562
 563         mov.b   A1H,A3L         ; third sub product
 564         mulxu   A0L,A3
 565
 566         add.b   A3L,A2H         ; almost there
 567         mov.w   A2,A0           ; that is
 568         rts
 569
 570 #endif
 571 #endif /* L_mulhi3 */
 572
 573 #ifdef L_mulsi3
 574
 575 ;; SImode multiply.
 576 ;;
 577 ;; I think that shift and add may be sufficient for this.  Using the
 578 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
 579 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
 580 ;; quickly on small args.
 581 ;;
 582 ;; A0/A1 src_a
 583 ;; A2/A3 src_b
 584 ;;
 585 ;;  while (a)
 586 ;;    {
 587 ;;      if (a & 1)
 588 ;;        r += b;
 589 ;;      a >>= 1;
 590 ;;      b <<= 1;
 591 ;;    }
 592
 593         .section .text
 594         .align 2
 595
 596 #ifdef __H8300__
     ; H8/300 version.
     ;   in:  a in A0/A1, b in A2/A3 (A0 and A2 are the high words)
     ;   out: A0/A1 = low 32 bits of a * b
     ; The result r is built up in S0/S1, which are saved and restored.
     ; S2 is not used by this routine, so it is not saved.
 597
 598         .global ___mulsi3
 599 ___mulsi3:
 600         PUSHP   S0P
 601         PUSHP   S1P
 603
 604         sub.w   S0,S0           ; r = 0
 605         sub.w   S1,S1
 606
 607         ; while (a)
 608 _top:   mov.w   A0,A0
 609         bne     _more
 610         mov.w   A1,A1
 611         beq     _done
 612 _more:  ; if (a & 1)
 613         bld     #0,A1L          ; carry = bit 0 of a
 614         bcc     _nobit
 615         ; r += b
 616         add.w   A3,S1
 617         addx    A2L,S0L
 618         addx    A2H,S0H
 619 _nobit:
 620         ; a >>= 1
 621         shlr    A0H
 622         rotxr   A0L
 623         rotxr   A1H
 624         rotxr   A1L
 625
 626         ; b <<= 1
 627         add.w   A3,A3
 628         addx    A2L,A2L
 629         addx    A2H,A2H
 630         bra     _top
 631
 632 _done:
 633         mov.w   S0,A0           ; return r
 634         mov.w   S1,A1
 636         POPP    S1P
 637         POPP    S0P
 638         rts
 639
 640 #else /* __H8300H__ */
     ; H8/300H version.
     ;   in:  a in A0P, b in A1P
     ;   out: A0P = low 32 bits of a * b
     ; The result r is built up in A2P.  The loop ends as soon as the
     ; remaining bits of a are all zero.
 641
 642         .h8300h
 643
 644         .global ___mulsi3
 645 ___mulsi3:
 646         sub.l   A2P,A2P         ; r = 0
 647
 648         ; while (a)
 649 _top:   mov.l   A0P,A0P
 650         beq     _done
 651
 652         ; if (a & 1)
 653         bld     #0,A0L          ; carry = bit 0 of a
 654         bcc     _nobit
 655
 656         ; r += b
 657         add.l   A1P,A2P
 658
 659 _nobit:
 660         ; a >>= 1
 661         shlr.l  A0P
 662
 663         ; b <<= 1
 664         shll.l  A1P
 665         bra     _top
 666
 667 _done:
 668         mov.l   A2P,A0P         ; return r
 669         rts
 670
 671 #endif
 672 #endif /* L_mulsi3 */