1 /* 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>
5 Copyright 2001 Free Software Foundation, Inc.
7 This file is part of GNU CC and is released under the terms of
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GNU CC source directory for a copy
18 /* Hardware General Registers. */
52 /* Hardware Space Registers. */
62 /* Hardware Floating Point Registers. */
80 /* Hardware Control Registers. */
/* Register aliases and millicode stack-frame slot offsets.
   NOTE(review): `mrp` (and later `mrp_slot`) appears twice below; the
   original file selects one definition per target via preprocessor
   conditionals that were elided from this extract — confirm against the
   full milli64.S before editing.  Do not deduplicate. */
82 sar: .reg %cr11 /* Shift Amount Register */
84 /* Software Architecture General Registers. */
85 rp: .reg r2 /* return pointer */
87 mrp: .reg r2 /* millicode return pointer */
89 mrp: .reg r31 /* millicode return pointer */
91 ret0: .reg r28 /* return value */
92 ret1: .reg r29 /* return value (high part of double) */
93 sp: .reg r30 /* stack pointer */
94 dp: .reg r27 /* data pointer */
95 arg0: .reg r26 /* argument */
96 arg1: .reg r25 /* argument or high part of double argument */
97 arg2: .reg r24 /* argument */
98 arg3: .reg r23 /* argument or high part of double argument */
100 /* Software Architecture Space Registers. */
101 /* sr0 ; return link from BLE */
102 sret: .reg sr1 /* return value */
103 sarg: .reg sr1 /* argument */
104 /* sr4 ; PC SPACE tracker */
105 /* sr5 ; process private data */
107 /* Frame Offsets (millicode convention!) Used when calling other
108 millicode routines. Stack unwinding is dependent upon these
110 r31_slot: .equ -20 /* "current RP" slot */
111 sr0_slot: .equ -16 /* "static link" slot */
113 mrp_slot: .equ -16 /* "current RP" slot */
114 psp_slot: .equ -8 /* "previous SP" slot */
116 mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
/* Helper macros: symbol definition, millicode inter-routine branch/return,
   token pasting, and (ELF flavor) output-section selection.
   NOTE(review): the two MILLI_* families below (external BE/BLE through
   sr7 vs. local B/BL/BV) and the two CAT() forms (ANSI ## vs. K&R comment
   splice) are alternatives chosen by #ifdef guards elided from this
   extract — both must be kept. */
120 #define DEFINE(name,value)name: .EQU value
121 #define RDEFINE(name,value)name: .REG value
123 #define MILLI_BE(lbl) BE lbl(sr7,r0)
124 #define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
125 #define MILLI_BLE(lbl) BLE lbl(sr7,r0)
126 #define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
127 #define MILLIRETN BE,n 0(sr0,mrp)
128 #define MILLIRET BE 0(sr0,mrp)
129 #define MILLI_RETN BE,n 0(sr0,mrp)
130 #define MILLI_RET BE 0(sr0,mrp)
132 #define MILLI_BE(lbl) B lbl
133 #define MILLI_BEN(lbl) B,n lbl
134 #define MILLI_BLE(lbl) BL lbl,mrp
135 #define MILLI_BLEN(lbl) BL,n lbl,mrp
136 #define MILLIRETN BV,n 0(mrp)
137 #define MILLIRET BV 0(mrp)
138 #define MILLI_RETN BV,n 0(mrp)
139 #define MILLI_RET BV 0(mrp)
143 #define CAT(a,b) a##b
145 #define CAT(a,b) a/**/b
149 #define SUBSPA_MILLI .section .text
150 #define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
151 #define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
153 #define SUBSPA_DATA .section .data
155 #define GLOBAL $global$
156 #define GSYM(sym) !sym:
157 #define LSYM(sym) !CAT(.L,sym:)
158 #define LREF(sym) CAT(.L,sym)
163 /* This used to be .milli but since link32 places different named
164 sections in different segments millicode ends up a long ways away
165 from .text (1meg?). This way they will be a lot closer.
167 The SUBSPA_MILLI_* specify locality sets for certain millicode
168 modules in order to ensure that modules that call one another are
169 placed close together. Without locality sets this is unlikely to
170 happen because of the Dynamite linker library search algorithm. We
171 want these modules close together so that short calls always reach
172 (we don't want to require long calls or use long call stubs). */
/* Section-selection macros for the non-ELF (SOM subspace) toolchains:
   first a .subspa/.attr flavor, then the classic $MILLICODE$/$BSS$
   subspace flavor, each guarded by #ifdefs elided from this extract.
   Local labels use the L$ prefix here rather than ELF's .L prefix. */
174 #define SUBSPA_MILLI .subspa .text
175 #define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
176 #define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
177 #define ATTR_MILLI .attr code,read,execute
178 #define SUBSPA_DATA .subspa .data
179 #define ATTR_DATA .attr init_data,read,write
182 #define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
183 #define SUBSPA_MILLI_DIV SUBSPA_MILLI
184 #define SUBSPA_MILLI_MUL SUBSPA_MILLI
186 #define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
188 #define GLOBAL $global$
190 #define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16
192 #define GSYM(sym) !sym
193 #define LSYM(sym) !CAT(L$,sym)
194 #define LREF(sym) CAT(L$,sym)
199 /* ROUTINES: $$divI, $$divoI
201 Single precision divide for signed binary integers.
203 The quotient is truncated towards zero.
204 The sign of the quotient is the XOR of the signs of the dividend and
206 Divide by zero is trapped.
207 Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
213 . sr0 == return space when called externally
220 OTHER REGISTERS AFFECTED:
224 . Causes a trap under the following conditions:
225 . divisor is zero (traps with ADDIT,= 0,25,0)
226 . dividend==-2**31 and divisor==-1 and routine is $$divoI
227 . (traps with ADDO 26,25,0)
228 . Changes memory at the following places:
233 . Suitable for internal or external millicode.
234 . Assumes the special millicode register conventions.
237 . Branches to other millicode routines using BE
238 . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
240 . For selected divisors, calls a divide by constant routine written by
241 . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
243 . The only overflow case is -2**31 divided by -1.
244 . Both routines return -2**31 but only $$divoI traps. */
/* $$divI / $$divoI: symbol declarations.  The quotient is returned in
   ret1 (r29) per the millicode convention; the divide-by-constant
   helpers ($$divI_2 .. $$divI_15) are separate millicode modules. */
247 RDEFINE(retreg,ret1) /* r29 */
251 .import $$divI_2,millicode
252 .import $$divI_3,millicode
253 .import $$divI_4,millicode
254 .import $$divI_5,millicode
255 .import $$divI_6,millicode
256 .import $$divI_7,millicode
257 .import $$divI_8,millicode
258 .import $$divI_9,millicode
259 .import $$divI_10,millicode
260 .import $$divI_12,millicode
261 .import $$divI_14,millicode
262 .import $$divI_15,millicode
263 .export $$divI,millicode
264 .export $$divoI,millicode
/* $$divI fast paths: divisor == -1 dispatch, then power-of-2 divisors.
   A positive power-of-2 divisor is handled with arithmetic right shifts
   (extrs); shift count is found by binary search on the divisor bits.
   For negative dividends, (divisor-1) is added first so the shift
   truncates toward zero.  Negative power-of-2 divisors negate the
   operands and fall into the same path.  NOTE(review): branch targets
   (pow2 entry, regular_seq, negative1) are defined on lines elided from
   this extract. */
269 comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
271 ldo -1(arg1),temp /* is there at most one bit set ? */
272 and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
273 addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
276 addi,>= 0,arg0,retreg /* if numerator is negative, add the */
277 add arg0,temp,retreg /* (denominator -1) to correct for shifts */
278 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
279 extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
280 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
281 ldi 0xcc,temp1 /* setup 0xcc in temp1 */
282 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
283 extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
284 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
285 ldi 0xaa,temp /* setup 0xaa in temp */
286 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
287 extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
288 and,= arg1,temp1,r0 /* test denominator with 0xcc */
289 extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
290 and,= arg1,temp,r0 /* test denominator with 0xaa */
291 extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
294 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
295 b,n LREF(regular_seq)
296 sub r0,arg1,temp /* make denominator positive */
297 comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
298 ldo -1(temp),retreg /* is there at most one bit set ? */
299 and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
300 b,n LREF(regular_seq)
301 sub r0,arg0,retreg /* negate numerator */
302 comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
303 copy retreg,arg0 /* set up arg0, arg1 and temp */
304 copy temp,arg1 /* before branching to pow2 */
/* $$divI general path: 32 unrolled DS (divide-step) iterations.
   The dividend is made non-negative, the divisor is pre-negated to set
   the PSW V-bit for DS, then each ds/addc pair computes one quotient
   bit and shifts it into retreg via carry.  The exact instruction order
   and flag flow are load-bearing — do not reorder.  The final xor,>=
   applies the sign rule (quotient negative iff operand signs differ). */
308 comib,>>=,n 15,arg1,LREF(small_divisor)
309 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
311 subi 0,retreg,retreg /* make it positive */
312 sub 0,arg1,temp /* clear carry, */
313 /* negate the divisor */
314 ds 0,temp,0 /* set V-bit to the comple- */
315 /* ment of the divisor sign */
316 add retreg,retreg,retreg /* shift msb bit into carry */
317 ds r0,arg1,temp /* 1st divide step, if no carry */
318 addc retreg,retreg,retreg /* shift retreg with/into carry */
319 ds temp,arg1,temp /* 2nd divide step */
320 addc retreg,retreg,retreg /* shift retreg with/into carry */
321 ds temp,arg1,temp /* 3rd divide step */
322 addc retreg,retreg,retreg /* shift retreg with/into carry */
323 ds temp,arg1,temp /* 4th divide step */
324 addc retreg,retreg,retreg /* shift retreg with/into carry */
325 ds temp,arg1,temp /* 5th divide step */
326 addc retreg,retreg,retreg /* shift retreg with/into carry */
327 ds temp,arg1,temp /* 6th divide step */
328 addc retreg,retreg,retreg /* shift retreg with/into carry */
329 ds temp,arg1,temp /* 7th divide step */
330 addc retreg,retreg,retreg /* shift retreg with/into carry */
331 ds temp,arg1,temp /* 8th divide step */
332 addc retreg,retreg,retreg /* shift retreg with/into carry */
333 ds temp,arg1,temp /* 9th divide step */
334 addc retreg,retreg,retreg /* shift retreg with/into carry */
335 ds temp,arg1,temp /* 10th divide step */
336 addc retreg,retreg,retreg /* shift retreg with/into carry */
337 ds temp,arg1,temp /* 11th divide step */
338 addc retreg,retreg,retreg /* shift retreg with/into carry */
339 ds temp,arg1,temp /* 12th divide step */
340 addc retreg,retreg,retreg /* shift retreg with/into carry */
341 ds temp,arg1,temp /* 13th divide step */
342 addc retreg,retreg,retreg /* shift retreg with/into carry */
343 ds temp,arg1,temp /* 14th divide step */
344 addc retreg,retreg,retreg /* shift retreg with/into carry */
345 ds temp,arg1,temp /* 15th divide step */
346 addc retreg,retreg,retreg /* shift retreg with/into carry */
347 ds temp,arg1,temp /* 16th divide step */
348 addc retreg,retreg,retreg /* shift retreg with/into carry */
349 ds temp,arg1,temp /* 17th divide step */
350 addc retreg,retreg,retreg /* shift retreg with/into carry */
351 ds temp,arg1,temp /* 18th divide step */
352 addc retreg,retreg,retreg /* shift retreg with/into carry */
353 ds temp,arg1,temp /* 19th divide step */
354 addc retreg,retreg,retreg /* shift retreg with/into carry */
355 ds temp,arg1,temp /* 20th divide step */
356 addc retreg,retreg,retreg /* shift retreg with/into carry */
357 ds temp,arg1,temp /* 21st divide step */
358 addc retreg,retreg,retreg /* shift retreg with/into carry */
359 ds temp,arg1,temp /* 22nd divide step */
360 addc retreg,retreg,retreg /* shift retreg with/into carry */
361 ds temp,arg1,temp /* 23rd divide step */
362 addc retreg,retreg,retreg /* shift retreg with/into carry */
363 ds temp,arg1,temp /* 24th divide step */
364 addc retreg,retreg,retreg /* shift retreg with/into carry */
365 ds temp,arg1,temp /* 25th divide step */
366 addc retreg,retreg,retreg /* shift retreg with/into carry */
367 ds temp,arg1,temp /* 26th divide step */
368 addc retreg,retreg,retreg /* shift retreg with/into carry */
369 ds temp,arg1,temp /* 27th divide step */
370 addc retreg,retreg,retreg /* shift retreg with/into carry */
371 ds temp,arg1,temp /* 28th divide step */
372 addc retreg,retreg,retreg /* shift retreg with/into carry */
373 ds temp,arg1,temp /* 29th divide step */
374 addc retreg,retreg,retreg /* shift retreg with/into carry */
375 ds temp,arg1,temp /* 30th divide step */
376 addc retreg,retreg,retreg /* shift retreg with/into carry */
377 ds temp,arg1,temp /* 31st divide step */
378 addc retreg,retreg,retreg /* shift retreg with/into carry */
379 ds temp,arg1,temp /* 32nd divide step, */
380 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
381 xor,>= arg0,arg1,0 /* get correct sign of quotient */
382 sub 0,retreg,retreg /* based on operand signs */
/* $$divI small-divisor dispatch (divisor in 0..15): a jump table of
   two-instruction slots — trap on 0, return for 1, branch to the
   divide-by-constant millicode for eligible divisors, and fall back to
   the normal path for 11 and 13.  The trailing negative1 code handles
   divisor == -1: negate the dividend; addo traps only in $$divoI's
   overflow case (dividend == -2**31).  NOTE(review): the blr that
   indexes this table and the per-slot labels are on lines elided from
   this extract. */
389 /* Clear the upper 32 bits of the arg1 register. We are working with */
390 /* small divisors (and 32 bit integers) We must not be misled */
391 /* by "1" bits left in the upper 32 bits. */
396 /* table for divisor == 0,1, ... ,15 */
397 addit,= 0,arg1,r0 /* trap if divisor == 0 */
399 MILLIRET /* divisor == 1 */
401 MILLI_BEN($$divI_2) /* divisor == 2 */
403 MILLI_BEN($$divI_3) /* divisor == 3 */
405 MILLI_BEN($$divI_4) /* divisor == 4 */
407 MILLI_BEN($$divI_5) /* divisor == 5 */
409 MILLI_BEN($$divI_6) /* divisor == 6 */
411 MILLI_BEN($$divI_7) /* divisor == 7 */
413 MILLI_BEN($$divI_8) /* divisor == 8 */
415 MILLI_BEN($$divI_9) /* divisor == 9 */
417 MILLI_BEN($$divI_10) /* divisor == 10 */
419 b LREF(normal) /* divisor == 11 */
421 MILLI_BEN($$divI_12) /* divisor == 12 */
423 b LREF(normal) /* divisor == 13 */
425 MILLI_BEN($$divI_14) /* divisor == 14 */
427 MILLI_BEN($$divI_15) /* divisor == 15 */
431 sub 0,arg0,retreg /* result is negation of dividend */
433 addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
442 . Single precision divide for unsigned integers.
444 . Quotient is truncated towards zero.
445 . Traps on divide by zero.
451 . sr0 == return space when called externally
458 OTHER REGISTERS AFFECTED:
462 . Causes a trap under the following conditions:
464 . Changes memory at the following places:
469 . Does not create a stack frame.
470 . Suitable for internal or external millicode.
471 . Assumes the special millicode register conventions.
474 . Branches to other millicode routines using BE:
475 . $$divU_# for 3,5,6,7,9,10,12,14,15
477 . For selected small divisors calls the special divide by constant
478 . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
/* $$divU: symbol declarations.  Unsigned quotient returned in ret1
   (r29); divide-by-constant helpers exist only for the divisors whose
   reciprocal trick pays off (3,5,6,7,9,10,12,14,15). */
481 RDEFINE(retreg,ret1) /* r29 */
485 .export $$divU,millicode
486 .import $$divU_3,millicode
487 .import $$divU_5,millicode
488 .import $$divU_6,millicode
489 .import $$divU_7,millicode
490 .import $$divU_9,millicode
491 .import $$divU_10,millicode
492 .import $$divU_12,millicode
493 .import $$divU_14,millicode
494 .import $$divU_15,millicode
/* $$divU power-of-2 fast path: same bit-search-and-shift scheme as the
   signed version, but with logical right shifts (extru) and no sign
   correction.  A zero divisor reaches the addit and traps. */
499 /* The subtract is not nullified since it does no harm and can be used
500 by the two cases that branch back to "normal". */
501 ldo -1(arg1),temp /* is there at most one bit set ? */
502 and,= arg1,temp,r0 /* if so, denominator is power of 2 */
504 addit,= 0,arg1,0 /* trap for zero dvr */
506 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
507 extru retreg,15,16,retreg /* retreg = retreg >> 16 */
508 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
509 ldi 0xcc,temp1 /* setup 0xcc in temp1 */
510 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
511 extru retreg,23,24,retreg /* retreg = retreg >> 8 */
512 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
513 ldi 0xaa,temp /* setup 0xaa in temp */
514 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
515 extru retreg,27,28,retreg /* retreg = retreg >> 4 */
516 and,= arg1,temp1,r0 /* test denominator with 0xcc */
517 extru retreg,29,30,retreg /* retreg = retreg >> 2 */
518 and,= arg1,temp,r0 /* test denominator with 0xaa */
519 extru retreg,30,31,retreg /* retreg = retreg >> 1 */
/* $$divU general path: pre-negate the divisor, force the V-bit to 1
   via a dummy ds, then 32 unrolled ds/addc divide steps shift quotient
   bits into retreg through carry.  Instruction order and carry flow
   are load-bearing — do not reorder. */
523 comib,>= 15,arg1,LREF(special_divisor)
524 subi 0,arg1,temp /* clear carry, negate the divisor */
525 ds r0,temp,r0 /* set V-bit to 1 */
527 add arg0,arg0,retreg /* shift msb bit into carry */
528 ds r0,arg1,temp /* 1st divide step, if no carry */
529 addc retreg,retreg,retreg /* shift retreg with/into carry */
530 ds temp,arg1,temp /* 2nd divide step */
531 addc retreg,retreg,retreg /* shift retreg with/into carry */
532 ds temp,arg1,temp /* 3rd divide step */
533 addc retreg,retreg,retreg /* shift retreg with/into carry */
534 ds temp,arg1,temp /* 4th divide step */
535 addc retreg,retreg,retreg /* shift retreg with/into carry */
536 ds temp,arg1,temp /* 5th divide step */
537 addc retreg,retreg,retreg /* shift retreg with/into carry */
538 ds temp,arg1,temp /* 6th divide step */
539 addc retreg,retreg,retreg /* shift retreg with/into carry */
540 ds temp,arg1,temp /* 7th divide step */
541 addc retreg,retreg,retreg /* shift retreg with/into carry */
542 ds temp,arg1,temp /* 8th divide step */
543 addc retreg,retreg,retreg /* shift retreg with/into carry */
544 ds temp,arg1,temp /* 9th divide step */
545 addc retreg,retreg,retreg /* shift retreg with/into carry */
546 ds temp,arg1,temp /* 10th divide step */
547 addc retreg,retreg,retreg /* shift retreg with/into carry */
548 ds temp,arg1,temp /* 11th divide step */
549 addc retreg,retreg,retreg /* shift retreg with/into carry */
550 ds temp,arg1,temp /* 12th divide step */
551 addc retreg,retreg,retreg /* shift retreg with/into carry */
552 ds temp,arg1,temp /* 13th divide step */
553 addc retreg,retreg,retreg /* shift retreg with/into carry */
554 ds temp,arg1,temp /* 14th divide step */
555 addc retreg,retreg,retreg /* shift retreg with/into carry */
556 ds temp,arg1,temp /* 15th divide step */
557 addc retreg,retreg,retreg /* shift retreg with/into carry */
558 ds temp,arg1,temp /* 16th divide step */
559 addc retreg,retreg,retreg /* shift retreg with/into carry */
560 ds temp,arg1,temp /* 17th divide step */
561 addc retreg,retreg,retreg /* shift retreg with/into carry */
562 ds temp,arg1,temp /* 18th divide step */
563 addc retreg,retreg,retreg /* shift retreg with/into carry */
564 ds temp,arg1,temp /* 19th divide step */
565 addc retreg,retreg,retreg /* shift retreg with/into carry */
566 ds temp,arg1,temp /* 20th divide step */
567 addc retreg,retreg,retreg /* shift retreg with/into carry */
568 ds temp,arg1,temp /* 21st divide step */
569 addc retreg,retreg,retreg /* shift retreg with/into carry */
570 ds temp,arg1,temp /* 22nd divide step */
571 addc retreg,retreg,retreg /* shift retreg with/into carry */
572 ds temp,arg1,temp /* 23rd divide step */
573 addc retreg,retreg,retreg /* shift retreg with/into carry */
574 ds temp,arg1,temp /* 24th divide step */
575 addc retreg,retreg,retreg /* shift retreg with/into carry */
576 ds temp,arg1,temp /* 25th divide step */
577 addc retreg,retreg,retreg /* shift retreg with/into carry */
578 ds temp,arg1,temp /* 26th divide step */
579 addc retreg,retreg,retreg /* shift retreg with/into carry */
580 ds temp,arg1,temp /* 27th divide step */
581 addc retreg,retreg,retreg /* shift retreg with/into carry */
582 ds temp,arg1,temp /* 28th divide step */
583 addc retreg,retreg,retreg /* shift retreg with/into carry */
584 ds temp,arg1,temp /* 29th divide step */
585 addc retreg,retreg,retreg /* shift retreg with/into carry */
586 ds temp,arg1,temp /* 30th divide step */
587 addc retreg,retreg,retreg /* shift retreg with/into carry */
588 ds temp,arg1,temp /* 31st divide step */
589 addc retreg,retreg,retreg /* shift retreg with/into carry */
590 ds temp,arg1,temp /* 32nd divide step, */
592 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
/* $$divU small-divisor dispatch (divisor in 0..15) plus the entry that
   separates out divisors with the high bit set (big_divisor).  Each
   table slot is two instructions; eligible divisors branch to their
   divide-by-constant routine, 11 and 13 re-arm the V-bit and rejoin the
   normal path.  The comib/blr rewrite noted below works around a
   nullification bug in the 815 Stirling chip set.  NOTE(review): the
   blr indexing this table and the big_divisor body are on lines elided
   from this extract. */
594 /* Handle the cases where divisor is a small constant or has high bit on. */
595 LSYM(special_divisor)
597 /* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
599 /* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
600 generating such a blr, comib sequence. A problem in nullification. So I
601 rewrote this code. */
604 /* Clear the upper 32 bits of the arg1 register. We are working with
605 small divisors (and 32 bit unsigned integers) We must not be misled
606 by "1" bits left in the upper 32 bits. */
609 comib,> 0,arg1,LREF(big_divisor)
614 LSYM(zero_divisor) /* this label is here to provide external visibility */
615 addit,= 0,arg1,0 /* trap for zero dvr */
617 MILLIRET /* divisor == 1 */
619 MILLIRET /* divisor == 2 */
620 extru arg0,30,31,retreg
621 MILLI_BEN($$divU_3) /* divisor == 3 */
623 MILLIRET /* divisor == 4 */
624 extru arg0,29,30,retreg
625 MILLI_BEN($$divU_5) /* divisor == 5 */
627 MILLI_BEN($$divU_6) /* divisor == 6 */
629 MILLI_BEN($$divU_7) /* divisor == 7 */
631 MILLIRET /* divisor == 8 */
632 extru arg0,28,29,retreg
633 MILLI_BEN($$divU_9) /* divisor == 9 */
635 MILLI_BEN($$divU_10) /* divisor == 10 */
637 b LREF(normal) /* divisor == 11 */
638 ds r0,temp,r0 /* set V-bit to 1 */
639 MILLI_BEN($$divU_12) /* divisor == 12 */
641 b LREF(normal) /* divisor == 13 */
642 ds r0,temp,r0 /* set V-bit to 1 */
643 MILLI_BEN($$divU_14) /* divisor == 14 */
645 MILLI_BEN($$divU_15) /* divisor == 15 */
648 /* Handle the case where the high bit is on in the divisor.
649 Compute: if( dividend>=divisor) quotient=1; else quotient=0;
650 Note: dividend>=divisor iff dividend-divisor does not borrow
651 and not borrow iff carry. */
665 . $$remI returns the remainder of the division of two signed 32-bit
666 . integers. The sign of the remainder is the same as the sign of
674 . sr0 == return space when called externally
681 OTHER REGISTERS AFFECTED:
685 . Causes a trap under the following conditions: DIVIDE BY ZERO
686 . Changes memory at the following places: NONE
690 . Does not create a stack frame
691 . Is usable for internal or external millicode
694 . Calls other millicode routines via mrp: NONE
695 . Calls other millicode routines: NONE */
/* $$remI / $$remoI: exports and power-of-2 fast paths.  For a positive
   power-of-2 divisor the remainder is a simple mask (and), with the
   numerator negated before and after when it is negative so the result
   carries the dividend's sign.  A negative divisor is negated first and
   checked against 0x80000000/0 before using the same mask trick.
   NOTE(review): the routine entry labels and neg_num/neg_num_2 targets
   are on lines elided from this extract. */
707 .export $$remI,MILLICODE
708 .export $$remoI,MILLICODE
709 ldo -1(arg1),tmp /* is there at most one bit set ? */
710 and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
711 addi,> 0,arg1,r0 /* if denominator > 0, use power */
715 comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
716 and arg0,tmp,retreg /* get the result */
719 subi 0,arg0,arg0 /* negate numerator */
720 and arg0,tmp,retreg /* get the result */
721 subi 0,retreg,retreg /* negate result */
724 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
726 b,n LREF(regular_seq)
727 sub r0,arg1,tmp /* make denominator positive */
728 comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
729 ldo -1(tmp),retreg /* is there at most one bit set ? */
730 and,= tmp,retreg,r0 /* if not, go to regular_seq */
731 b,n LREF(regular_seq)
732 comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
733 and arg0,retreg,retreg
736 subi 0,arg0,tmp /* test against 0x80000000 */
737 and tmp,retreg,retreg
/* $$remI general path: trap on zero divisor, make the dividend
   positive, set the V-bit from the divisor sign, then 32 unrolled
   ds/addc divide steps; the running partial remainder lives in tmp.
   Afterward the remainder is corrected by +/- arg1 when tmp went
   negative, and its sign is set to the sign of the dividend.
   Instruction order and carry flow are load-bearing — do not reorder. */
741 addit,= 0,arg1,0 /* trap if div by zero */
742 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
743 sub 0,retreg,retreg /* make it positive */
744 sub 0,arg1, tmp /* clear carry, */
745 /* negate the divisor */
746 ds 0, tmp,0 /* set V-bit to the comple- */
747 /* ment of the divisor sign */
748 or 0,0, tmp /* clear tmp */
749 add retreg,retreg,retreg /* shift msb bit into carry */
750 ds tmp,arg1, tmp /* 1st divide step, if no carry */
751 /* out, msb of quotient = 0 */
752 addc retreg,retreg,retreg /* shift retreg with/into carry */
754 ds tmp,arg1, tmp /* 2nd divide step */
755 addc retreg,retreg,retreg /* shift retreg with/into carry */
756 ds tmp,arg1, tmp /* 3rd divide step */
757 addc retreg,retreg,retreg /* shift retreg with/into carry */
758 ds tmp,arg1, tmp /* 4th divide step */
759 addc retreg,retreg,retreg /* shift retreg with/into carry */
760 ds tmp,arg1, tmp /* 5th divide step */
761 addc retreg,retreg,retreg /* shift retreg with/into carry */
762 ds tmp,arg1, tmp /* 6th divide step */
763 addc retreg,retreg,retreg /* shift retreg with/into carry */
764 ds tmp,arg1, tmp /* 7th divide step */
765 addc retreg,retreg,retreg /* shift retreg with/into carry */
766 ds tmp,arg1, tmp /* 8th divide step */
767 addc retreg,retreg,retreg /* shift retreg with/into carry */
768 ds tmp,arg1, tmp /* 9th divide step */
769 addc retreg,retreg,retreg /* shift retreg with/into carry */
770 ds tmp,arg1, tmp /* 10th divide step */
771 addc retreg,retreg,retreg /* shift retreg with/into carry */
772 ds tmp,arg1, tmp /* 11th divide step */
773 addc retreg,retreg,retreg /* shift retreg with/into carry */
774 ds tmp,arg1, tmp /* 12th divide step */
775 addc retreg,retreg,retreg /* shift retreg with/into carry */
776 ds tmp,arg1, tmp /* 13th divide step */
777 addc retreg,retreg,retreg /* shift retreg with/into carry */
778 ds tmp,arg1, tmp /* 14th divide step */
779 addc retreg,retreg,retreg /* shift retreg with/into carry */
780 ds tmp,arg1, tmp /* 15th divide step */
781 addc retreg,retreg,retreg /* shift retreg with/into carry */
782 ds tmp,arg1, tmp /* 16th divide step */
783 addc retreg,retreg,retreg /* shift retreg with/into carry */
784 ds tmp,arg1, tmp /* 17th divide step */
785 addc retreg,retreg,retreg /* shift retreg with/into carry */
786 ds tmp,arg1, tmp /* 18th divide step */
787 addc retreg,retreg,retreg /* shift retreg with/into carry */
788 ds tmp,arg1, tmp /* 19th divide step */
789 addc retreg,retreg,retreg /* shift retreg with/into carry */
790 ds tmp,arg1, tmp /* 20th divide step */
791 addc retreg,retreg,retreg /* shift retreg with/into carry */
792 ds tmp,arg1, tmp /* 21st divide step */
793 addc retreg,retreg,retreg /* shift retreg with/into carry */
794 ds tmp,arg1, tmp /* 22nd divide step */
795 addc retreg,retreg,retreg /* shift retreg with/into carry */
796 ds tmp,arg1, tmp /* 23rd divide step */
797 addc retreg,retreg,retreg /* shift retreg with/into carry */
798 ds tmp,arg1, tmp /* 24th divide step */
799 addc retreg,retreg,retreg /* shift retreg with/into carry */
800 ds tmp,arg1, tmp /* 25th divide step */
801 addc retreg,retreg,retreg /* shift retreg with/into carry */
802 ds tmp,arg1, tmp /* 26th divide step */
803 addc retreg,retreg,retreg /* shift retreg with/into carry */
804 ds tmp,arg1, tmp /* 27th divide step */
805 addc retreg,retreg,retreg /* shift retreg with/into carry */
806 ds tmp,arg1, tmp /* 28th divide step */
807 addc retreg,retreg,retreg /* shift retreg with/into carry */
808 ds tmp,arg1, tmp /* 29th divide step */
809 addc retreg,retreg,retreg /* shift retreg with/into carry */
810 ds tmp,arg1, tmp /* 30th divide step */
811 addc retreg,retreg,retreg /* shift retreg with/into carry */
812 ds tmp,arg1, tmp /* 31st divide step */
813 addc retreg,retreg,retreg /* shift retreg with/into carry */
814 ds tmp,arg1, tmp /* 32nd divide step, */
815 addc retreg,retreg,retreg /* shift last bit into retreg */
816 movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
817 add,< arg1,0,0 /* if arg1 > 0, add arg1 */
818 add,tr tmp,arg1,retreg /* for correcting remainder tmp */
819 sub tmp,arg1,retreg /* else add absolute value arg1 */
821 add,>= arg0,0,0 /* set sign of remainder */
822 sub 0,retreg,retreg /* to sign of dividend */
835 . Single precision divide for remainder with unsigned binary integers.
837 . The remainder must be dividend-(dividend/divisor)*divisor.
838 . Divide by zero is trapped.
844 . sr0 == return space when called externally
851 OTHER REGISTERS AFFECTED:
855 . Causes a trap under the following conditions: DIVIDE BY ZERO
856 . Changes memory at the following places: NONE
860 . Does not create a stack frame.
861 . Suitable for internal or external millicode.
862 . Assumes the special millicode register conventions.
865 . Calls other millicode routines using mrp: NONE
866 . Calls other millicode routines: NONE */
/* $$remU: unsigned remainder, result in ret1 (r29).  Power-of-2
   divisors reduce to a mask (and); otherwise the V-bit is forced to 1
   and 32 unrolled ds/addc divide steps run with the partial remainder
   in rmndr and the shifted dividend/quotient in temp.  After the loop a
   comiclr-guarded add corrects a negative partial remainder, and the
   final sub,>>= handles the high-bit-set divisor comparison.
   Instruction order and carry flow are load-bearing — do not reorder.
   NOTE(review): the routine entry label, special_case target, and the
   final return sequence are on lines elided from this extract. */
870 RDEFINE(rmndr,ret1) /* r29 */
873 .export $$remU,millicode
878 ldo -1(arg1),temp /* is there at most one bit set ? */
879 and,= arg1,temp,r0 /* if not, don't use power of 2 */
881 addit,= 0,arg1,r0 /* trap on div by zero */
882 and arg0,temp,rmndr /* get the result for power of 2 */
885 comib,>=,n 0,arg1,LREF(special_case)
886 subi 0,arg1,rmndr /* clear carry, negate the divisor */
887 ds r0,rmndr,r0 /* set V-bit to 1 */
888 add arg0,arg0,temp /* shift msb bit into carry */
889 ds r0,arg1,rmndr /* 1st divide step, if no carry */
890 addc temp,temp,temp /* shift temp with/into carry */
891 ds rmndr,arg1,rmndr /* 2nd divide step */
892 addc temp,temp,temp /* shift temp with/into carry */
893 ds rmndr,arg1,rmndr /* 3rd divide step */
894 addc temp,temp,temp /* shift temp with/into carry */
895 ds rmndr,arg1,rmndr /* 4th divide step */
896 addc temp,temp,temp /* shift temp with/into carry */
897 ds rmndr,arg1,rmndr /* 5th divide step */
898 addc temp,temp,temp /* shift temp with/into carry */
899 ds rmndr,arg1,rmndr /* 6th divide step */
900 addc temp,temp,temp /* shift temp with/into carry */
901 ds rmndr,arg1,rmndr /* 7th divide step */
902 addc temp,temp,temp /* shift temp with/into carry */
903 ds rmndr,arg1,rmndr /* 8th divide step */
904 addc temp,temp,temp /* shift temp with/into carry */
905 ds rmndr,arg1,rmndr /* 9th divide step */
906 addc temp,temp,temp /* shift temp with/into carry */
907 ds rmndr,arg1,rmndr /* 10th divide step */
908 addc temp,temp,temp /* shift temp with/into carry */
909 ds rmndr,arg1,rmndr /* 11th divide step */
910 addc temp,temp,temp /* shift temp with/into carry */
911 ds rmndr,arg1,rmndr /* 12th divide step */
912 addc temp,temp,temp /* shift temp with/into carry */
913 ds rmndr,arg1,rmndr /* 13th divide step */
914 addc temp,temp,temp /* shift temp with/into carry */
915 ds rmndr,arg1,rmndr /* 14th divide step */
916 addc temp,temp,temp /* shift temp with/into carry */
917 ds rmndr,arg1,rmndr /* 15th divide step */
918 addc temp,temp,temp /* shift temp with/into carry */
919 ds rmndr,arg1,rmndr /* 16th divide step */
920 addc temp,temp,temp /* shift temp with/into carry */
921 ds rmndr,arg1,rmndr /* 17th divide step */
922 addc temp,temp,temp /* shift temp with/into carry */
923 ds rmndr,arg1,rmndr /* 18th divide step */
924 addc temp,temp,temp /* shift temp with/into carry */
925 ds rmndr,arg1,rmndr /* 19th divide step */
926 addc temp,temp,temp /* shift temp with/into carry */
927 ds rmndr,arg1,rmndr /* 20th divide step */
928 addc temp,temp,temp /* shift temp with/into carry */
929 ds rmndr,arg1,rmndr /* 21st divide step */
930 addc temp,temp,temp /* shift temp with/into carry */
931 ds rmndr,arg1,rmndr /* 22nd divide step */
932 addc temp,temp,temp /* shift temp with/into carry */
933 ds rmndr,arg1,rmndr /* 23rd divide step */
934 addc temp,temp,temp /* shift temp with/into carry */
935 ds rmndr,arg1,rmndr /* 24th divide step */
936 addc temp,temp,temp /* shift temp with/into carry */
937 ds rmndr,arg1,rmndr /* 25th divide step */
938 addc temp,temp,temp /* shift temp with/into carry */
939 ds rmndr,arg1,rmndr /* 26th divide step */
940 addc temp,temp,temp /* shift temp with/into carry */
941 ds rmndr,arg1,rmndr /* 27th divide step */
942 addc temp,temp,temp /* shift temp with/into carry */
943 ds rmndr,arg1,rmndr /* 28th divide step */
944 addc temp,temp,temp /* shift temp with/into carry */
945 ds rmndr,arg1,rmndr /* 29th divide step */
946 addc temp,temp,temp /* shift temp with/into carry */
947 ds rmndr,arg1,rmndr /* 30th divide step */
948 addc temp,temp,temp /* shift temp with/into carry */
949 ds rmndr,arg1,rmndr /* 31st divide step */
950 addc temp,temp,temp /* shift temp with/into carry */
951 ds rmndr,arg1,rmndr /* 32nd divide step, */
952 comiclr,<= 0,rmndr,r0
953 add rmndr,arg1,rmndr /* correction */
957 /* Putting >= on the last DS and deleting COMICLR does not work! */
959 sub,>>= arg0,arg1,rmndr
977 . $$divI_10 $$divU_10
979 . $$divI_12 $$divU_12
981 . $$divI_14 $$divU_14
982 . $$divI_15 $$divU_15
984 . $$divI_17 $$divU_17
986 . Divide by selected constants for single precision binary integers.
991 . sr0 == return space when called externally
998 OTHER REGISTERS AFFECTED:
1002 . Causes a trap under the following conditions: NONE
1003 . Changes memory at the following places: NONE
1005 PERMISSIBLE CONTEXT:
1007 . Does not create a stack frame.
1008 . Suitable for internal or external millicode.
1009 . Assumes the special millicode register conventions.
1012 . Calls other millicode routines using mrp: NONE
1013 . Calls other millicode routines: NONE */
1016 /* TRUNCATED DIVISION BY SMALL INTEGERS
1018 We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
1021 Let a = floor(z/y), for some choice of z. Note that z will be
1022 chosen so that division by z is cheap.
1024 Let r be the remainder(z/y). In other words, r = z - ay.
1026 Now, our method is to choose a value for b such that
1028 q'(x) = floor((ax+b)/z)
1030 is equal to q(x) over as large a range of x as possible. If the
1031 two are equal over a sufficiently large range, and if it is easy to
1032 form the product (ax), and it is easy to divide by z, then we can
1033 perform the division much faster than the general division algorithm.
1035 So, we want the following to be true:
1037 . For x in the following range:
1043 . k <= (ax+b)/z < (k+1)
1045 We want to determine b such that this is true for all k in the
1046 range {0..K} for some maximum K.
1048 Since (ax+b) is an increasing function of x, we can take each
1049 bound separately to determine the "best" value for b.
1051 (ax+b)/z < (k+1) implies
1053 a((k+1)y-1)+b < (k+1)z implies
1055 b < a + (k+1)(z-ay) implies
1059 This needs to be true for all k in the range {0..K}. In
1060 particular, it is true for k = 0 and this leads to a maximum
1061 acceptable value for b.
1063 b < a+r or b <= a+r-1
1065 Taking the other bound, we have
1067 k <= (ax+b)/z implies
1069 k <= (aky+b)/z implies
1071 k(z-ay) <= b implies
1075 Clearly, the largest range for k will be achieved by maximizing b,
1076 when r is not zero. When r is zero, then the simplest choice for b
1077 is 0. When r is not 0, set
1081 Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
1082 for all x in the range:
1086 We need to determine what K is. Of our two bounds,
1088 . b < a+(k+1)r is satisfied for all k >= 0, by construction.
1094 This is always true if r = 0. If r is not 0 (the usual case), then
1095 K = floor((a+r-1)/r), is the maximum value for k.
1097 Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
1098 answer for q(x) = floor(x/y) when x is in the range
1100 (0,(K+1)y-1) K = floor((a+r-1)/r)
1102 To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
1103 the formula for q'(x) yields the correct value of q(x) for all x
1104 representable by a single word in HPPA.
1106 We are also constrained in that computing the product (ax), adding
1107 b, and dividing by z must all be done quickly, otherwise we will be
1108 better off going through the general algorithm using the DS
1109 instruction, which uses approximately 70 cycles.
1111 For each y, there is a choice of z which satisfies the constraints
1112 for (K+1)y >= 2**32. We may not, however, be able to satisfy the
1113 timing constraints for arbitrary y. It seems that z being equal to
1114 a power of 2 or a power of 2 minus 1 is as good as we can do, since
1115 it minimizes the time to do division by z. We want the choice of z
1116 to also result in a value for (a) that minimizes the computation of
1117 the product (ax). This is best achieved if (a) has a regular bit
1118 pattern (so the multiplication can be done with shifts and adds).
1119 The value of (a) also needs to be less than 2**32 so the product is
1120 always guaranteed to fit in 2 words.
1122 In actual practice, the following should be done:
1124 1) For negative x, you should take the absolute value and remember
1125 . the fact so that the result can be negated. This obviously does
1126 . not apply in the unsigned case.
1127 2) For even y, you should factor out the power of 2 that divides y
1128 . and divide x by it. You can then proceed by dividing by the
1131 Here is a table of some odd values of y, and corresponding choices
1132 for z which are "good".
1134 y z r a (hex) max x (hex)
1136 3 2**32 1 55555555 100000001
1137 5 2**32 1 33333333 100000003
1138 7 2**24-1 0 249249 (infinite)
1139 9 2**24-1 0 1c71c7 (infinite)
1140 11 2**20-1 0 1745d (infinite)
1141 13 2**24-1 0 13b13b (infinite)
1142 15 2**32 1 11111111 10000000d
1143 17 2**32 1 f0f0f0f 10000000f
1145 If r is 1, then b = a+r-1 = a. This simplifies the computation
1146 of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
1147 then b = 0 is ok to use which simplifies (ax+b).
1149 The bit patterns for 55555555, 33333333, and 11111111 are obviously
1150 very regular. The bit patterns for the other values of a above are:
1154 7 249249 001001001001001001001001 << regular >>
1155 9 1c71c7 000111000111000111000111 << regular >>
1156 11 1745d 000000010111010001011101 << irregular >>
1157 13 13b13b 000100111011000100111011 << irregular >>
1159 The bit patterns for (a) corresponding to (y) of 11 and 13 may be
1160 too irregular to warrant using this method.
1162 When z is a power of 2 minus 1, then the division by z is slightly
1163 more complicated, involving an iterative solution.
1165 The code presented here solves division by 1 through 17, except for
1166 11 and 13. There are algorithms for both signed and unsigned
1171 divisor positive negative unsigned
1186 Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
1187 a loop body is executed until the tentative quotient is 0. The
1188 number of times the loop body is executed varies depending on the
1189 dividend, but is never more than two times. If the dividend is
1190 less than the divisor, then the loop body is not executed at all.
1191 Each iteration adds 4 cycles to the timings.
1193 divisor positive negative unsigned
1195 . 7 19+4n 20+4n 20+4n n = number of iterations
1196 . 9 21+4n 22+4n 21+4n
1197 . 14 21+4n 22+4n 20+4n
1199 To give an idea of how the number of iterations varies, here is a
1200 table of dividend versus number of iterations when dividing by 7.
1202 smallest largest required
1203 dividend dividend iterations
1207 0x1000006 0xffffffff 2
1209 There is some overlap in the range of numbers requiring 1 and 2
/* Local register aliases for the divide-by-constant code below.
   NOTE(review): from their use in the routines that follow, x2 appears
   to hold the working dividend while t1 and x1 hold intermediate
   products/high bits — confirm against the full source. */
1213 RDEFINE(x2,arg0) /* r26 */
1214 RDEFINE(t1,arg1) /* r25 */
1215 RDEFINE(x1,ret1) /* r29 */
1223 /* NONE of these routines require a stack frame
1224 ALL of these routines are unwindable from millicode */
/* $$divide_by_constant is never called directly; it only names the
   start of this region so that a single unwind descriptor (and tools
   such as gprof) can attribute all of the divide-by-constant entry
   points that follow to one label. */
1226 GSYM($$divide_by_constant)
1227 .export $$divide_by_constant,millicode
1228 /* Provides a "nice" label for the code covered by the unwind descriptor
1229 for things like gprof. */
/* Signed division by powers of two.  Each quotient is formed with an
   arithmetic right shift via EXTRS: a signed 31-bit field ending at
   bit 30 is x>>1, a 30-bit field ending at bit 29 is x>>2, etc.
   NOTE(review): the GSYM entry labels, the pre-shift rounding
   adjustment for negative dividends, and the MILLIRET sequences fall
   on lines not shown in this excerpt — confirm against the full file. */
1231 /* DIVISION BY 2 (shift by 1) */
1233 .export $$divI_2,millicode
1237 extrs arg0,30,31,ret1 /* arg0 >> 1, arithmetic */
1240 /* DIVISION BY 4 (shift by 2) */
1242 .export $$divI_4,millicode
1246 extrs arg0,29,30,ret1 /* arg0 >> 2, arithmetic */
1249 /* DIVISION BY 8 (shift by 3) */
1251 .export $$divI_8,millicode
1255 extrs arg0,28,29,ret1 /* arg0 >> 3, arithmetic */
1257 /* DIVISION BY 16 (shift by 4) */
1259 .export $$divI_16,millicode
1263 extrs arg0,27,28,ret1 /* arg0 >> 4, arithmetic */
1265 /****************************************************************************
1267 * DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
1269 * includes 3,5,15,17 and also 6,10,12
1271 ****************************************************************************/
/* These use the reciprocal method derived in the long comment above:
   z = 2**32 and r = 1, hence b = a, and the code forms (x+1)*a rather
   than a*x + b.  The $$divI_n entries branch to a negative-dividend
   path; the $$divU_n entries note that the initial "addi 1" can wrap.
   NOTE(review): interior lines (labels, the remaining shift/add steps,
   return sequences) are not visible in this excerpt. */
1273 /* DIVISION BY 3 (use z = 2**32; a = 55555555) */
1276 .export $$divI_3,millicode
1277 comb,<,N x2,0,LREF(neg3) /* negative dividend takes the neg3 path */
1279 addi 1,x2,x2 /* this can not overflow */
1280 extru x2,1,2,x1 /* multiply by 5 to get started */
1286 subi 1,x2,x2 /* this can not overflow */
1287 extru x2,1,2,x1 /* multiply by 5 to get started */
1293 .export $$divU_3,millicode
1294 addi 1,x2,x2 /* this CAN overflow */
1296 shd x1,x2,30,t1 /* multiply by 5 to get started */
1301 /* DIVISION BY 5 (use z = 2**32; a = 33333333) */
1304 .export $$divI_5,millicode
1305 comb,<,N x2,0,LREF(neg5) /* negative dividend takes the neg5 path */
1307 addi 3,x2,t1 /* this can not overflow */
1308 sh1add x2,t1,x2 /* multiply by 3 to get started */
1313 sub 0,x2,x2 /* negate x2 */
1314 addi 1,x2,x2 /* this can not overflow */
1315 shd 0,x2,31,x1 /* get top bit (can be 1) */
1316 sh1add x2,x2,x2 /* multiply by 3 to get started */
1321 .export $$divU_5,millicode
1322 addi 1,x2,x2 /* this CAN overflow */
1324 shd x1,x2,31,t1 /* multiply by 3 to get started */
1329 /* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
/* Even divisor: strip the factor of two with EXTRU first, then run
   the divide-by-3 reciprocal sequence.  NOTE(review): lines between
   the fragments shown here are missing from this excerpt. */
1331 .export $$divI_6,millicode
1332 comb,<,N x2,0,LREF(neg6) /* negative dividend takes the neg6 path */
1333 extru x2,30,31,x2 /* divide by 2 */
1334 addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
1335 sh2add x2,t1,x2 /* multiply by 5 to get started */
1340 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1341 /* negation and adding 1 are done */
1342 /* at the same time by the SUBI */
1345 sh2add x2,x2,x2 /* multiply by 5 to get started */
1350 .export $$divU_6,millicode
1351 extru x2,30,31,x2 /* divide by 2 */
1352 addi 1,x2,x2 /* can not carry */
1353 shd 0,x2,30,x1 /* multiply by 5 to get started */
1358 /* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
/* The multiply-by-0x11 / 0x101 / 0x10001 steps replicate the starting
   nibble pattern across the word to complete the reciprocal product.
   NOTE(review): interior lines are missing from this excerpt. */
1360 .export $$divU_10,millicode
1361 extru x2,30,31,x2 /* divide by 2 */
1362 addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
1363 sh1add x2,t1,x2 /* multiply by 3 to get started */
1366 shd x1,x2,28,t1 /* multiply by 0x11 */
1371 shd x1,x2,24,t1 /* multiply by 0x101 */
1376 shd x1,x2,16,t1 /* multiply by 0x10001 */
1383 .export $$divI_10,millicode
1384 comb,< x2,0,LREF(neg10) /* negative dividend takes the neg10 path */
1386 extru x2,30,31,x2 /* divide by 2 */
1387 addib,TR 1,x2,LREF(pos) /* add 1 (can not overflow) */
1388 sh1add x2,x2,x2 /* multiply by 3 to get started */
1391 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1392 /* negation and adding 1 are done */
1393 /* at the same time by the SUBI */
1395 sh1add x2,x2,x2 /* multiply by 3 to get started */
1397 shd x1,x2,28,t1 /* multiply by 0x11 */
1402 shd x1,x2,24,t1 /* multiply by 0x101 */
1407 shd x1,x2,16,t1 /* multiply by 0x10001 */
1414 /* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
/* NOTE(review): interior lines are missing from this excerpt. */
1416 .export $$divI_12,millicode
1417 comb,< x2,0,LREF(neg12) /* negative dividend takes the neg12 path */
1419 extru x2,29,30,x2 /* divide by 4 */
1420 addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
1421 sh2add x2,x2,x2 /* multiply by 5 to get started */
1424 subi 4,x2,x2 /* negate, divide by 4, and add 1 */
1425 /* negation and adding 1 are done */
1426 /* at the same time by the SUBI */
1429 sh2add x2,x2,x2 /* multiply by 5 to get started */
1432 .export $$divU_12,millicode
1433 extru x2,29,30,x2 /* divide by 4 */
1434 addi 5,x2,t1 /* can not carry */
1435 sh2add x2,t1,x2 /* multiply by 5 to get started */
1439 /* DIVISION BY 15 (use z = 2**32; a = 11111111) */
/* NOTE(review): interior lines are missing from this excerpt. */
1441 .export $$divI_15,millicode
1442 comb,< x2,0,LREF(neg15) /* negative dividend takes the neg15 path */
1444 addib,tr 1,x2,LREF(pos)+4 /* branch into shared positive path */
1452 .export $$divU_15,millicode
1453 addi 1,x2,x2 /* this CAN overflow */
1457 /* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
1459 .export $$divI_17,millicode
1460 comb,<,n x2,0,LREF(neg17) /* negative dividend takes the neg17 path */
1461 addi 1,x2,x2 /* this can not overflow */
1462 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1469 subi 1,x2,x2 /* this can not overflow */
1470 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1477 .export $$divU_17,millicode
1478 addi 1,x2,x2 /* this CAN overflow */
1480 shd x1,x2,28,t1 /* multiply by 0xf to get started */
1488 /* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
1489 includes 7,9 and also 14
1497 Also, in order to divide by z = 2**24-1, we approximate by dividing
1498 by (z+1) = 2**24 (which is easy), and then correcting.
1503 So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
1504 Then the true remainder of (ax)/z is (q'+r). Repeat the process
1505 with this new remainder, adding the tentative quotients together,
1506 until a tentative quotient is 0 (and then we are done). There is
1507 one last correction to be done. It is possible that (q'+r) = z.
1508 If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
1509 in fact, we need to add 1 more to the quotient. Now, it turns
1510 out that this happens if and only if the original value x is
1511 an exact multiple of y. So, to avoid a three instruction test at
1512 the end, instead use 1 instruction to add 1 to x at the beginning. */
1514 /* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
/* Iterative scheme described in the comment above: divide a*x by
   2**24 and fold the remainder back in until the tentative quotient
   is zero; the initial "addi 1" replaces the final exact-multiple
   correction test.  NOTE(review): interior lines are missing from
   this excerpt. */
1516 .export $$divI_7,millicode
1517 comb,<,n x2,0,LREF(neg7) /* negative dividend takes the neg7 path */
1519 addi 1,x2,x2 /* can not overflow */
1534 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1537 shd,= t1,x2,24,t1 /* tentative quotient */
1539 addb,tr t1,x1,LREF(2) /* add to previous quotient */
1540 extru x2,31,24,x2 /* new remainder (unadjusted) */
1545 addb,tr t1,x2,LREF(1) /* adjust remainder */
1546 extru,= x2,7,8,t1 /* new quotient */
1549 subi 1,x2,x2 /* negate x2 and add 1 */
1566 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1569 shd,= t1,x2,24,t1 /* tentative quotient */
1571 addb,tr t1,x1,LREF(4) /* add to previous quotient */
1572 extru x2,31,24,x2 /* new remainder (unadjusted) */
1575 sub 0,x1,x1 /* negate result */
1578 addb,tr t1,x2,LREF(3) /* adjust remainder */
1579 extru,= x2,7,8,t1 /* new quotient */
1582 .export $$divU_7,millicode
1583 addi 1,x2,x2 /* can carry */
1590 /* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
/* Same iterative 2**24-1 scheme as division by 7 above; 14 halves
   the dividend and reuses the divide-by-7 code.  NOTE(review):
   interior lines are missing from this excerpt. */
1592 .export $$divI_9,millicode
1593 comb,<,n x2,0,LREF(neg9) /* negative dividend takes the neg9 path */
1594 addi 1,x2,x2 /* can not overflow */
1602 subi 1,x2,x2 /* negate and add 1 */
1610 .export $$divU_9,millicode
1611 addi 1,x2,x2 /* can carry */
1619 /* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
1621 .export $$divI_14,millicode
1622 comb,<,n x2,0,LREF(neg14) /* negative dividend takes the neg14 path */
1624 .export $$divU_14,millicode
1625 b LREF(7) /* go to 7 case */
1626 extru x2,30,31,x2 /* divide by 2 (branch delay slot) */
1629 subi 2,x2,x2 /* negate (and add 2) */
1631 extru x2,30,31,x2 /* divide by 2 */
1638 /* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
1639 /******************************************************************************
1640 This routine is used on PA2.0 processors when gcc -mno-fpregs is used
1647 $$mulI multiplies two single word integers, giving a single
1656 sr0 == return space when called externally
1665 OTHER REGISTERS AFFECTED:
1671 Causes a trap under the following conditions: NONE
1672 Changes memory at the following places: NONE
1674 PERMISSIBLE CONTEXT:
1677 Does not create a stack frame
1678 Is usable for internal or external millicode
1682 Calls other millicode routines via mrp: NONE
1683 Calls other millicode routines: NONE
1685 ***************************************************************************/
/* Building blocks for the $$mulI multiply-recipe table that follows.
   Naming convention: "dst__expr" computes dst = expr (e.g. t0__3a0
   sets t0 = 3*a0, t0__t0ma0 sets t0 = t0 - a0); zdep r,31-s,32-s,t
   is a left shift of r by s bits.  "a1_ne_0_b_lN" loops back to lN
   while multiplier bits remain in a1; "b_e_*" branches to a matching
   exit sequence; "r__r_*" folds a partial product into result r. */
/* a0 shifts (a0 <<= 7 and a0 <<= 8). */
1693 #define a0__128a0 zdep a0,24,25,a0
1694 #define a0__256a0 zdep a0,23,24,a0
/* Loop-back branches: taken while a1 (remaining multiplier) != 0. */
1695 #define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
1696 #define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
1697 #define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
/* Branches to the shared exit sequences. */
1698 #define b_n_ret_t0 b,n LREF(ret_t0)
1699 #define b_e_shift b LREF(e_shift)
1700 #define b_e_t0ma0 b LREF(e_t0ma0)
1701 #define b_e_t0 b LREF(e_t0)
1702 #define b_e_t0a0 b LREF(e_t0a0)
1703 #define b_e_t02a0 b LREF(e_t02a0)
1704 #define b_e_t04a0 b LREF(e_t04a0)
1705 #define b_e_2t0 b LREF(e_2t0)
1706 #define b_e_2t0a0 b LREF(e_2t0a0)
1707 #define b_e_2t04a0 b LREF(e2t04a0)
1708 #define b_e_3t0 b LREF(e_3t0)
1709 #define b_e_4t0 b LREF(e_4t0)
1710 #define b_e_4t0a0 b LREF(e_4t0a0)
1711 #define b_e_4t08a0 b LREF(e4t08a0)
1712 #define b_e_5t0 b LREF(e_5t0)
1713 #define b_e_8t0 b LREF(e_8t0)
1714 #define b_e_8t0a0 b LREF(e_8t0a0)
/* Accumulate a partial product into the running result r. */
1715 #define r__r_a0 add r,a0,r
1716 #define r__r_2a0 sh1add a0,r,r
1717 #define r__r_4a0 sh2add a0,r,r
1718 #define r__r_8a0 sh3add a0,r,r
1719 #define r__r_t0 add r,t0,r
1720 #define r__r_2t0 sh1add t0,r,r
1721 #define r__r_4t0 sh2add t0,r,r
1722 #define r__r_8t0 sh3add t0,r,r
/* Temporaries: small multiples built from shift-and-add. */
1723 #define t0__3a0 sh1add a0,a0,t0
1724 #define t0__4a0 sh2add a0,0,t0
1725 #define t0__5a0 sh2add a0,a0,t0
1726 #define t0__8a0 sh3add a0,0,t0
1727 #define t0__9a0 sh3add a0,a0,t0
1728 #define t0__16a0 zdep a0,27,28,t0
1729 #define t0__32a0 zdep a0,26,27,t0
1730 #define t0__64a0 zdep a0,25,26,t0
1731 #define t0__128a0 zdep a0,24,25,t0
1732 #define t0__t0ma0 sub t0,a0,t0
1733 #define t0__t0_a0 add t0,a0,t0
1734 #define t0__t0_2a0 sh1add a0,t0,t0
1735 #define t0__t0_4a0 sh2add a0,t0,t0
1736 #define t0__t0_8a0 sh3add a0,t0,t0
1737 #define t0__2t0_a0 sh1add t0,a0,t0
1738 #define t0__3t0 sh1add t0,t0,t0
1739 #define t0__4t0 sh2add t0,0,t0
1740 #define t0__4t0_a0 sh2add t0,a0,t0
1741 #define t0__5t0 sh2add t0,t0,t0
1742 #define t0__8t0 sh3add t0,0,t0
1743 #define t0__8t0_a0 sh3add t0,a0,t0
1744 #define t0__9t0 sh3add t0,t0,t0
1745 #define t0__16t0 zdep t0,27,28,t0
1746 #define t0__32t0 zdep t0,26,27,t0
1747 #define t0__256a0 zdep a0,23,24,t0
1755 .export $$mulI, millicode
1757 combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
1758 copy 0,r /* zero out the result */
1759 xor a0,a1,a0 /* swap a0 & a1 using the */
1760 xor a0,a1,a1 /* old xor trick */
1763 combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
1764 zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1765 sub,> 0,a1,t0 /* otherwise negate both and */
1766 combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
1768 movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
1770 LSYM(l0) r__r_t0 /* add in this partial product */
1771 LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
1772 LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1773 LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
1774 extru a1,23,24,a1 /* a1 >>= 8 ****************** */
1776 /*16 insts before this. */
1777 /* a0 <<= 8 ************************** */
1778 LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
1779 LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
1780 LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
1781 LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
1782 LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
1783 LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
1784 LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1785 LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
1786 LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
1787 LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
1788 LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1789 LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1790 LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1791 LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1792 LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1793 LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
1794 LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1795 LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
1796 LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1797 LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
1798 LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1799 LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1800 LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1801 LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1802 LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1803 LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1804 LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1805 LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
1806 LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1807 LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1808 LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1809 LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1810 LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1811 LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1812 LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1813 LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
1814 LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1815 LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1816 LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1817 LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1818 LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1819 LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1820 LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1821 LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1822 LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1823 LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1824 LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
1825 LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
1826 LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
1827 LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
1828 LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1829 LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
1830 LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1831 LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1832 LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1833 LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
1834 LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1835 LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
1836 LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1837 LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
1838 LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1839 LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1840 LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1841 LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1842 LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1843 LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1844 LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1845 LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1846 LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1847 LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1848 LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
1849 LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
1850 LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1851 LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
1852 LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1853 LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1854 LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1855 LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1856 LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
1857 LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1858 LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
1859 LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
1860 LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1861 LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1862 LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1863 LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1864 LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1865 LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
1866 LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1867 LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1868 LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1869 LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
1870 LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1871 LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
1872 LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
1873 LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1874 LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1875 LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1876 LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
1877 LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1878 LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1879 LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1880 LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1881 LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
1882 LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1883 LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1884 LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
1885 LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
1886 LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1887 LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1888 LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
1889 LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1890 LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
1891 LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
1892 LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
1893 LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
1894 LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
1895 LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1896 LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
1897 LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
1898 LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1899 LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1900 LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1901 LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1902 LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
1903 LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1904 LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1905 LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1906 LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1907 LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
1908 LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1909 LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1910 LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1911 LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1912 LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1913 LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
1914 LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1915 LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1916 LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1917 LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
1918 LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
1919 LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
1920 LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
1921 LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
1922 LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
1923 LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
1924 LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1925 LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1926 LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1927 LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1928 LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1929 LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1930 LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1931 LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1932 LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1933 LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
1934 LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1935 LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
1936 LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
1937 LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1938 LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
1939 LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1940 LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
1941 LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
1942 LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
1943 LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1944 LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
1945 LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1946 LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1947 LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
1948 LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
1949 LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
1950 LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
1951 LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
1952 LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
1953 LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
1954 LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
1955 LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
1956 LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
1957 LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
1958 LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1959 LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1960 LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
1961 LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
1962 LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
1963 LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1964 LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
1965 LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
1966 LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
1967 LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1968 LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
1969 LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
1970 LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1971 LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1972 LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1973 LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1974 LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
1975 LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
1976 LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1977 LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
1978 LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
1979 LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
1980 LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
1981 LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
1982 LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
1983 LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
1984 LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
1985 LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
1986 LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
1987 LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
1988 LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
1989 LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
1990 LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
1991 LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
1992 LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
1993 LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
1994 LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1995 LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1996 LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1997 LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1998 LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
1999 LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
2000 LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
2001 LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
2002 LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
2003 LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
2004 LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
2005 LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
2006 LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
2007 LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
2008 LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
2009 LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
2010 LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
2011 LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
2012 LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
2013 LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
2014 LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
2015 LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
2016 LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
2017 LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
2018 LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
2019 LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
2020 LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
2021 LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
2022 LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
2023 LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
2024 LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
2025 LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
2026 LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
2027 LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
2028 LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
2029 LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
2030 LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
2031 LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
2032 LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
2033 LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
2034 /*1040 insts before this. */
2035 LSYM(ret_t0) MILLIRET
2037 LSYM(e_shift) a1_ne_0_b_l2
2038 a0__256a0 /* a0 <<= 8 *********** */
2040 LSYM(e_t0ma0) a1_ne_0_b_l0
2044 LSYM(e_t0a0) a1_ne_0_b_l0
2048 LSYM(e_t02a0) a1_ne_0_b_l0
2052 LSYM(e_t04a0) a1_ne_0_b_l0
2056 LSYM(e_2t0) a1_ne_0_b_l1
2059 LSYM(e_2t0a0) a1_ne_0_b_l0
2063 LSYM(e2t04a0) t0__t0_2a0
2067 LSYM(e_3t0) a1_ne_0_b_l0
2071 LSYM(e_4t0) a1_ne_0_b_l1
2074 LSYM(e_4t0a0) a1_ne_0_b_l0
2078 LSYM(e4t08a0) t0__t0_2a0
2082 LSYM(e_5t0) a1_ne_0_b_l0
2086 LSYM(e_8t0) a1_ne_0_b_l1
2089 LSYM(e_8t0a0) a1_ne_0_b_l0