1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
27 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-attr.h"
41 #include "integrate.h"
50 #include "target-def.h"
52 #ifndef DO_FRAME_NOTES
53 #ifdef INCOMING_RETURN_ADDR_RTX
54 #define DO_FRAME_NOTES 1
56 #define DO_FRAME_NOTES 0
60 #ifndef FUNC_BEGIN_PROLOG_LABEL
61 #define FUNC_BEGIN_PROLOG_LABEL "LFBP"
64 static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
65 static void pa_combine_instructions PARAMS ((rtx));
66 static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
67 static int forward_branch_p PARAMS ((rtx));
68 static int shadd_constant_p PARAMS ((int));
69 static void pa_add_gc_roots PARAMS ((void));
70 static void mark_deferred_plabels PARAMS ((void *));
71 static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
72 static int compute_movstrsi_length PARAMS ((rtx));
73 static bool pa_assemble_integer PARAMS ((rtx, unsigned int, int));
74 static void remove_useless_addtr_insns PARAMS ((rtx, int));
75 static void store_reg PARAMS ((int, int, int));
76 static void store_reg_modify PARAMS ((int, int, int));
77 static void load_reg PARAMS ((int, int, int));
78 static void set_reg_plus_d PARAMS ((int, int, int, int));
79 static void pa_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
80 static int pa_adjust_cost PARAMS ((rtx, rtx, rtx, int));
81 static int pa_adjust_priority PARAMS ((rtx, int));
82 static int pa_issue_rate PARAMS ((void));
84 /* Save the operands last given to a compare for use when we
85 generate a scc or bcc insn. */
87 rtx hppa_compare_op0, hppa_compare_op1;
88 enum cmp_type hppa_branch_type;
90 /* Which cpu we are scheduling for. */
91 enum processor_type pa_cpu;
93 /* String to hold which cpu we are scheduling for. */
94 const char *pa_cpu_string;
96 /* Which architecture we are generating code for. */
97 enum architecture_type pa_arch;
99 /* String to hold which architecture we are generating code for. */
100 const char *pa_arch_string;
102 /* Counts for the number of callee-saved general and floating point
103 registers which were saved by the current function's prologue. */
104 static int gr_saved, fr_saved;
106 /* The number of the current function for which profile information
107 is to be collected. These numbers are used to create unique label
108 id's for labels emitted at the beginning of profiled functions. */
109 static unsigned int current_function_number = 0;
111 static rtx find_addr_reg PARAMS ((rtx));
113 /* Keep track of the number of bytes we have output in the CODE subspaces
114 during this compilation so we'll know when to emit inline long-calls. */
116 unsigned int total_code_bytes;
118 /* Variables to handle plabels that we discover are necessary at assembly
119 output time. They are output after the current function. */
121 struct deferred_plabel
125 } *deferred_plabels = 0;
126 int n_deferred_plabels = 0;
128 /* Initialize the GCC target structure. */
130 #undef TARGET_ASM_ALIGNED_HI_OP
131 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
132 #undef TARGET_ASM_ALIGNED_SI_OP
133 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
134 #undef TARGET_ASM_ALIGNED_DI_OP
135 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
136 #undef TARGET_ASM_UNALIGNED_HI_OP
137 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
138 #undef TARGET_ASM_UNALIGNED_SI_OP
139 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
140 #undef TARGET_ASM_UNALIGNED_DI_OP
141 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
142 #undef TARGET_ASM_INTEGER
143 #define TARGET_ASM_INTEGER pa_assemble_integer
145 #undef TARGET_ASM_FUNCTION_PROLOGUE
146 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
147 #undef TARGET_ASM_FUNCTION_EPILOGUE
148 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
150 #undef TARGET_SCHED_ADJUST_COST
151 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
152 #undef TARGET_SCHED_ADJUST_PRIORITY
153 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
154 #undef TARGET_SCHED_ISSUE_RATE
155 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
157 struct gcc_target targetm = TARGET_INITIALIZER;
162 /* Default to 7100LC scheduling. */
163 if (pa_cpu_string && ! strcmp (pa_cpu_string, "7100"))
165 pa_cpu_string = "7100";
166 pa_cpu = PROCESSOR_7100;
168 else if (pa_cpu_string && ! strcmp (pa_cpu_string, "700"))
170 pa_cpu_string = "700";
171 pa_cpu = PROCESSOR_700;
173 else if (pa_cpu_string == NULL
174 || ! strcmp (pa_cpu_string, "7100LC"))
176 pa_cpu_string = "7100LC";
177 pa_cpu = PROCESSOR_7100LC;
179 else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7200"))
181 pa_cpu_string = "7200";
182 pa_cpu = PROCESSOR_7200;
184 else if (pa_cpu_string && ! strcmp (pa_cpu_string, "8000"))
186 pa_cpu_string = "8000";
187 pa_cpu = PROCESSOR_8000;
191 warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, and 8000\n", pa_cpu_string);
194 /* Set the instruction set architecture. */
195 if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
197 pa_arch_string = "1.0";
198 pa_arch = ARCHITECTURE_10;
199 target_flags &= ~(MASK_PA_11 | MASK_PA_20);
201 else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
203 pa_arch_string = "1.1";
204 pa_arch = ARCHITECTURE_11;
205 target_flags &= ~MASK_PA_20;
206 target_flags |= MASK_PA_11;
208 else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
210 pa_arch_string = "2.0";
211 pa_arch = ARCHITECTURE_20;
212 target_flags |= MASK_PA_11 | MASK_PA_20;
214 else if (pa_arch_string)
216 warning ("unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
219 if (flag_pic && TARGET_PORTABLE_RUNTIME)
221 warning ("PIC code generation is not supported in the portable runtime model\n");
224 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
226 warning ("PIC code generation is not compatible with fast indirect calls\n");
229 if (! TARGET_GAS && write_symbols != NO_DEBUG)
231 warning ("-g is only supported when using GAS on this processor,");
232 warning ("-g option disabled");
233 write_symbols = NO_DEBUG;
236 /* We only support the "big PIC" model now. And we always generate PIC
237 code when in 64bit mode. */
238 if (flag_pic == 1 || TARGET_64BIT)
241 /* We can't guarantee that .dword is available for 32-bit targets. */
242 if (UNITS_PER_WORD == 4)
243 targetm.asm_out.aligned_op.di = NULL;
245 /* The unaligned ops are only available when using GAS. */
248 targetm.asm_out.unaligned_op.hi = NULL;
249 targetm.asm_out.unaligned_op.si = NULL;
250 targetm.asm_out.unaligned_op.di = NULL;
253 /* Register global variables with the garbage collector. */
257 /* Return non-zero only if OP is a register of mode MODE,
/* NOTE(review): interior lines appear to be missing from this extraction
   (the tail of the comment above, the return type, the "rtx op;"
   declaration, and the function braces).  Confirm against the original
   pa.c before compiling.  */
260 reg_or_0_operand (op, mode)
262 enum machine_mode mode;
/* Accept either the canonical zero constant for MODE or any register
   valid for MODE.  */
264 return (op == CONST0_RTX (mode) || register_operand (op, mode));
267 /* Return non-zero if OP is suitable for use in a call to a named
270 For 2.5 try to eliminate either call_operand_address or
271 function_label_operand, they perform very similar functions. */
273 call_operand_address (op, mode)
275 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Any word-mode constant address is acceptable, except under the
   portable runtime model.  (NOTE(review): return type, "rtx op;" and
   braces appear to be missing from this extraction.)  */
277 return (GET_MODE (op) == word_mode
278 && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
281 /* Return 1 if X contains a symbolic expression. We know these
282 expressions will have one of a few well defined forms, so
283 we need only check those forms. */
285 symbolic_expression_p (x)
289 /* Strip off any HIGH. */
290 if (GET_CODE (x) == HIGH)
/* NOTE(review): the statement executed when X is a HIGH appears to be
   missing here (presumably "x = XEXP (x, 0);") -- verify against the
   original source.  */
293 return (symbolic_operand (x, VOIDmode));
/* Return 1 if OP is a symbolic operand.  The visible CONST case accepts
   (const (plus (symbol_ref|label_ref) (const_int))).  NOTE(review): the
   switch's case labels and the SYMBOL_REF/LABEL_REF arms appear to be
   missing from this extraction -- verify before relying on this.  */
297 symbolic_operand (op, mode)
299 enum machine_mode mode ATTRIBUTE_UNUSED;
301 switch (GET_CODE (op))
308 return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
309 || GET_CODE (XEXP (op, 0)) == LABEL_REF)
310 && GET_CODE (XEXP (op, 1)) == CONST_INT);
316 /* Return truth value of statement that OP is a symbolic memory
317 operand of mode MODE. */
320 symbolic_memory_operand (op, mode)
322 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Look through a SUBREG to the underlying object.  */
324 if (GET_CODE (op) == SUBREG)
325 op = SUBREG_REG (op);
326 if (GET_CODE (op) != MEM)
/* NOTE(review): the "return 0;" for the non-MEM case and the
   "op = XEXP (op, 0);" that extracts the address appear to be missing
   from this extraction -- confirm against the original pa.c.  */
329 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
330 || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
333 /* Return 1 if the operand is either a register or a memory operand that is
337 reg_or_nonsymb_mem_operand (op, mode)
339 enum machine_mode mode;
/* Registers are always fine; memory is accepted only when its address
   is not symbolic.  (NOTE(review): the "return 1;" bodies of these
   tests appear to be missing from this extraction.)  */
341 if (register_operand (op, mode))
344 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
350 /* Return 1 if the operand is either a register, zero, or a memory operand
351 that is not symbolic. */
354 reg_or_0_or_nonsymb_mem_operand (op, mode)
356 enum machine_mode mode;
/* Same as reg_or_nonsymb_mem_operand, but additionally accepts the
   canonical zero for MODE.  (NOTE(review): the "return 1;" bodies and
   the final "return 0;" appear to be missing from this extraction.)  */
358 if (register_operand (op, mode))
361 if (op == CONST0_RTX (mode))
364 if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
370 /* Return 1 if the operand is a register operand or a non-symbolic memory
371 operand after reload. This predicate is used for branch patterns that
372 internally handle register reloading. We need to accept non-symbolic
373 memory operands after reload to ensure that the pattern is still valid
374 if reload didn't find a hard register for the operand. */
377 reg_before_reload_operand (op, mode)
379 enum machine_mode mode;
381 /* Don't accept a SUBREG since it will need a reload. */
382 if (GET_CODE (op) == SUBREG)
385 if (register_operand (op, mode))
389 && memory_operand (op, mode)
390 && ! symbolic_memory_operand (op, mode))
396 /* Accept any constant that can be moved in one instruction into a
   general register.  */
399 cint_ok_for_move (intval)
400 HOST_WIDE_INT intval;
402 /* OK if ldo, ldil, or zdepi, can be used. */
403 return (CONST_OK_FOR_LETTER_P (intval, 'J')
404 || CONST_OK_FOR_LETTER_P (intval, 'N')
405 || CONST_OK_FOR_LETTER_P (intval, 'K'));
408 /* Accept anything that can be moved in one instruction into a general
411 move_operand (op, mode)
413 enum machine_mode mode;
/* NOTE(review): several "return" statements and the function braces
   appear to be missing from this extraction -- verify against the
   original pa.c.  */
415 if (register_operand (op, mode))
418 if (GET_CODE (op) == CONSTANT_P_RTX)
/* Constants are OK when a single ldo/ldil/zdepi suffices.  */
421 if (GET_CODE (op) == CONST_INT)
422 return cint_ok_for_move (INTVAL (op));
424 if (GET_CODE (op) == SUBREG)
425 op = SUBREG_REG (op);
426 if (GET_CODE (op) != MEM)
431 /* We consider a LO_SUM DLT reference a move_operand now since it has
432 been merged into the normal movsi/movdi patterns. */
433 if (GET_CODE (op) == LO_SUM
434 && GET_CODE (XEXP (op, 0)) == REG
435 && REG_OK_FOR_BASE_P (XEXP (op, 0))
436 && GET_CODE (XEXP (op, 1)) == UNSPEC
437 && GET_MODE (op) == Pmode)
440 /* Since move_operand is only used for source operands, we can always
441 allow scaled indexing! */
/* Accept (plus (mult reg c) reg) in either operand order, where the
   scale factor C equals the mode size.  */
442 if (! TARGET_DISABLE_INDEXING
443 && GET_CODE (op) == PLUS
444 && ((GET_CODE (XEXP (op, 0)) == MULT
445 && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
446 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
447 && INTVAL (XEXP (XEXP (op, 0), 1))
448 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
449 && GET_CODE (XEXP (op, 1)) == REG)
450 || (GET_CODE (XEXP (op, 1)) == MULT
451 &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
452 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
453 && INTVAL (XEXP (XEXP (op, 1), 1))
454 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
455 && GET_CODE (XEXP (op, 0)) == REG)))
/* Otherwise fall back to the generic legitimate-address test.  */
458 return memory_address_p (mode, op);
461 /* Accept REG and any CONST_INT that can be moved in one instruction into a
464 reg_or_cint_move_operand (op, mode)
466 enum machine_mode mode;
468 if (register_operand (op, mode))
471 if (GET_CODE (op) == CONST_INT)
472 return cint_ok_for_move (INTVAL (op));
478 pic_label_operand (op, mode)
480 enum machine_mode mode ATTRIBUTE_UNUSED;
485 switch (GET_CODE (op))
491 return (GET_CODE (XEXP (op, 0)) == LABEL_REF
492 && GET_CODE (XEXP (op, 1)) == CONST_INT);
499 fp_reg_operand (op, mode)
501 enum machine_mode mode ATTRIBUTE_UNUSED;
503 return reg_renumber && FP_REG_P (op);
508 /* Return truth value of whether OP can be used as an operand in a
509 three operand arithmetic insn that accepts registers of mode MODE
510 or 14-bit signed integers. */
512 arith_operand (op, mode)
514 enum machine_mode mode;
/* A register, or a CONST_INT fitting the 14-bit signed immediate field.  */
516 return (register_operand (op, mode)
517 || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
520 /* Return truth value of whether OP can be used as an operand in a
521 three operand arithmetic insn that accepts registers of mode MODE
522 or 11-bit signed integers. */
524 arith11_operand (op, mode)
526 enum machine_mode mode;
/* A register, or a CONST_INT fitting the 11-bit signed immediate field.  */
528 return (register_operand (op, mode)
529 || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
532 /* A constant integer suitable for use in a PRE_MODIFY memory
535 pre_cint_operand (op, mode)
537 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Accept CONST_INTs in the half-open range [-0x2000, 0x10).  */
539 return (GET_CODE (op) == CONST_INT
540 && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
543 /* A constant integer suitable for use in a POST_MODIFY memory
546 post_cint_operand (op, mode)
548 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Accept CONST_INTs in the half-open range [-0x10, 0x2000) -- the
   mirror image of pre_cint_operand's range.  */
550 return (GET_CODE (op) == CONST_INT
551 && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
/* Accept a register of MODE, or a CONST_DOUBLE of MODE whose low word
   fits the 14-bit immediate field and whose high word is purely the
   sign extension of the low word.  */
555 arith_double_operand (op, mode)
557 enum machine_mode mode;
559 return (register_operand (op, mode)
560 || (GET_CODE (op) == CONST_DOUBLE
561 && GET_MODE (op) == mode
562 && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
/* The sign of the high word must agree with the sign bit (0x1000)
   of the truncated 14-bit low part.  */
563 && ((CONST_DOUBLE_HIGH (op) >= 0)
564 == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
567 /* Return truth value of whether OP is an integer which fits the
568 range constraining immediate operands in three-address insns, or
569 is an integer register. */
572 ireg_or_int5_operand (op, mode)
574 enum machine_mode mode ATTRIBUTE_UNUSED;
576 return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
577 || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
580 /* Return nonzero if OP is an integer register, else return zero. */
582 ireg_operand (op, mode)
584 enum machine_mode mode ATTRIBUTE_UNUSED;
586 return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
589 /* Return truth value of whether OP is an integer which fits the
590 range constraining immediate operands in three-address insns. */
593 int5_operand (op, mode)
595 enum machine_mode mode ATTRIBUTE_UNUSED;
597 return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
601 uint5_operand (op, mode)
603 enum machine_mode mode ATTRIBUTE_UNUSED;
605 return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
609 int11_operand (op, mode)
611 enum machine_mode mode ATTRIBUTE_UNUSED;
613 return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
617 uint32_operand (op, mode)
619 enum machine_mode mode ATTRIBUTE_UNUSED;
621 #if HOST_BITS_PER_WIDE_INT > 32
622 /* All allowed constants will fit a CONST_INT. */
623 return (GET_CODE (op) == CONST_INT
624 && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
626 return (GET_CODE (op) == CONST_INT
627 || (GET_CODE (op) == CONST_DOUBLE
628 && CONST_DOUBLE_HIGH (op) == 0));
633 arith5_operand (op, mode)
635 enum machine_mode mode;
637 return register_operand (op, mode) || int5_operand (op, mode);
640 /* True iff zdepi can be used to generate this CONST_INT.
641 zdepi first sign extends a 5 bit signed number to a given field
642 length, then places this field anywhere in a zero. */
/* NOTE(review): the function name line and the initialization of
   lsb_mask (presumably "lsb_mask = x & -x;", isolating the lowest set
   bit) appear to be missing from this extraction -- verify.  */
645 unsigned HOST_WIDE_INT x;
647 unsigned HOST_WIDE_INT lsb_mask, t;
649 /* This might not be obvious, but it's at least fast.
650 This function is critical; we don't have the time loops would take. */
652 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
653 /* Return true iff t is a power of two. */
654 return ((t & (t - 1)) == 0);
657 /* True iff depi or extru can be used to compute (reg & mask).
658 Accept bit pattern like these:
/* NOTE(review): the example bit patterns, the function name line and
   braces appear to be missing from this extraction.  Adding the lowest
   set bit (mask & -mask) collapses a contiguous run of ones; the result
   is zero or a power of two exactly when the original run was
   contiguous, which the final test checks.  */
664 unsigned HOST_WIDE_INT mask;
667 mask += mask & -mask;
668 return (mask & (mask - 1)) == 0;
671 /* True iff depi or extru can be used to compute (reg & OP). */
673 and_operand (op, mode)
675 enum machine_mode mode;
677 return (register_operand (op, mode)
678 || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
681 /* True iff depi can be used to compute (reg | MASK). */
684 unsigned HOST_WIDE_INT mask;
686 mask += mask & -mask;
687 return (mask & (mask - 1)) == 0;
690 /* True iff depi can be used to compute (reg | OP). */
692 ior_operand (op, mode)
694 enum machine_mode mode ATTRIBUTE_UNUSED;
696 return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
700 lhs_lshift_operand (op, mode)
702 enum machine_mode mode;
704 return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
707 /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
708 Such values can be the left hand side x in (x << r), using the zvdepi
711 lhs_lshift_cint_operand (op, mode)
713 enum machine_mode mode ATTRIBUTE_UNUSED;
715 unsigned HOST_WIDE_INT x;
716 if (GET_CODE (op) != CONST_INT)
/* NOTE(review): the "return 0;" for the non-CONST_INT case appears to
   be missing from this extraction.  After discarding the low 4 bits,
   (x & (x + 1)) == 0 holds exactly when x is a (possibly empty) run of
   low-order ones.  */
718 x = INTVAL (op) >> 4;
719 return (x & (x + 1)) == 0;
723 arith32_operand (op, mode)
725 enum machine_mode mode;
727 return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
731 pc_or_label_operand (op, mode)
733 enum machine_mode mode ATTRIBUTE_UNUSED;
735 return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
738 /* Legitimize PIC addresses. If the address is already
739 position-independent, we return ORIG. Newly generated
740 position-independent addresses go to REG. If we need more
741 than one register, we lose. */
744 legitimize_pic_address (orig, mode, reg)
746 enum machine_mode mode;
750 /* Labels need special handling. */
751 if (pic_label_operand (orig, mode))
753 /* We do not want to go through the movXX expanders here since that
754 would create recursion.
756 Nor do we really want to call a generator for a named pattern
757 since that requires multiple patterns if we want to support
760 So instead we just emit the raw set, which avoids the movXX
761 expanders completely. */
762 emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
763 current_function_uses_pic_offset_table = 1;
766 if (GET_CODE (orig) == SYMBOL_REF)
772 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
773 gen_rtx_HIGH (word_mode, orig)));
775 = gen_rtx_MEM (Pmode,
776 gen_rtx_LO_SUM (Pmode, reg,
777 gen_rtx_UNSPEC (Pmode,
781 current_function_uses_pic_offset_table = 1;
782 RTX_UNCHANGING_P (pic_ref) = 1;
783 emit_move_insn (reg, pic_ref);
786 else if (GET_CODE (orig) == CONST)
790 if (GET_CODE (XEXP (orig, 0)) == PLUS
791 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
797 if (GET_CODE (XEXP (orig, 0)) == PLUS)
799 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
800 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
801 base == reg ? 0 : reg);
804 if (GET_CODE (orig) == CONST_INT)
806 if (INT_14_BITS (orig))
807 return plus_constant (base, INTVAL (orig));
808 orig = force_reg (Pmode, orig);
810 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
811 /* Likewise, should we set special REG_NOTEs here? */
816 /* Try machine-dependent ways of modifying an illegitimate address
817 to be legitimate. If we find one, return the new, valid address.
818 This macro is used in only one place: `memory_address' in explow.c.
820 OLDX is the address as it was before break_out_memory_refs was called.
821 In some cases it is useful to look at this to decide what needs to be done.
823 MODE and WIN are passed so that this macro can use
824 GO_IF_LEGITIMATE_ADDRESS.
826 It is always safe for this macro to do nothing. It exists to recognize
827 opportunities to optimize the output.
829 For the PA, transform:
831 memory(X + <large int>)
835 if (<large int> & mask) >= 16
836 Y = (<large int> & ~mask) + mask + 1 Round up.
838 Y = (<large int> & ~mask) Round down.
840 memory (Z + (<large int> - Y));
842 This is for CSE to find several similar references, and only use one Z.
844 X can either be a SYMBOL_REF or REG, but because combine can not
845 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
846 D will not fit in 14 bits.
848 MODE_FLOAT references allow displacements which fit in 5 bits, so use
851 MODE_INT references allow displacements which fit in 14 bits, so use
854 This relies on the fact that most mode MODE_FLOAT references will use FP
855 registers and most mode MODE_INT references will use integer registers.
856 (In the rare case of an FP register used in an integer MODE, we depend
857 on secondary reloads to clean things up.)
860 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
861 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
862 addressing modes to be used).
864 Put X and Z into registers. Then put the entire expression into
868 hppa_legitimize_address (x, oldx, mode)
869 rtx x, oldx ATTRIBUTE_UNUSED;
870 enum machine_mode mode;
875 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
877 /* Strip off CONST. */
878 if (GET_CODE (x) == CONST)
881 /* Special case. Get the SYMBOL_REF into a register and use indexing.
882 That should always be safe. */
883 if (GET_CODE (x) == PLUS
884 && GET_CODE (XEXP (x, 0)) == REG
885 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
887 rtx reg = force_reg (Pmode, XEXP (x, 1));
888 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
891 /* Note we must reject symbols which represent function addresses
892 since the assembler/linker can't handle arithmetic on plabels. */
893 if (GET_CODE (x) == PLUS
894 && GET_CODE (XEXP (x, 1)) == CONST_INT
895 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
896 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
897 || GET_CODE (XEXP (x, 0)) == REG))
899 rtx int_part, ptr_reg;
901 int offset = INTVAL (XEXP (x, 1));
904 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
905 ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
907 /* Choose which way to round the offset. Round up if we
908 are >= halfway to the next boundary. */
909 if ((offset & mask) >= ((mask + 1) / 2))
910 newoffset = (offset & ~ mask) + mask + 1;
912 newoffset = (offset & ~ mask);
914 /* If the newoffset will not fit in 14 bits (ldo), then
915 handling this would take 4 or 5 instructions (2 to load
916 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
917 add the new offset and the SYMBOL_REF.) Combine can
918 not handle 4->2 or 5->2 combinations, so do not create
920 if (! VAL_14_BITS_P (newoffset)
921 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
923 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
926 gen_rtx_HIGH (Pmode, const_part));
929 gen_rtx_LO_SUM (Pmode,
930 tmp_reg, const_part));
934 if (! VAL_14_BITS_P (newoffset))
935 int_part = force_reg (Pmode, GEN_INT (newoffset));
937 int_part = GEN_INT (newoffset);
939 ptr_reg = force_reg (Pmode,
941 force_reg (Pmode, XEXP (x, 0)),
944 return plus_constant (ptr_reg, offset - newoffset);
947 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
949 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
950 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
951 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
952 && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
953 || GET_CODE (XEXP (x, 1)) == SUBREG)
954 && GET_CODE (XEXP (x, 1)) != CONST)
956 int val = INTVAL (XEXP (XEXP (x, 0), 1));
960 if (GET_CODE (reg1) != REG)
961 reg1 = force_reg (Pmode, force_operand (reg1, 0));
963 reg2 = XEXP (XEXP (x, 0), 0);
964 if (GET_CODE (reg2) != REG)
965 reg2 = force_reg (Pmode, force_operand (reg2, 0));
967 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
974 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
976 Only do so for floating point modes since this is more speculative
977 and we lose if it's an integer store. */
978 if (GET_CODE (x) == PLUS
979 && GET_CODE (XEXP (x, 0)) == PLUS
980 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
981 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
982 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
983 && (mode == SFmode || mode == DFmode))
986 /* First, try and figure out what to use as a base register. */
987 rtx reg1, reg2, base, idx, orig_base;
989 reg1 = XEXP (XEXP (x, 0), 1);
994 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
995 then emit_move_sequence will turn on REG_POINTER so we'll know
996 it's a base register below. */
997 if (GET_CODE (reg1) != REG)
998 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1000 if (GET_CODE (reg2) != REG)
1001 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1003 /* Figure out what the base and index are. */
1005 if (GET_CODE (reg1) == REG
1006 && REG_POINTER (reg1))
1009 orig_base = XEXP (XEXP (x, 0), 1);
1010 idx = gen_rtx_PLUS (Pmode,
1011 gen_rtx_MULT (Pmode,
1012 XEXP (XEXP (XEXP (x, 0), 0), 0),
1013 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1016 else if (GET_CODE (reg2) == REG
1017 && REG_POINTER (reg2))
1020 orig_base = XEXP (x, 1);
1027 /* If the index adds a large constant, try to scale the
1028 constant so that it can be loaded with only one insn. */
1029 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1030 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1031 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1032 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1034 /* Divide the CONST_INT by the scale factor, then add it to A. */
1035 int val = INTVAL (XEXP (idx, 1));
1037 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1038 reg1 = XEXP (XEXP (idx, 0), 0);
1039 if (GET_CODE (reg1) != REG)
1040 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1042 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1044 /* We can now generate a simple scaled indexed address. */
1047 (Pmode, gen_rtx_PLUS (Pmode,
1048 gen_rtx_MULT (Pmode, reg1,
1049 XEXP (XEXP (idx, 0), 1)),
1053 /* If B + C is still a valid base register, then add them. */
1054 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1055 && INTVAL (XEXP (idx, 1)) <= 4096
1056 && INTVAL (XEXP (idx, 1)) >= -4096)
1058 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1061 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1063 reg2 = XEXP (XEXP (idx, 0), 0);
1064 if (GET_CODE (reg2) != CONST_INT)
1065 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1067 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1068 gen_rtx_MULT (Pmode,
1074 /* Get the index into a register, then add the base + index and
1075 return a register holding the result. */
1077 /* First get A into a register. */
1078 reg1 = XEXP (XEXP (idx, 0), 0);
1079 if (GET_CODE (reg1) != REG)
1080 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1082 /* And get B into a register. */
1083 reg2 = XEXP (idx, 1);
1084 if (GET_CODE (reg2) != REG)
1085 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1087 reg1 = force_reg (Pmode,
1088 gen_rtx_PLUS (Pmode,
1089 gen_rtx_MULT (Pmode, reg1,
1090 XEXP (XEXP (idx, 0), 1)),
1093 /* Add the result to our base register and return. */
1094 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1098 /* Uh-oh. We might have an address for x[n-100000]. This needs
1099 special handling to avoid creating an indexed memory address
1100 with x-100000 as the base.
1102 If the constant part is small enough, then it's still safe because
1103 there is a guard page at the beginning and end of the data segment.
1105 Scaled references are common enough that we want to try and rearrange the
1106 terms so that we can use indexing for these addresses too. Only
1107 do the optimization for floating point modes. */
1109 if (GET_CODE (x) == PLUS
1110 && symbolic_expression_p (XEXP (x, 1)))
1112 /* Ugly. We modify things here so that the address offset specified
1113 by the index expression is computed first, then added to x to form
1114 the entire address. */
1116 rtx regx1, regx2, regy1, regy2, y;
1118 /* Strip off any CONST. */
1120 if (GET_CODE (y) == CONST)
1123 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1125 /* See if this looks like
1126 (plus (mult (reg) (shadd_const))
1127 (const (plus (symbol_ref) (const_int))))
1129 Where const_int is small. In that case the const
1130 expression is a valid pointer for indexing.
1132 If const_int is big, but can be divided evenly by shadd_const
1133 and added to (reg). This allows more scaled indexed addresses. */
1134 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1135 && GET_CODE (XEXP (x, 0)) == MULT
1136 && GET_CODE (XEXP (y, 1)) == CONST_INT
1137 && INTVAL (XEXP (y, 1)) >= -4096
1138 && INTVAL (XEXP (y, 1)) <= 4095
1139 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1140 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1142 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1146 if (GET_CODE (reg1) != REG)
1147 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1149 reg2 = XEXP (XEXP (x, 0), 0);
1150 if (GET_CODE (reg2) != REG)
1151 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1153 return force_reg (Pmode,
1154 gen_rtx_PLUS (Pmode,
1155 gen_rtx_MULT (Pmode,
1160 else if ((mode == DFmode || mode == SFmode)
1161 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1162 && GET_CODE (XEXP (x, 0)) == MULT
1163 && GET_CODE (XEXP (y, 1)) == CONST_INT
1164 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1165 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1166 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1169 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1170 / INTVAL (XEXP (XEXP (x, 0), 1))));
1171 regx2 = XEXP (XEXP (x, 0), 0);
1172 if (GET_CODE (regx2) != REG)
1173 regx2 = force_reg (Pmode, force_operand (regx2, 0));
1174 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1178 gen_rtx_PLUS (Pmode,
1179 gen_rtx_MULT (Pmode, regx2,
1180 XEXP (XEXP (x, 0), 1)),
1181 force_reg (Pmode, XEXP (y, 0))));
1183 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1184 && INTVAL (XEXP (y, 1)) >= -4096
1185 && INTVAL (XEXP (y, 1)) <= 4095)
1187 /* This is safe because of the guard page at the
1188 beginning and end of the data space. Just
1189 return the original address. */
1194 /* Doesn't look like one we can optimize. */
1195 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1196 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1197 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1198 regx1 = force_reg (Pmode,
1199 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1201 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1209 /* For the HPPA, REG and REG+CONST is cost 0
1210 and addresses involving symbolic constants are cost 2.
1212 PIC addresses are very expensive.
1214 It is no coincidence that this has the same structure
1215 as GO_IF_LEGITIMATE_ADDRESS. */
1217 hppa_address_cost (X)
/* NOTE(review): the return statements for each arm (and the final
   default) appear to be missing from this extraction -- verify against
   the original pa.c before relying on the cost values above.  */
1220 if (GET_CODE (X) == PLUS)
1222 else if (GET_CODE (X) == LO_SUM)
1224 else if (GET_CODE (X) == HIGH)
1229 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1230 new rtx with the correct mode. */
1232 force_mode (mode, orig)
1233 enum machine_mode mode;
/* If the mode already matches, ORIG itself is returned (the "return
   orig;" appears to be missing from this extraction).  */
1236 if (mode == GET_MODE (orig))
/* Only hard registers may be given a new mode this way; the body of
   the pseudo-register check appears to be missing from this
   extraction (presumably an abort) -- verify.  */
1239 if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
1242 return gen_rtx_REG (mode, REGNO (orig));
1245 /* Emit insns to move operands[1] into operands[0].
1247 Return 1 if we have written out everything that needs to be done to
1248 do the move. Otherwise, return 0 and the caller will emit the move
1251 Note SCRATCH_REG may not be in the proper mode depending on how it
1252 will be used. This routine is responsible for creating a new copy
1253 of SCRATCH_REG in the proper mode. */
1256 emit_move_sequence (operands, mode, scratch_reg)
1258 enum machine_mode mode;
1261 register rtx operand0 = operands[0];
1262 register rtx operand1 = operands[1];
1266 && reload_in_progress && GET_CODE (operand0) == REG
1267 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1268 operand0 = reg_equiv_mem[REGNO (operand0)];
1269 else if (scratch_reg
1270 && reload_in_progress && GET_CODE (operand0) == SUBREG
1271 && GET_CODE (SUBREG_REG (operand0)) == REG
1272 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1274 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1275 the code which tracks sets/uses for delete_output_reload. */
1276 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1277 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1278 SUBREG_BYTE (operand0));
1279 operand0 = alter_subreg (&temp);
1283 && reload_in_progress && GET_CODE (operand1) == REG
1284 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1285 operand1 = reg_equiv_mem[REGNO (operand1)];
1286 else if (scratch_reg
1287 && reload_in_progress && GET_CODE (operand1) == SUBREG
1288 && GET_CODE (SUBREG_REG (operand1)) == REG
1289 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1291 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1292 the code which tracks sets/uses for delete_output_reload. */
1293 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1294 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1295 SUBREG_BYTE (operand1));
1296 operand1 = alter_subreg (&temp);
1299 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1300 && ((tem = find_replacement (&XEXP (operand0, 0)))
1301 != XEXP (operand0, 0)))
1302 operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
1303 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1304 && ((tem = find_replacement (&XEXP (operand1, 0)))
1305 != XEXP (operand1, 0)))
1306 operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
1308 /* Handle secondary reloads for loads/stores of FP registers from
1309 REG+D addresses where D does not fit in 5 bits, including
1310 (subreg (mem (addr))) cases. */
1311 if (fp_reg_operand (operand0, mode)
1312 && ((GET_CODE (operand1) == MEM
1313 && ! memory_address_p (DFmode, XEXP (operand1, 0)))
1314 || ((GET_CODE (operand1) == SUBREG
1315 && GET_CODE (XEXP (operand1, 0)) == MEM
1316 && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
1319 if (GET_CODE (operand1) == SUBREG)
1320 operand1 = XEXP (operand1, 0);
1322 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1323 it in WORD_MODE regardless of what mode it was originally given
1325 scratch_reg = force_mode (word_mode, scratch_reg);
1327 /* D might not fit in 14 bits either; for such cases load D into
1329 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1331 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1332 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1334 XEXP (XEXP (operand1, 0), 0),
1338 emit_move_insn (scratch_reg, XEXP (operand1, 0));
1339 emit_insn (gen_rtx_SET (VOIDmode, operand0,
1340 gen_rtx_MEM (mode, scratch_reg)));
1343 else if (fp_reg_operand (operand1, mode)
1344 && ((GET_CODE (operand0) == MEM
1345 && ! memory_address_p (DFmode, XEXP (operand0, 0)))
1346 || ((GET_CODE (operand0) == SUBREG)
1347 && GET_CODE (XEXP (operand0, 0)) == MEM
1348 && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
1351 if (GET_CODE (operand0) == SUBREG)
1352 operand0 = XEXP (operand0, 0);
1354 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1355 it in WORD_MODE regardless of what mode it was originally given
1357 scratch_reg = force_mode (word_mode, scratch_reg);
1359 /* D might not fit in 14 bits either; for such cases load D into
1361 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1363 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1364 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1367 XEXP (XEXP (operand0, 0),
1372 emit_move_insn (scratch_reg, XEXP (operand0, 0));
1373 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
1377 /* Handle secondary reloads for loads of FP registers from constant
1378 expressions by forcing the constant into memory.
1380 use scratch_reg to hold the address of the memory location.
1382 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1383 NO_REGS when presented with a const_int and an register class
1384 containing only FP registers. Doing so unfortunately creates
1385 more problems than it solves. Fix this for 2.5. */
1386 else if (fp_reg_operand (operand0, mode)
1387 && CONSTANT_P (operand1)
1392 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1393 it in WORD_MODE regardless of what mode it was originally given
1395 scratch_reg = force_mode (word_mode, scratch_reg);
1397 /* Force the constant into memory and put the address of the
1398 memory location into scratch_reg. */
1399 xoperands[0] = scratch_reg;
1400 xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
1401 emit_move_sequence (xoperands, Pmode, 0);
1403 /* Now load the destination register. */
1404 emit_insn (gen_rtx_SET (mode, operand0,
1405 gen_rtx_MEM (mode, scratch_reg)));
1408 /* Handle secondary reloads for SAR. These occur when trying to load
1409 the SAR from memory, FP register, or with a constant. */
1410 else if (GET_CODE (operand0) == REG
1411 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1412 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1413 && (GET_CODE (operand1) == MEM
1414 || GET_CODE (operand1) == CONST_INT
1415 || (GET_CODE (operand1) == REG
1416 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
1419 /* D might not fit in 14 bits either; for such cases load D into
1421 if (GET_CODE (operand1) == MEM
1422 && !memory_address_p (Pmode, XEXP (operand1, 0)))
1424 /* We are reloading the address into the scratch register, so we
1425 want to make sure the scratch register is a full register. */
1426 scratch_reg = force_mode (word_mode, scratch_reg);
1428 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1429 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1432 XEXP (XEXP (operand1, 0),
1436 /* Now we are going to load the scratch register from memory,
1437 we want to load it in the same width as the original MEM,
1438 which must be the same as the width of the ultimate destination,
1440 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1442 emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
1447 /* We want to load the scratch register using the same mode as
1448 the ultimate destination. */
1449 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1451 emit_move_insn (scratch_reg, operand1);
1454 /* And emit the insn to set the ultimate destination. We know that
1455 the scratch register has the same mode as the destination at this
1457 emit_move_insn (operand0, scratch_reg);
1460 /* Handle most common case: storing into a register. */
1461 else if (register_operand (operand0, mode))
1463 if (register_operand (operand1, mode)
1464 || (GET_CODE (operand1) == CONST_INT
1465 && cint_ok_for_move (INTVAL (operand1)))
1466 || (operand1 == CONST0_RTX (mode))
1467 || (GET_CODE (operand1) == HIGH
1468 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1469 /* Only `general_operands' can come here, so MEM is ok. */
1470 || GET_CODE (operand1) == MEM)
1472 /* Run this case quickly. */
1473 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1477 else if (GET_CODE (operand0) == MEM)
1479 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1480 && !(reload_in_progress || reload_completed))
1482 rtx temp = gen_reg_rtx (DFmode);
1484 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1485 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1488 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1490 /* Run this case quickly. */
1491 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1494 if (! (reload_in_progress || reload_completed))
1496 operands[0] = validize_mem (operand0);
1497 operands[1] = operand1 = force_reg (mode, operand1);
1501 /* Simplify the source if we need to.
1502 Note we do have to handle function labels here, even though we do
1503 not consider them legitimate constants. Loop optimizations can
1504 call the emit_move_xxx with one as a source. */
1505 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1506 || function_label_operand (operand1, mode)
1507 || (GET_CODE (operand1) == HIGH
1508 && symbolic_operand (XEXP (operand1, 0), mode)))
1512 if (GET_CODE (operand1) == HIGH)
1515 operand1 = XEXP (operand1, 0);
1517 if (symbolic_operand (operand1, mode))
1519 /* Argh. The assembler and linker can't handle arithmetic
1522 So we force the plabel into memory, load operand0 from
1523 the memory location, then add in the constant part. */
1524 if ((GET_CODE (operand1) == CONST
1525 && GET_CODE (XEXP (operand1, 0)) == PLUS
1526 && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1527 || function_label_operand (operand1, mode))
1529 rtx temp, const_part;
1531 /* Figure out what (if any) scratch register to use. */
1532 if (reload_in_progress || reload_completed)
1534 scratch_reg = scratch_reg ? scratch_reg : operand0;
1535 /* SCRATCH_REG will hold an address and maybe the actual
1536 data. We want it in WORD_MODE regardless of what mode it
1537 was originally given to us. */
1538 scratch_reg = force_mode (word_mode, scratch_reg);
1541 scratch_reg = gen_reg_rtx (Pmode);
1543 if (GET_CODE (operand1) == CONST)
1545 /* Save away the constant part of the expression. */
1546 const_part = XEXP (XEXP (operand1, 0), 1);
1547 if (GET_CODE (const_part) != CONST_INT)
1550 /* Force the function label into memory. */
1551 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1555 /* No constant part. */
1556 const_part = NULL_RTX;
1558 /* Force the function label into memory. */
1559 temp = force_const_mem (mode, operand1);
1563 /* Get the address of the memory location. PIC-ify it if
1565 temp = XEXP (temp, 0);
1567 temp = legitimize_pic_address (temp, mode, scratch_reg);
1569 /* Put the address of the memory location into our destination
1572 emit_move_sequence (operands, mode, scratch_reg);
1574 /* Now load from the memory location into our destination
1576 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1577 emit_move_sequence (operands, mode, scratch_reg);
1579 /* And add back in the constant part. */
1580 if (const_part != NULL_RTX)
1581 expand_inc (operand0, const_part);
1590 if (reload_in_progress || reload_completed)
1592 temp = scratch_reg ? scratch_reg : operand0;
1593 /* TEMP will hold an address and maybe the actual
1594 data. We want it in WORD_MODE regardless of what mode it
1595 was originally given to us. */
1596 temp = force_mode (word_mode, temp);
1599 temp = gen_reg_rtx (Pmode);
1601 /* (const (plus (symbol) (const_int))) must be forced to
1602 memory during/after reload if the const_int will not fit
1604 if (GET_CODE (operand1) == CONST
1605 && GET_CODE (XEXP (operand1, 0)) == PLUS
1606 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1607 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1608 && (reload_completed || reload_in_progress)
1611 operands[1] = force_const_mem (mode, operand1);
1612 operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
1614 emit_move_sequence (operands, mode, temp);
1618 operands[1] = legitimize_pic_address (operand1, mode, temp);
1619 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1622 /* On the HPPA, references to data space are supposed to use dp,
1623 register 27, but showing it in the RTL inhibits various cse
1624 and loop optimizations. */
1629 if (reload_in_progress || reload_completed)
1631 temp = scratch_reg ? scratch_reg : operand0;
1632 /* TEMP will hold an address and maybe the actual
1633 data. We want it in WORD_MODE regardless of what mode it
1634 was originally given to us. */
1635 temp = force_mode (word_mode, temp);
1638 temp = gen_reg_rtx (mode);
1640 /* Loading a SYMBOL_REF into a register makes that register
1641 safe to be used as the base in an indexed address.
1643 Don't mark hard registers though. That loses. */
1644 if (GET_CODE (operand0) == REG
1645 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1646 REG_POINTER (operand0) = 1;
1647 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1648 REG_POINTER (temp) = 1;
1650 set = gen_rtx_SET (mode, operand0, temp);
1652 set = gen_rtx_SET (VOIDmode,
1654 gen_rtx_LO_SUM (mode, temp, operand1));
1656 emit_insn (gen_rtx_SET (VOIDmode,
1658 gen_rtx_HIGH (mode, operand1)));
1664 else if (GET_CODE (operand1) != CONST_INT
1665 || ! cint_ok_for_move (INTVAL (operand1)))
1667 rtx extend = NULL_RTX;
1670 if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
1671 && HOST_BITS_PER_WIDE_INT > 32
1672 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1674 HOST_WIDE_INT val = INTVAL (operand1);
1677 /* Extract the low order 32 bits of the value and sign extend.
1678 If the new value is the same as the original value, we can
1679 can use the original value as-is. If the new value is
1680 different, we use it and insert the most-significant 32-bits
1681 of the original value into the final result. */
1682 nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
1683 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1686 #if HOST_BITS_PER_WIDE_INT > 32
1687 extend = GEN_INT (val >> 32);
1689 operand1 = GEN_INT (nval);
1693 if (reload_in_progress || reload_completed)
1696 temp = gen_reg_rtx (mode);
1698 if (GET_CODE (operand1) == CONST_INT)
1700 /* Directly break constant into low and high parts. This
1701 provides better optimization opportunities because various
1702 passes recognize constants split with PLUS but not LO_SUM.
1703 We use a 14-bit signed low part except when the addition
1704 of 0x4000 to the high part might change the sign of the
1706 HOST_WIDE_INT value = INTVAL (operand1);
1707 HOST_WIDE_INT low = value & 0x3fff;
1708 HOST_WIDE_INT high = value & ~ 0x3fff;
1712 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1720 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1721 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1725 emit_insn (gen_rtx_SET (VOIDmode, temp,
1726 gen_rtx_HIGH (mode, operand1)));
1727 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1730 emit_move_insn (operands[0], operands[1]);
1732 if (extend != NULL_RTX)
1733 emit_insn (gen_insv (operands[0], GEN_INT (32), const0_rtx,
1739 /* Now have insn-emit do whatever it normally does. */
1743 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
1744 it will need a link/runtime reloc). */
/* NOTE(review): the function header and several case labels of this
   switch are on lines elided from this listing.  */
1752 switch (TREE_CODE (exp))
/* Binary nodes: a reloc in either operand matters.  */
1759 reloc = reloc_needed (TREE_OPERAND (exp, 0));
1760 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
1765 case NON_LVALUE_EXPR:
/* Look through value-preserving wrappers.  */
1766 reloc = reloc_needed (TREE_OPERAND (exp, 0));
/* Aggregate initializer: scan every element value.  */
1772 for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
1773 if (TREE_VALUE (link) != 0)
1774 reloc |= reloc_needed (TREE_VALUE (link));
1787 /* Does operand (which is a symbolic_operand) live in text space? If
1788 so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
1791 read_only_operand (operand, mode)
1793 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Strip a CONST wrapper (e.g. symbol+offset) down to the SYMBOL_REF.  */
1795 if (GET_CODE (operand) == CONST)
1796 operand = XEXP (XEXP (operand, 0), 0);
/* NOTE(review): these two SYMBOL_REF tests sit under different (elided)
   conditions -- presumably PIC vs. non-PIC, since they treat constant
   pool references oppositely; confirm against the full source.  */
1799 if (GET_CODE (operand) == SYMBOL_REF)
1800 return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
1804 if (GET_CODE (operand) == SYMBOL_REF)
1805 return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
1811 /* Return the best assembler insn template
1812 for moving operands[1] into operands[0] as a fullword. */
1814 singlemove_string (operands)
1817 HOST_WIDE_INT intval;
/* Store to memory.  */
1819 if (GET_CODE (operands[0]) == MEM)
1820 return "stw %r1,%0";
/* Load from memory; the returned template is on an elided line.  */
1821 if (GET_CODE (operands[1]) == MEM)
1823 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1828 if (GET_MODE (operands[1]) != SFmode)
1831 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
1833 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
1834 REAL_VALUE_TO_TARGET_SINGLE (d, i);
1836 operands[1] = GEN_INT (i);
1837 /* Fall through to CONST_INT case. */
1839 if (GET_CODE (operands[1]) == CONST_INT)
1841 intval = INTVAL (operands[1]);
/* Constants that fit one instruction (ldi/ldil/zdepi); anything else
   takes the two-instruction ldil/ldo sequence.  */
1843 if (VAL_14_BITS_P (intval))
1845 else if ((intval & 0x7ff) == 0)
1846 return "ldil L'%1,%0";
1847 else if (zdepi_cint_p (intval))
1848 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
1850 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
/* Register-to-register copy.  */
1852 return "copy %1,%0";
1856 /* Compute position (in OP[1]) and width (in OP[2])
1857 useful for copying IMM to a register using the zdepi
1858 instructions. Store the immediate value to insert in OP[0]. */
1860 compute_zdepwi_operands (imm, op)
1861 unsigned HOST_WIDE_INT imm;
1866 /* Find the least significant set bit in IMM. */
1867 for (lsb = 0; lsb < 32; lsb++)
1874 /* Choose variants based on *sign* of the 5-bit field. */
1875 if ((imm & 0x10) == 0)
1876 len = (lsb <= 28) ? 4 : 32 - lsb;
1879 /* Find the width of the bitstring in IMM. */
1880 for (len = 5; len < 32; len++)
1882 if ((imm & (1 << len)) == 0)
1886 /* Sign extend IMM as a 5-bit value. */
1887 imm = (imm & 0xf) - 0x10;
/* NOTE(review): the final stores into op[0]/op[1]/op[2] are on lines
   elided from this listing.  */
1895 /* Compute position (in OP[1]) and width (in OP[2])
1896 useful for copying IMM to a register using the depdi,z
1897 instructions. Store the immediate value to insert in OP[0]. */
1899 compute_zdepdi_operands (imm, op)
1900 unsigned HOST_WIDE_INT imm;
1903 HOST_WIDE_INT lsb, len;
1905 /* Find the least significant set bit in IMM. */
1906 for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
1913 /* Choose variants based on *sign* of the 5-bit field. */
1914 if ((imm & 0x10) == 0)
1915 len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
1916 ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
1919 /* Find the width of the bitstring in IMM. */
1920 for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
1922 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
1926 /* Sign extend IMM as a 5-bit value. */
1927 imm = (imm & 0xf) - 0x10;
/* NOTE(review): the final stores into op[0]/op[1]/op[2] are on lines
   elided from this listing.  */
1935 /* Output assembler code to perform a doubleword move insn
1936 with operands OPERANDS. */
/* NOTE(review): in the templates below, the `{old|new}' braces select
   pre-PA2.0 vs. PA2.0 mnemonics and %R apparently names the second
   (late-half) register of a pair -- confirm against print_operand.  */
1939 output_move_double (operands)
1942 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1944 rtx addreg0 = 0, addreg1 = 0;
1946 /* First classify both operands. */
1948 if (REG_P (operands[0]))
1950 else if (offsettable_memref_p (operands[0]))
1952 else if (GET_CODE (operands[0]) == MEM)
1957 if (REG_P (operands[1]))
1959 else if (CONSTANT_P (operands[1]))
1961 else if (offsettable_memref_p (operands[1]))
1963 else if (GET_CODE (operands[1]) == MEM)
1968 /* Check for the cases that the operand constraints are not
1969 supposed to allow to happen. Abort if we get one,
1970 because generating code for these cases is painful. */
1972 if (optype0 != REGOP && optype1 != REGOP)
1975 /* Handle auto decrementing and incrementing loads and stores
1976 specifically, since the structure of the function doesn't work
1977 for them without major modification. Do it better when we learn
1978 this port about the general inc/dec addressing of PA.
1979 (This was written by tege. Chide him if it doesn't work.) */
1981 if (optype0 == MEMOP)
1983 /* We have to output the address syntax ourselves, since print_operand
1984 doesn't deal with the addresses we want to use. Fix this later. */
1986 rtx addr = XEXP (operands[0], 0);
1987 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1989 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1991 operands[0] = XEXP (addr, 0);
1992 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1995 if (!reg_overlap_mentioned_p (high_reg, addr))
1997 /* No overlap between high target register and address
1998 register. (We do this in a non-obvious way to
1999 save a register file writeback) */
2000 if (GET_CODE (addr) == POST_INC)
2001 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2002 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2007 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2009 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2011 operands[0] = XEXP (addr, 0);
2012 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
2015 if (!reg_overlap_mentioned_p (high_reg, addr))
2017 /* No overlap between high target register and address
2018 register. (We do this in a non-obvious way to
2019 save a register file writeback) */
2020 if (GET_CODE (addr) == PRE_INC)
2021 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2022 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2028 if (optype1 == MEMOP)
2030 /* We have to output the address syntax ourselves, since print_operand
2031 doesn't deal with the addresses we want to use. Fix this later. */
2033 rtx addr = XEXP (operands[1], 0);
2034 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2036 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2038 operands[1] = XEXP (addr, 0);
2039 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2042 if (!reg_overlap_mentioned_p (high_reg, addr))
2044 /* No overlap between high target register and address
2045 register. (We do this in a non-obvious way to
2046 save a register file writeback) */
2047 if (GET_CODE (addr) == POST_INC)
2048 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2049 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2053 /* This is an undefined situation. We should load into the
2054 address register *and* update that register. Probably
2055 we don't need to handle this at all. */
2056 if (GET_CODE (addr) == POST_INC)
2057 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2058 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2061 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2063 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2065 operands[1] = XEXP (addr, 0);
2066 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
2069 if (!reg_overlap_mentioned_p (high_reg, addr))
2071 /* No overlap between high target register and address
2072 register. (We do this in a non-obvious way to
2073 save a register file writeback) */
2074 if (GET_CODE (addr) == PRE_INC)
2075 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2076 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2080 /* This is an undefined situation. We should load into the
2081 address register *and* update that register. Probably
2082 we don't need to handle this at all. */
2083 if (GET_CODE (addr) == PRE_INC)
2084 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2085 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2088 else if (GET_CODE (addr) == PLUS
2089 && GET_CODE (XEXP (addr, 0)) == MULT)
2091 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
/* Scaled-index source address: materialize base+index*scale with a
   shladd into one of the destination registers, then load through it.
   Which register is safe depends on overlap with the address.  */
2093 if (!reg_overlap_mentioned_p (high_reg, addr))
2097 xoperands[0] = high_reg;
2098 xoperands[1] = XEXP (addr, 1);
2099 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2100 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2101 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2103 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2109 xoperands[0] = high_reg;
2110 xoperands[1] = XEXP (addr, 1);
2111 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2112 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2113 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2115 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2120 /* If an operand is an unoffsettable memory ref, find a register
2121 we can increment temporarily to make it refer to the second word. */
2123 if (optype0 == MEMOP)
2124 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2126 if (optype1 == MEMOP)
2127 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2129 /* Ok, we can do one word at a time.
2130 Normally we do the low-numbered word first.
2132 In either case, set up in LATEHALF the operands to use
2133 for the high-numbered word and in some cases alter the
2134 operands in OPERANDS to be suitable for the low-numbered word. */
2136 if (optype0 == REGOP)
2137 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2138 else if (optype0 == OFFSOP)
2139 latehalf[0] = adjust_address (operands[0], SImode, 4);
2141 latehalf[0] = operands[0];
2143 if (optype1 == REGOP)
2144 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2145 else if (optype1 == OFFSOP)
2146 latehalf[1] = adjust_address (operands[1], SImode, 4);
2147 else if (optype1 == CNSTOP)
2148 split_double (operands[1], &operands[1], &latehalf[1]);
2150 latehalf[1] = operands[1];
2152 /* If the first move would clobber the source of the second one,
2153 do them in the other order.
2155 This can happen in two cases:
2157 mem -> register where the first half of the destination register
2158 is the same register used in the memory's address. Reload
2159 can create such insns.
2161 mem in this case will be either register indirect or register
2162 indirect plus a valid offset.
2164 register -> register move where REGNO(dst) == REGNO(src + 1)
2165 someone (Tim/Tege?) claimed this can happen for parameter loads.
2167 Handle mem -> register case first. */
2168 if (optype0 == REGOP
2169 && (optype1 == MEMOP || optype1 == OFFSOP)
2170 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2173 /* Do the late half first. */
2175 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2176 output_asm_insn (singlemove_string (latehalf), latehalf);
2180 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2181 return singlemove_string (operands);
2184 /* Now handle register -> register case. */
2185 if (optype0 == REGOP && optype1 == REGOP
2186 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2188 output_asm_insn (singlemove_string (latehalf), latehalf);
2189 return singlemove_string (operands);
2192 /* Normal case: do the two words, low-numbered first. */
2194 output_asm_insn (singlemove_string (operands), operands);
2196 /* Make any unoffsettable addresses point at high-numbered word. */
2198 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2200 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2203 output_asm_insn (singlemove_string (latehalf), latehalf);
2205 /* Undo the adds we just did. */
2207 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2209 output_asm_insn ("ldo -4(%0),%0", &addreg1);
/* Output the assembler template for a doubleword FP move:
   FP-to-FP copy, FP load, FP store, or zeroing an integer
   register pair.  */
2215 output_fp_move_double (operands)
2218 if (FP_REG_P (operands[0]))
2220 if (FP_REG_P (operands[1])
2221 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2222 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2224 output_asm_insn ("fldd%F1 %1,%0", operands);
2226 else if (FP_REG_P (operands[1]))
2228 output_asm_insn ("fstd%F0 %1,%0", operands);
2230 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2232 if (GET_CODE (operands[0]) == REG)
2235 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2236 xoperands[0] = operands[0];
/* Zero both words of the integer register pair via %r0.  */
2237 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2239 /* This is a pain. You have to be prepared to deal with an
2240 arbitrary address here including pre/post increment/decrement.
2242 so avoid this in the MD. */
2250 /* Return a REG that occurs in ADDR with coefficient 1.
2251 ADDR can be effectively incremented by incrementing REG. */
2254 find_addr_reg (addr)
/* Walk down PLUS chains, preferring a bare REG operand and stepping
   over constant terms.  */
2257 while (GET_CODE (addr) == PLUS)
2259 if (GET_CODE (XEXP (addr, 0)) == REG)
2260 addr = XEXP (addr, 0);
2261 else if (GET_CODE (XEXP (addr, 1)) == REG)
2262 addr = XEXP (addr, 1);
2263 else if (CONSTANT_P (XEXP (addr, 0)))
2264 addr = XEXP (addr, 1);
2265 else if (CONSTANT_P (XEXP (addr, 1)))
2266 addr = XEXP (addr, 0);
/* What remains must be the register itself.  */
2270 if (GET_CODE (addr) == REG)
2275 /* Emit code to perform a block move.
2277 OPERANDS[0] is the destination pointer as a REG, clobbered.
2278 OPERANDS[1] is the source pointer as a REG, clobbered.
2279 OPERANDS[2] is a register for temporary storage.
2280 OPERANDS[4] is the size as a CONST_INT
2281 OPERANDS[3] is a register for temporary storage.
2282 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2283 OPERANDS[6] is another temporary register. */
2286 output_block_move (operands, size_is_constant)
2288 int size_is_constant ATTRIBUTE_UNUSED;
2290 int align = INTVAL (operands[5]);
2291 unsigned long n_bytes = INTVAL (operands[4]);
2293 /* We can't move more than four bytes at a time because the PA
2294 has no longer integer move insns. (Could use fp mem ops?) */
2298 /* Note that we know each loop below will execute at least twice
2299 (else we would have open-coded the copy). */
/* Word-aligned copy: move 8 bytes per iteration with two
   post-modify load/store pairs.  */
2303 /* Pre-adjust the loop counter. */
2304 operands[4] = GEN_INT (n_bytes - 8);
2305 output_asm_insn ("ldi %4,%2", operands);
2308 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2309 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2310 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2311 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2312 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2314 /* Handle the residual. There could be up to 7 bytes of
2315 residual to copy! */
2316 if (n_bytes % 8 != 0)
2318 operands[4] = GEN_INT (n_bytes % 4);
2319 if (n_bytes % 8 >= 4)
2320 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2321 if (n_bytes % 4 != 0)
2322 output_asm_insn ("ldw 0(%1),%6", operands);
2323 if (n_bytes % 8 >= 4)
2324 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2325 if (n_bytes % 4 != 0)
2326 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
/* Halfword-aligned copy: 4 bytes per iteration.  */
2331 /* Pre-adjust the loop counter. */
2332 operands[4] = GEN_INT (n_bytes - 4);
2333 output_asm_insn ("ldi %4,%2", operands);
2336 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2337 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2338 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2339 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2340 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2342 /* Handle the residual. */
2343 if (n_bytes % 4 != 0)
2345 if (n_bytes % 4 >= 2)
2346 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2347 if (n_bytes % 2 != 0)
2348 output_asm_insn ("ldb 0(%1),%6", operands);
2349 if (n_bytes % 4 >= 2)
2350 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2351 if (n_bytes % 2 != 0)
2352 output_asm_insn ("stb %6,0(%0)", operands);
/* Unaligned copy: 2 bytes per iteration.  */
2357 /* Pre-adjust the loop counter. */
2358 operands[4] = GEN_INT (n_bytes - 2);
2359 output_asm_insn ("ldi %4,%2", operands);
2362 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2363 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2364 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2365 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2366 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2368 /* Handle the residual. */
2369 if (n_bytes % 2 != 0)
2371 output_asm_insn ("ldb 0(%1),%3", operands);
2372 output_asm_insn ("stb %3,0(%0)", operands);
2381 /* Count the number of insns necessary to handle this block move.
2383 Basic structure is the same as emit_block_move, except that we
2384 count insns rather than emit them. */
2387 compute_movstrsi_length (insn)
2390 rtx pat = PATTERN (insn);
2391 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2392 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2393 unsigned int n_insns = 0;
2395 /* We can't move more than four bytes at a time because the PA
2396 has no longer integer move insns. (Could use fp mem ops?) */
2400 /* The basic copying loop. */
/* Residual bytes add up to two more load/store pairs.  */
2404 if (n_bytes % (2 * align) != 0)
2406 if ((n_bytes % (2 * align)) >= align)
2409 if ((n_bytes % align) != 0)
2413 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
/* Return the assembler template for ANDing operands[1] with the
   constant mask operands[2], result in operands[0].  Contiguous-field
   masks use a single extract or deposit instead of a plain `and'.  */
2419 output_and (operands)
2422 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2424 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2425 int ls0, ls1, ms0, p, len;
/* Locate the run of zero bits in MASK: ls0 = lowest zero bit,
   ls1 = next one bit above it, ms0 = next zero bit above that.  */
2427 for (ls0 = 0; ls0 < 32; ls0++)
2428 if ((mask & (1 << ls0)) == 0)
2431 for (ls1 = ls0; ls1 < 32; ls1++)
2432 if ((mask & (1 << ls1)) != 0)
2435 for (ms0 = ls1; ms0 < 32; ms0++)
2436 if ((mask & (1 << ms0)) == 0)
/* Low-order contiguous mask: one extract does the AND.  */
2449 operands[2] = GEN_INT (len);
2450 return "{extru|extrw,u} %1,31,%2,%0";
2454 /* We could use this `depi' for the case above as well, but `depi'
2455 requires one more register file access than an `extru'. */
2460 operands[2] = GEN_INT (p);
2461 operands[3] = GEN_INT (len);
2462 return "{depi|depwi} 0,%2,%3,%0";
/* General case: plain AND.  */
2466 return "and %1,%2,%0";
2469 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2470 storing the result in operands[0]. */
/* 64-bit analogue of output_and: return the assembler template for a
   DImode AND, using extrd,u / depdi where the constant mask permits.  */
2472 output_64bit_and (operands)
2475 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2477 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
/* ls0 = lowest clear bit, ls1 = next set bit above it,
   ms0 = next clear bit above that.  */
2478 int ls0, ls1, ms0, p, len;
2480 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2481 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2484 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2485 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2488 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2489 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
/* Consistency checks on the bit runs (the actions taken on failure
   are elided in this listing).  */
2492 if (ms0 != HOST_BITS_PER_WIDE_INT)
2495 if (ls1 == HOST_BITS_PER_WIDE_INT)
/* Mask is a low-order run of ones: one unsigned 64-bit extract.  */
2502 operands[2] = GEN_INT (len);
2503 return "extrd,u %1,63,%2,%0";
2507 /* We could use this `depi' for the case above as well, but `depi'
2508 requires one more register file access than an `extru'. */
/* Mask has one interior run of zeros: clear it with a 64-bit
   deposit of 0.  */
2513 operands[2] = GEN_INT (p);
2514 operands[3] = GEN_INT (len);
2515 return "depdi 0,%2,%3,%0";
2519 return "and %1,%2,%0";
/* Return the assembler template for a 32-bit inclusive-or of
   operands[1] with the CONST_INT operands[2] into operands[0].  The
   mask must be a single contiguous run of ones, which is set with a
   "deposit -1" insn; a zero mask degenerates to a register copy.  */
2523 output_ior (operands)
2526 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
/* bs0 = lowest set bit, bs1 = first clear bit above it.  */
2527 int bs0, bs1, p, len;
2529 if (INTVAL (operands[2]) == 0)
2530 return "copy %1,%0";
2532 for (bs0 = 0; bs0 < 32; bs0++)
2533 if ((mask & (1 << bs0)) != 0)
2536 for (bs1 = bs0; bs1 < 32; bs1++)
2537 if ((mask & (1 << bs1)) == 0)
/* Sanity check: no set bits may remain above bs1, i.e. the mask is
   one contiguous run (the failure action is elided in this listing;
   presumably an abort).  */
2540 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2546 operands[2] = GEN_INT (p);
2547 operands[3] = GEN_INT (len);
2548 return "{depi|depwi} -1,%2,%3,%0";
2551 /* Return a string to perform a bitwise inclusive-or of operands[1] with
2552    operands[2], storing the result in operands[0]. */
/* 64-bit analogue of output_ior: emit a "depdi -1" for a contiguous
   run of ones, or a copy when the constant mask is zero.  */
2554 output_64bit_ior (operands)
2557 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
/* bs0 = lowest set bit, bs1 = first clear bit above it.  */
2558 int bs0, bs1, p, len;
2560 if (INTVAL (operands[2]) == 0)
2561 return "copy %1,%0";
2563 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2564 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
2567 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2568 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
/* Sanity check: the mask must be one contiguous run of ones (the
   failure action is elided in this listing).  */
2571 if (bs1 != HOST_BITS_PER_WIDE_INT
2572 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2578 operands[2] = GEN_INT (p);
2579 operands[3] = GEN_INT (len);
2580 return "depdi -1,%2,%3,%0";
2583 /* Target hook for assembling integer objects. This code handles
2584 aligned SI and DI integers specially, since function references must
2585 be preceded by P%. */
/* Target hook: assemble the integer X of SIZE bytes.  Aligned,
   word-sized function references get the "P%" plabel prefix required
   by the HP linker; anything else uses the default hook.  */
2588 pa_assemble_integer (x, size, aligned_p)
2593 if (size == UNITS_PER_WORD && aligned_p
2594 && function_label_operand (x, VOIDmode))
/* .dword for 64-bit words, .word otherwise.  */
2596 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
2597 output_addr_const (asm_out_file, x);
2598 fputc ('\n', asm_out_file);
2601 return default_assemble_integer (x, size, aligned_p);
2604 /* Output an ascii string. */
/* Write the SIZE bytes at P to FILE as one or more .STRING directives,
   escaping quote/backslash and emitting non-printable bytes as \xNN.  */
2606 output_ascii (file, p, size)
2613 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2615 /* The HP assembler can only take strings of 256 characters at one
2616 time. This is a limitation on input line length, *not* the
2617 length of the string. Sigh. Even worse, it seems that the
2618 restriction is in number of input characters (see \xnn &
2619 \whatever). So we have to do this very carefully. */
2621 fputs ("\t.STRING \"", file);
/* Process four input bytes at a time; each byte expands to at most
   four output characters, hence partial_output[16].  */
2624 for (i = 0; i < size; i += 4)
2628 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2630 register unsigned int c = (unsigned char) p[i + io];
/* Quote and backslash must themselves be escaped.  */
2632 if (c == '\"' || c == '\\')
2633 partial_output[co++] = '\\';
/* Printable ASCII (space .. 0176) passes through unchanged.  */
2634 if (c >= ' ' && c < 0177)
2635 partial_output[co++] = c;
/* Otherwise emit \xNN with lower-case hex digits; the subtraction
   folds '9'+1 .. onto 'a' ...  */
2639 partial_output[co++] = '\\';
2640 partial_output[co++] = 'x';
2641 hexd = c / 16 - 0 + '0';
2643 hexd -= '9' - 'a' + 1;
2644 partial_output[co++] = hexd;
2645 hexd = c % 16 - 0 + '0';
2647 hexd -= '9' - 'a' + 1;
2648 partial_output[co++] = hexd;
/* Start a fresh .STRING before hitting the assembler's input-line
   limit; 243 leaves headroom below 256 for the directive text.  */
2651 if (chars_output + co > 243)
2653 fputs ("\"\n\t.STRING \"", file);
2656 fwrite (partial_output, 1, (size_t) co, file);
2660 fputs ("\"\n", file);
2663 /* Try to rewrite floating point comparisons & branches to avoid
2664 useless add,tr insns.
2666 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2667 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2668 first attempt to remove useless add,tr insns. It is zero
2669 for the second pass as reorg sometimes leaves bogus REG_DEAD
2672 When CHECK_NOTES is zero we can only eliminate add,tr insns
2673 when there's a 1:1 correspondence between fcmp and ftest/fbranch
/* See the block comment above: rewrite reversed FP compare+branch
   pairs so the branch tests the normal sense of the condition,
   avoiding the add,tr instruction reorg would otherwise emit.  */
2676 remove_useless_addtr_insns (insns, check_notes)
2681 static int pass = 0;
2683 /* This is fairly cheap, so always run it when optimizing. */
2687 int fbranch_count = 0;
2689 /* Walk all the insns in this function looking for fcmp & fbranch
2690 instructions. Keep track of how many of each we find. */
2691 insns = get_insns ();
2692 for (insn = insns; insn; insn = next_insn (insn))
2696 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2697 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2700 tmp = PATTERN (insn);
2702 /* It must be a set. */
2703 if (GET_CODE (tmp) != SET)
/* Register 0 here is CCFP, the FP condition register.  */
2706 /* If the destination is CCFP, then we've found an fcmp insn. */
2707 tmp = SET_DEST (tmp);
2708 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2714 tmp = PATTERN (insn);
2715 /* If this is an fbranch instruction, bump the fbranch counter. */
2716 if (GET_CODE (tmp) == SET
2717 && SET_DEST (tmp) == pc_rtx
2718 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2719 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2720 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2721 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2729 /* Find all floating point compare + branch insns. If possible,
2730 reverse the comparison & the branch to avoid add,tr insns. */
2731 for (insn = insns; insn; insn = next_insn (insn))
2735 /* Ignore anything that isn't an INSN. */
2736 if (GET_CODE (insn) != INSN)
2739 tmp = PATTERN (insn);
2741 /* It must be a set. */
2742 if (GET_CODE (tmp) != SET)
2745 /* The destination must be CCFP, which is register zero. */
2746 tmp = SET_DEST (tmp);
2747 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2750 /* INSN should be a set of CCFP.
2752 See if the result of this insn is used in a reversed FP
2753 conditional branch. If so, reverse our condition and
2754 the branch. Doing so avoids useless add,tr insns. */
/* Scan forward for the consumer of this fcmp.  */
2755 next = next_insn (insn);
2758 /* Jumps, calls and labels stop our search. */
2759 if (GET_CODE (next) == JUMP_INSN
2760 || GET_CODE (next) == CALL_INSN
2761 || GET_CODE (next) == CODE_LABEL)
2764 /* As does another fcmp insn. */
2765 if (GET_CODE (next) == INSN
2766 && GET_CODE (PATTERN (next)) == SET
2767 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2768 && REGNO (SET_DEST (PATTERN (next))) == 0)
2771 next = next_insn (next);
2774 /* Is NEXT_INSN a branch? */
2776 && GET_CODE (next) == JUMP_INSN)
2778 rtx pattern = PATTERN (next);
2780 /* If it a reversed fp conditional branch (eg uses add,tr)
2781 and CCFP dies, then reverse our conditional and the branch
2782 to avoid the add,tr. */
/* "Reversed" here means the fall-through arm is in position 1
   (a PC in XEXP (..., 1)); the rewrite is safe when fcmp and
   fbranch counts match 1:1 or CCFP is known dead here.  */
2783 if (GET_CODE (pattern) == SET
2784 && SET_DEST (pattern) == pc_rtx
2785 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2786 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2787 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2788 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2789 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2790 && (fcmp_count == fbranch_count
2792 && find_regno_note (next, REG_DEAD, 0))))
2794 /* Reverse the branch. */
/* Swap the two arms of the IF_THEN_ELSE and force the insn to
   be re-recognized.  */
2795 tmp = XEXP (SET_SRC (pattern), 1);
2796 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2797 XEXP (SET_SRC (pattern), 2) = tmp;
2798 INSN_CODE (next) = -1;
2800 /* Reverse our condition. */
/* Use the unordered-aware reversal so NaN comparisons keep
   their IEEE semantics.  */
2801 tmp = PATTERN (insn);
2802 PUT_CODE (XEXP (tmp, 1),
2803 (reverse_condition_maybe_unordered
2804 (GET_CODE (XEXP (tmp, 1)))));
2814 /* You may have trouble believing this, but this is the 32 bit HP-PA
2819 Variable arguments (optional; any number may be allocated)
2821 SP-(4*(N+9)) arg word N
2826 Fixed arguments (must be allocated; may remain unused)
2835 SP-32 External Data Pointer (DP)
2837 SP-24 External/stub RP (RP')
2841 SP-8 Calling Stub RP (RP'')
2846 SP-0 Stack Pointer (points to next available address)
2850 /* This function saves registers as follows. Registers marked with ' are
2851 this function's registers (as opposed to the previous function's).
2852 If a frame_pointer isn't needed, r4 is saved as a general register;
2853 the space for the frame pointer is still allocated, though, to keep
2859 SP (FP') Previous FP
2860 SP + 4 Alignment filler (sigh)
2861 SP + 8 Space for locals reserved here.
2865 SP + n All call saved register used.
2869 SP + o All call saved fp registers used.
2873 SP + p (SP') points to next available address.
2877 /* Global variables set by output_function_prologue(). */
2878 /* Size of frame. Need to know this to emit return insns from
2880 static int actual_fsize;
/* local_fsize: locals-only portion of the frame; save_fregs: set
   nonzero (via compute_frame_size) when any callee-saved FP register
   must be saved.  */
2881 static int local_fsize, save_fregs;
2883 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2884 Handle case where DISP > 8k by using the add_high_const patterns.
2886 Note in DISP > 8k case, we will leave the high part of the address
2887 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
/* Emit a store of register REG to BASE+DISP; see the comment above
   for the DISP > 8k convention of leaving the high part in %r1.  */
2890 store_reg (reg, disp, base)
2891 int reg, disp, base;
2893 rtx insn, dest, src, basereg;
2895 src = gen_rtx_REG (word_mode, reg);
2896 basereg = gen_rtx_REG (Pmode, base);
/* A 14-bit displacement fits directly in the store insn.  */
2897 if (VAL_14_BITS_P (disp))
2899 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
2900 insn = emit_move_insn (dest, src);
/* Otherwise form the address in two steps through %r1 using the
   HIGH/LO_SUM pair; the high part stays in %r1 for possible reuse.  */
2904 rtx delta = GEN_INT (disp);
2905 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
2906 rtx tmpreg = gen_rtx_REG (Pmode, 1);
2907 emit_move_insn (tmpreg, high);
2908 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
2909 insn = emit_move_insn (dest, src);
/* Describe the save with a frame note as a plain BASE+DISP store so
   the unwinder need not understand the %r1 temporary.  */
2913 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2914 gen_rtx_SET (VOIDmode,
2915 gen_rtx_MEM (word_mode,
2916 gen_rtx_PLUS (word_mode, basereg,
2924 RTX_FRAME_RELATED_P (insn) = 1;
2927 /* Emit RTL to store REG at the memory location specified by BASE and then
2928 add MOD to BASE. MOD must be <= 8k. */
/* Emit a post-modify store: *BASE = REG, then BASE += MOD.
   MOD must fit in 14 bits (see the comment above).  */
2931 store_reg_modify (base, reg, mod)
2934 rtx insn, basereg, srcreg, delta;
/* Caller contract violated if MOD needs more than 14 bits (the
   failure action is elided in this listing).  */
2936 if (! VAL_14_BITS_P (mod))
2939 basereg = gen_rtx_REG (Pmode, base);
2940 srcreg = gen_rtx_REG (word_mode, reg);
2941 delta = GEN_INT (mod);
2943 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
2946 RTX_FRAME_RELATED_P (insn) = 1;
2948 /* RTX_FRAME_RELATED_P must be set on each frame related set
2949 in a parallel with more than one element. Don't set
2950 RTX_FRAME_RELATED_P in the first set if reg is temporary
2951 register 1. The effect of this operation is recorded in
2952 the initial copy. */
2955 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
2956 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2960 /* The first element of a PARALLEL is always processed if it is
2961 a SET. Thus, we need an expression list for this case. */
/* Frame note: record only the base-register adjustment.  */
2963 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2964 gen_rtx_SET (VOIDmode, basereg,
2965 gen_rtx_PLUS (word_mode, basereg, delta)),
2971 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
2972 where DISP > 8k by using the add_high_const patterns. NOTE indicates
2973 whether to add a frame note or not.
2975 In the DISP > 8k case, we leave the high part of the address in %r1.
2976 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
/* Set register REG to BASE+DISP; NOTE nonzero requests a frame note.
   See the comment above for the DISP > 8k / %r1 convention.  */
2979 set_reg_plus_d (reg, base, disp, note)
2980 int reg, base, disp, note;
/* Small displacement: one add immediate.  */
2984 if (VAL_14_BITS_P (disp))
2986 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
2987 plus_constant (gen_rtx_REG (Pmode, base), disp));
/* Large displacement: HIGH into %r1, then LO_SUM into REG.  */
2991 rtx basereg = gen_rtx_REG (Pmode, base);
2992 rtx delta = GEN_INT (disp);
2994 emit_move_insn (gen_rtx_REG (Pmode, 1),
2995 gen_rtx_PLUS (Pmode, basereg,
2996 gen_rtx_HIGH (Pmode, delta)));
2997 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
2998 gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
3002 if (DO_FRAME_NOTES && note)
3003 RTX_FRAME_RELATED_P (insn) = 1;
/* Compute the total stack frame size for the current function given
   SIZE bytes of locals; sets *FREGS_LIVE if any callee-saved FP
   register must be saved.  Returns the size rounded to the stack
   boundary.  */
3007 compute_frame_size (size, fregs_live)
3013 /* Space for frame pointer + filler. If any frame is allocated
3014 we need to add this in because of STARTING_FRAME_OFFSET.
3016 Similar code also appears in hppa_expand_prologue. Change both
3017 of them at the same time. */
3018 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
3020 /* If the current function calls __builtin_eh_return, then we need
3021 to allocate stack space for registers that will hold data for
3022 the exception handler. */
3023 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3027 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3029 fsize += i * UNITS_PER_WORD;
3032 /* Account for space used by the callee general register saves. */
/* %r3..%r18 are the callee-saved GRs on the PA.  */
3033 for (i = 18; i >= 3; i--)
3034 if (regs_ever_live[i])
3035 fsize += UNITS_PER_WORD;
3037 /* Round the stack. */
/* Doubleword-align before the FP save area.  */
3038 fsize = (fsize + 7) & ~7;
3040 /* Account for space used by the callee floating point register saves. */
/* In 32-bit mode FP regs are saved in SFmode halves, so the partner
   half (i + 1) being live also forces a save.  */
3041 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3042 if (regs_ever_live[i]
3043 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3048 /* We always save both halves of the FP register, so always
3049 increment the frame size by 8 bytes. */
3053 /* The various ABIs include space for the outgoing parameters in the
3054 size of the current function's stack frame. */
3055 fsize += current_function_outgoing_args_size;
3057 /* Allocate space for the fixed frame marker. This space must be
3058 allocated for any function that makes calls or otherwise allocates
3060 if (!current_function_is_leaf || fsize)
3061 fsize += TARGET_64BIT ? 16 : 32;
3063 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
3066 /* Generate the assembly code for function entry. FILE is a stdio
3067 stream to output the code to. SIZE is an int: how many units of
3068 temporary storage to allocate.
3070 Refer to the array `regs_ever_live' to determine which registers to
3071 save; `regs_ever_live[I]' is nonzero if register number I is ever
3072 used in the function. This function is responsible for knowing
3073 which registers should not be saved even if used. */
3075 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3076 of memory. If any fpu reg is used in the function, we allocate
3077 such a block here, at the bottom of the frame, just in case it's needed.
3079 If this function is a leaf procedure, then we may choose not
3080 to do a "save" insn. The decision about whether or not
3081 to do this is made in regclass.c. */
/* Target hook: emit the assembler directives that open a function
   (.PROC/.CALLINFO/.ENTRY); the actual prologue RTL is generated by
   hppa_expand_prologue.  */
3084 pa_output_function_prologue (file, size)
3086 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3088 /* The function's label and associated .PROC must never be
3089 separated and must be output *after* any profiling declarations
3090 to avoid changing spaces/subspaces within a procedure. */
3091 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3092 fputs ("\t.PROC\n", file);
3094 /* hppa_expand_prologue does the dirty work now. We just need
3095 to output the assembler directives which denote the start
3097 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
/* %r2 is the return pointer (RP); if it is ever live we saved it.  */
3098 if (regs_ever_live[2])
3099 fputs (",CALLS,SAVE_RP", file);
3101 fputs (",NO_CALLS", file);
3103 if (frame_pointer_needed)
3104 fputs (",SAVE_SP", file);
3106 /* Pass on information about the number of callee register saves
3107 performed in the prologue.
3109 The compiler is supposed to pass the highest register number
3110 saved, the assembler then has to adjust that number before
3111 entering it into the unwind descriptor (to account for any
3112 caller saved registers with lower register numbers than the
3113 first callee saved register). */
3115 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3118 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3120 fputs ("\n\t.ENTRY\n", file);
3122 /* When profiling, we need a local label at the beginning of the
3123 prologue because GAS can't handle the difference of a global symbol
3124 and a local symbol. */
3125 if (current_function_profile)
3127 ASM_OUTPUT_INTERNAL_LABEL (file, FUNC_BEGIN_PROLOG_LABEL,
3128 current_function_number);
3129 current_function_number++;
3132 /* If we're using GAS and not using the portable runtime model, then
3133 we don't need to accumulate the total number of code bytes. */
3134 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
3135 total_code_bytes = 0;
3136 else if (INSN_ADDRESSES_SET_P ())
3138 unsigned int old_total = total_code_bytes;
3140 total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_insn ()));
3141 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
3143 /* Be prepared to handle overflows. */
/* -1 marks "unknown/overflowed" so later size checks stay safe.  */
3144 if (old_total > total_code_bytes)
3145 total_code_bytes = -1;
3148 total_code_bytes = -1;
/* Second add,tr-removal pass; check_notes == 0 because reorg may
   have left stale REG_DEAD notes (see remove_useless_addtr_insns).  */
3150 remove_useless_addtr_insns (get_insns (), 0);
/* Emit the RTL prologue: allocate the frame, save RP, the frame
   pointer, callee-saved general registers, EH data registers and
   callee-saved FP registers, per the stack layout comment above.  */
3154 hppa_expand_prologue ()
3156 int size = get_frame_size ();
3157 int merge_sp_adjust_with_store = 0;
3165 /* Allocate space for frame pointer + filler. If any frame is allocated
3166 we need to add this in because of STARTING_FRAME_OFFSET.
3168 Similar code also appears in compute_frame_size. Change both
3169 of them at the same time. */
3170 local_fsize = size + (size || frame_pointer_needed
3171 ? STARTING_FRAME_OFFSET : 0);
3173 actual_fsize = compute_frame_size (size, &save_fregs);
3175 /* Compute a few things we will use often. */
3176 tmpreg = gen_rtx_REG (word_mode, 1);
3178 /* Save RP first. The calling conventions manual states RP will
3179 always be stored into the caller's frame at sp - 20 or sp - 16
3180 depending on which ABI is in use. */
3181 if (regs_ever_live[2] || current_function_calls_eh_return)
3182 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3184 /* Allocate the local frame and set up the frame pointer if needed. */
3185 if (actual_fsize != 0)
3187 if (frame_pointer_needed)
3189 /* Copy the old frame pointer temporarily into %r1. Set up the
3190 new stack pointer, then store away the saved old frame pointer
3191 into the stack at sp and at the same time update the stack
3192 pointer by actual_fsize bytes. Two versions, first
3193 handles small (<8k) frames. The second handles large (>=8k)
3195 insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3198 /* We need to record the frame pointer save here since the
3199 new frame pointer is set in the following insn. */
3200 RTX_FRAME_RELATED_P (insn) = 1;
3202 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3203 gen_rtx_SET (VOIDmode,
3204 gen_rtx_MEM (word_mode, stack_pointer_rtx),
3209 insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3211 RTX_FRAME_RELATED_P (insn) = 1;
/* Small frame: one post-modify store does the FP save and the
   whole SP adjustment.  */
3213 if (VAL_14_BITS_P (actual_fsize))
3214 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3217 /* It is incorrect to store the saved frame pointer at *sp,
3218 then increment sp (writes beyond the current stack boundary).
3220 So instead use stwm to store at *sp and post-increment the
3221 stack pointer as an atomic operation. Then increment sp to
3222 finish allocating the new frame. */
/* 8192-64 keeps the first adjustment within the 14-bit range
   of the post-modify store.  */
3223 int adjust1 = 8192 - 64;
3224 int adjust2 = actual_fsize - adjust1;
3226 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3227 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3231 /* Prevent register spills from being scheduled before the
3232 stack pointer is raised. Necessary as we will be storing
3233 registers using the frame pointer as a base register, and
3234 we happen to set fp before raising sp. */
3235 emit_insn (gen_blockage ());
3237 /* no frame pointer needed. */
3240 /* In some cases we can perform the first callee register save
3241 and allocating the stack frame at the same time. If so, just
3242 make a note of it and defer allocating the frame until saving
3243 the callee registers. */
3244 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3245 merge_sp_adjust_with_store = 1;
3246 /* Can not optimize. Adjust the stack frame by actual_fsize
3249 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3254 /* Normal register save.
3256 Do not save the frame pointer in the frame_pointer_needed case. It
3257 was done earlier. */
3258 if (frame_pointer_needed)
3260 offset = local_fsize;
3262 /* Saving the EH return data registers in the frame is the simplest
3263 way to get the frame unwind information emitted. We put them
3264 just before the general registers. */
3265 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3267 unsigned int i, regno;
3271 regno = EH_RETURN_DATA_REGNO (i);
3272 if (regno == INVALID_REGNUM)
3275 store_reg (regno, offset, FRAME_POINTER_REGNUM);
3276 offset += UNITS_PER_WORD;
/* Save callee-saved GRs %r4..%r18 relative to the frame pointer
   (%r3 itself was saved as the old FP above).  */
3280 for (i = 18; i >= 4; i--)
3281 if (regs_ever_live[i] && ! call_used_regs[i])
3283 store_reg (i, offset, FRAME_POINTER_REGNUM);
3284 offset += UNITS_PER_WORD;
3287 /* Account for %r3 which is saved in a special place. */
3290 /* No frame pointer needed. */
3293 offset = local_fsize - actual_fsize;
3295 /* Saving the EH return data registers in the frame is the simplest
3296 way to get the frame unwind information emitted. */
3297 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3299 unsigned int i, regno;
3303 regno = EH_RETURN_DATA_REGNO (i);
3304 if (regno == INVALID_REGNUM)
3307 /* If merge_sp_adjust_with_store is nonzero, then we can
3308 optimize the first save. */
3309 if (merge_sp_adjust_with_store)
3311 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3312 merge_sp_adjust_with_store = 0;
3315 store_reg (regno, offset, STACK_POINTER_REGNUM);
3316 offset += UNITS_PER_WORD;
/* Save callee-saved GRs %r3..%r18 relative to the stack pointer.  */
3320 for (i = 18; i >= 3; i--)
3321 if (regs_ever_live[i] && ! call_used_regs[i])
3323 /* If merge_sp_adjust_with_store is nonzero, then we can
3324 optimize the first GR save. */
3325 if (merge_sp_adjust_with_store)
3327 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3328 merge_sp_adjust_with_store = 0;
3331 store_reg (i, offset, STACK_POINTER_REGNUM);
3332 offset += UNITS_PER_WORD;
3336 /* If we wanted to merge the SP adjustment with a GR save, but we never
3337 did any GR saves, then just emit the adjustment here. */
3338 if (merge_sp_adjust_with_store)
3339 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3343 /* The hppa calling conventions say that %r19, the pic offset
3344 register, is saved at sp - 32 (in this function's frame)
3345 when generating PIC code. FIXME: What is the correct thing
3346 to do for functions which make no calls and allocate no
3347 frame? Do we need to allocate a frame, or can we just omit
3348 the save? For now we'll just omit the save. */
3349 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3350 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3352 /* Align pointer properly (doubleword boundary). */
3353 offset = (offset + 7) & ~7;
3355 /* Floating point register store. */
3360 /* First get the frame or stack pointer to the start of the FP register
3362 if (frame_pointer_needed)
3364 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3365 base = frame_pointer_rtx;
3369 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3370 base = stack_pointer_rtx;
3373 /* Now actually save the FP registers. */
/* Stores go through a post-increment of %r1 (tmpreg); frame notes
   describe them as plain base+offset stores for the unwinder.  */
3374 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3376 if (regs_ever_live[i]
3377 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3379 rtx addr, insn, reg;
3380 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3381 reg = gen_rtx_REG (DFmode, i);
3382 insn = emit_move_insn (addr, reg);
3385 RTX_FRAME_RELATED_P (insn) = 1;
/* 64-bit: one DFmode frame note covers the save.  */
3388 rtx mem = gen_rtx_MEM (DFmode,
3389 plus_constant (base, offset));
3391 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3392 gen_rtx_SET (VOIDmode, mem, reg),
/* 32-bit: describe the save as two SFmode halves.  */
3397 rtx meml = gen_rtx_MEM (SFmode,
3398 plus_constant (base, offset));
3399 rtx memr = gen_rtx_MEM (SFmode,
3400 plus_constant (base, offset + 4));
3401 rtx regl = gen_rtx_REG (SFmode, i);
3402 rtx regr = gen_rtx_REG (SFmode, i + 1);
3403 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3404 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3407 RTX_FRAME_RELATED_P (setl) = 1;
3408 RTX_FRAME_RELATED_P (setr) = 1;
3409 vec = gen_rtvec (2, setl, setr);
3411 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3412 gen_rtx_SEQUENCE (VOIDmode, vec),
3416 offset += GET_MODE_SIZE (DFmode);
3422 /* FIXME: expand_call and expand_millicode_call need to be fixed to
3423 prevent insns with frame notes being scheduled in the delay slot
3424 of calls. This causes problems because the dwarf2 output code
3425 processes the insn list serially. For now, limit the migration
3426 of prologue insns with a blockage. */
3428 emit_insn (gen_blockage ());
3431 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3432 Handle case where DISP > 8k by using the add_high_const patterns. */
/* Emit a load of register REG from BASE+DISP, the mirror image of
   store_reg; large displacements go through %r1 via HIGH/LO_SUM.  */
3435 load_reg (reg, disp, base)
3436 int reg, disp, base;
3438 rtx src, dest, basereg;
3440 dest = gen_rtx_REG (word_mode, reg);
3441 basereg = gen_rtx_REG (Pmode, base);
/* 14-bit displacements fit directly in the load insn.  */
3442 if (VAL_14_BITS_P (disp))
3444 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3445 emit_move_insn (dest, src);
/* Otherwise build the address in %r1 first.  */
3449 rtx delta = GEN_INT (disp);
3450 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3451 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3452 emit_move_insn (tmpreg, high);
3453 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3454 emit_move_insn (dest, src);
3458 /* This function generates the assembly code for function exit.
3459 Args are as for output_function_prologue ().
3461 The function epilogue should not depend on the current stack
3462 pointer! It should use the frame pointer only. This is mandatory
3463 because of alloca; we also take advantage of it to omit stack
3464 adjustments before returning. */
/* Target hook: emit the assembler directives that close a function
   (.EXIT/.PROCEND); hppa_expand_epilogue generates the actual RTL.  */
3467 pa_output_function_epilogue (file, size)
3469 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3471 rtx insn = get_last_insn ();
3473 /* hppa_expand_epilogue does the dirty work now. We just need
3474 to output the assembler directives which denote the end
3477 To make debuggers happy, emit a nop if the epilogue was completely
3478 eliminated due to a volatile call as the last insn in the
3479 current function. That way the return address (in %r2) will
3480 always point to a valid instruction in the current function. */
3482 /* Get the last real insn. */
3483 if (GET_CODE (insn) == NOTE)
3484 insn = prev_real_insn (insn);
3486 /* If it is a sequence, then look inside. */
/* A SEQUENCE here is a call with its delay slot filled.  */
3487 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3488 insn = XVECEXP (PATTERN (insn), 0, 0);
3490 /* If insn is a CALL_INSN, then it must be a call to a volatile
3491 function (otherwise there would be epilogue insns). */
3492 if (insn && GET_CODE (insn) == CALL_INSN)
3493 fputs ("\tnop\n", file);
3495 fputs ("\t.EXIT\n\t.PROCEND\n", file);
/* Emit the RTL epilogue: restore RP, callee-saved GRs/FRs and the EH
   data registers, then deallocate the frame, undoing the layout
   established by hppa_expand_prologue.  */
3499 hppa_expand_epilogue ()
3503 int merge_sp_adjust_with_load = 0;
3506 /* We will use this often. */
3507 tmpreg = gen_rtx_REG (word_mode, 1);
3509 /* Try to restore RP early to avoid load/use interlocks when
3510 RP gets used in the return (bv) instruction. This appears to still
3511 be necessary even when we schedule the prologue and epilogue. */
3512 if (regs_ever_live [2] || current_function_calls_eh_return)
/* RP lives at sp-16 (64-bit) or sp-20 (32-bit) in the caller's
   frame, matching the save in hppa_expand_prologue.  */
3514 ret_off = TARGET_64BIT ? -16 : -20;
3515 if (frame_pointer_needed)
3517 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3522 /* No frame pointer, and stack is smaller than 8k. */
3523 if (VAL_14_BITS_P (ret_off - actual_fsize))
3525 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3531 /* General register restores. */
3532 if (frame_pointer_needed)
3534 offset = local_fsize;
3536 /* If the current function calls __builtin_eh_return, then we need
3537 to restore the saved EH data registers. */
3538 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3540 unsigned int i, regno;
3544 regno = EH_RETURN_DATA_REGNO (i);
3545 if (regno == INVALID_REGNUM)
3548 load_reg (regno, offset, FRAME_POINTER_REGNUM);
3549 offset += UNITS_PER_WORD;
/* Restore callee-saved GRs %r4..%r18 (FP handles %r3 below).  */
3553 for (i = 18; i >= 4; i--)
3554 if (regs_ever_live[i] && ! call_used_regs[i])
3556 load_reg (i, offset, FRAME_POINTER_REGNUM);
3557 offset += UNITS_PER_WORD;
3562 offset = local_fsize - actual_fsize;
3564 /* If the current function calls __builtin_eh_return, then we need
3565 to restore the saved EH data registers. */
3566 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3568 unsigned int i, regno;
3572 regno = EH_RETURN_DATA_REGNO (i);
3573 if (regno == INVALID_REGNUM)
3576 /* Only for the first load.
3577 merge_sp_adjust_with_load holds the register load
3578 with which we will merge the sp adjustment. */
3579 if (merge_sp_adjust_with_load == 0
3581 && VAL_14_BITS_P (-actual_fsize))
3582 merge_sp_adjust_with_load = regno;
3584 load_reg (regno, offset, STACK_POINTER_REGNUM);
3585 offset += UNITS_PER_WORD;
/* Restore callee-saved GRs %r3..%r18 relative to the stack
   pointer, possibly deferring one load to merge with the final
   SP adjustment.  */
3589 for (i = 18; i >= 3; i--)
3591 if (regs_ever_live[i] && ! call_used_regs[i])
3593 /* Only for the first load.
3594 merge_sp_adjust_with_load holds the register load
3595 with which we will merge the sp adjustment. */
3596 if (merge_sp_adjust_with_load == 0
3598 && VAL_14_BITS_P (-actual_fsize))
3599 merge_sp_adjust_with_load = i;
3601 load_reg (i, offset, STACK_POINTER_REGNUM);
3602 offset += UNITS_PER_WORD;
3607 /* Align pointer properly (doubleword boundary). */
3608 offset = (offset + 7) & ~7;
3610 /* FP register restores. */
3613 /* Adjust the register to index off of. */
3614 if (frame_pointer_needed)
3615 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3617 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3619 /* Actually do the restores now. */
/* Loads post-increment through %r1 (tmpreg), mirroring the saves.  */
3620 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3621 if (regs_ever_live[i]
3622 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3624 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3625 rtx dest = gen_rtx_REG (DFmode, i);
3626 emit_move_insn (dest, src);
3630 /* Emit a blockage insn here to keep these insns from being moved to
3631 an earlier spot in the epilogue, or into the main instruction stream.
3633 This is necessary as we must not cut the stack back before all the
3634 restores are finished. */
3635 emit_insn (gen_blockage ());
3637 /* Reset stack pointer (and possibly frame pointer). The stack
3638 pointer is initially set to fp + 64 to avoid a race condition. */
3639 if (frame_pointer_needed)
3641 rtx delta = GEN_INT (-64);
3643 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
/* Pre-modify load: restore old FP from *(sp-64) while moving sp.  */
3644 emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
3646 /* If we were deferring a callee register restore, do it now. */
3647 else if (merge_sp_adjust_with_load)
3649 rtx delta = GEN_INT (-actual_fsize);
3650 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
3652 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
3654 else if (actual_fsize != 0)
3655 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3658 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3659 frame greater than 8k), do so now. */
3661 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3663 if (DO_FRAME_NOTES && current_function_calls_eh_return)
3665 rtx sa = EH_RETURN_STACKADJ_RTX;
/* Apply the EH stack adjustment after everything is restored.  */
3667 emit_insn (gen_blockage ());
3668 emit_insn (TARGET_64BIT
3669 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
3670 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
/* Return an rtx holding the entry value of the PIC offset table
   register (%r19), using the hard-reg initial-value machinery.  */
3675 hppa_pic_save_rtx ()
3677 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
/* Emit the call to _mcount for -p profiling.  Passes the function's
   FUNC_BEGIN_PROLOG_LABEL address and (unless NO_PROFILE_COUNTERS)
   the address of the per-call-site counter in %r24.  */
3681 hppa_profile_hook (label_no)
3682 int label_no ATTRIBUTE_UNUSED;
3684 rtx begin_label_rtx, call_insn;
3685 char begin_label_name[16];
3687 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
3688 current_function_number);
3689 begin_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (begin_label_name));
3692 emit_move_insn (arg_pointer_rtx,
3693 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
/* _mcount expects the caller's return pointer in %r26.  */
3696 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3698 #ifndef NO_PROFILE_COUNTERS
3700 rtx count_label_rtx, addr, r24;
3701 char count_label_name[16];
/* "LP" labels are the per-call-site profile counters.  */
3703 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
3704 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
/* PIC: address the counter through the PIC offset table.  */
3710 current_function_uses_pic_offset_table = 1;
3711 tmpreg = gen_rtx_REG (Pmode, 1);
3712 emit_move_insn (tmpreg,
3713 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3714 gen_rtx_HIGH (Pmode, count_label_rtx)));
3715 addr = gen_rtx_MEM (Pmode,
3716 gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx));
/* Non-PIC: plain HIGH/LO_SUM absolute address.  */
3720 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3721 emit_move_insn (tmpreg, gen_rtx_HIGH (Pmode, count_label_rtx));
3722 addr = gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx);
3724 r24 = gen_rtx_REG (Pmode, 24);
3725 emit_move_insn (r24, addr);
3727 /* %r25 is set from within the output pattern. */
3729 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3730 GEN_INT (TARGET_64BIT ? 24 : 12),
3733 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3736 /* %r25 is set from within the output pattern. */
3738 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3739 GEN_INT (TARGET_64BIT ? 16 : 8),
3743 /* Indicate the _mcount call cannot throw, nor will it execute a
3745 REG_NOTES (call_insn)
3746 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
/* Record the implicit uses so the call isn't deleted as dead.  */
3750 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3752 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3754 emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
3758 /* Fetch the return address for the frame COUNT steps up from
3759 the current frame, after the prologue. FRAMEADDR is the
3760 frame pointer of the COUNT frame.
3762 We want to ignore any export stub remnants here. To handle this,
3763 we examine the code at the return address, and if it is an export
3764 stub, we return a memory rtx for the stub return address stored
3767 The value returned is used in two different ways:
3769 1. To find a function's caller.
3771 2. To change the return address for a function.
3773 This function handles most instances of case 1; however, it will
3774 fail if there are two levels of stubs to execute on the return
3775 path. The only way I believe that can happen is if the return value
3776 needs a parameter relocation, which never happens for C code.
3778 This function handles most instances of case 2; however, it will
3779 fail if we did not originally have stub code on the return path
3780 but will need stub code on the new return path. This can happen if
3781 the caller & callee are both in the main program, but the new
3782 return location is in a shared library. */
3785 return_addr_rtx (count, frameaddr)
3797 rp = get_hard_reg_initial_val (Pmode, 2);
/* The 64-bit and no-space-register targets have no export stubs, so
   the saved %r2 value can be used directly (early-return path; the
   actual return statement is elided in this listing).  */
3799 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
3802 saved_rp = gen_reg_rtx (Pmode);
3803 emit_move_insn (saved_rp, rp);
3805 /* Get pointer to the instruction stream. We have to mask out the
3806 privilege level from the two low order bits of the return address
3807 pointer here so that ins will point to the start of the first
3808 instruction that would have been executed if we returned. */
3809 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
3810 label = gen_label_rtx ();
3812 /* Check the instruction stream at the normal return address for the
3815 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3816 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3817 0x00011820 | stub+16: mtsp r1,sr0
3818 0xe0400002 | stub+20: be,n 0(sr0,rp)
3820 If it is an export stub, then our return address is really in
/* Compare each of the four stub words in turn; any mismatch branches
   to LABEL, which keeps the plain saved return pointer.  */
3823 emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
3824 NULL_RTX, SImode, 1);
3825 emit_jump_insn (gen_bne (label));
3827 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3828 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
3829 emit_jump_insn (gen_bne (label));
3831 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3832 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
3833 emit_jump_insn (gen_bne (label));
3835 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3836 GEN_INT (0xe0400002), NE, NULL_RTX, SImode, 1);
3838 /* If there is no export stub then just use the value saved from
3839 the return pointer register. */
3841 emit_jump_insn (gen_bne (label));
3843 /* Here we know that our return address points to an export
3844 stub. We don't want to return the address of the export stub,
3845 but rather the return address of the export stub. That return
3846 address is stored at -24[frameaddr]. */
3848 emit_move_insn (saved_rp,
3850 memory_address (Pmode,
3851 plus_constant (frameaddr,
3858 /* This is only valid once reload has completed because it depends on
3859 knowing exactly how much (if any) frame there is and...
3861 It's only valid if there is no frame marker to de-allocate and...
3863 It's only valid if %r2 hasn't been saved into the caller's frame
3864 (we're not profiling and %r2 isn't live anywhere). */
/* Return nonzero if the current function can use a simple return insn:
   no frame to deallocate, return pointer (%r2) never stored, and no
   frame pointer in use.  */
3866 hppa_can_use_return_insn_p ()
3868 return (reload_completed
3869 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3870 && ! regs_ever_live[2]
3871 && ! frame_pointer_needed);
/* Emit a conditional branch on the floating-point status register
   (CCFP reg 0).  CODE is the rtx comparison code; OPERAND0 is the
   branch target label.  */
3875 emit_bcond_fp (code, operand0)
3879 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3880 gen_rtx_IF_THEN_ELSE (VOIDmode,
3881 gen_rtx_fmt_ee (code,
3883 gen_rtx_REG (CCFPmode, 0),
3885 gen_rtx_LABEL_REF (VOIDmode, operand0),
/* Build (but do not emit) the rtx for a floating-point comparison
   setting the CCFP register: (set (reg:CCFP 0) (CODE op0 op1)).  */
3891 gen_cmp_fp (code, operand0, operand1)
3893 rtx operand0, operand1;
3895 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3896 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
3899 /* Adjust the cost of a scheduling dependency. Return the new cost of
3900 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
/* Three dependency kinds are special-cased below: true data
   dependencies (REG_NOTE_KIND == 0), anti dependencies
   (REG_DEP_ANTI) and output dependencies (REG_DEP_OUTPUT), each for
   fpstore/fpload/fpalu consumers of FP arithmetic results.
   NOTE(review): several return statements and case labels of this
   function are elided in this listing.  */
3903 pa_adjust_cost (insn, link, dep_insn, cost)
3909 enum attr_type attr_type;
3911 /* Don't adjust costs for a pa8000 chip. */
3912 if (pa_cpu >= PROCESSOR_8000)
3915 if (! recog_memoized (insn))
3918 attr_type = get_attr_type (insn);
3920 if (REG_NOTE_KIND (link) == 0)
3922 /* Data dependency; DEP_INSN writes a register that INSN reads some
3925 if (attr_type == TYPE_FPSTORE)
3927 rtx pat = PATTERN (insn);
3928 rtx dep_pat = PATTERN (dep_insn);
3929 if (GET_CODE (pat) == PARALLEL)
3931 /* This happens for the fstXs,mb patterns. */
3932 pat = XVECEXP (pat, 0, 0);
3934 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3935 /* If this happens, we have to extend this to schedule
3936 optimally. Return 0 for now. */
3939 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3941 if (! recog_memoized (dep_insn))
3943 /* DEP_INSN is writing its result to the register
3944 being stored in the fpstore INSN. */
3945 switch (get_attr_type (dep_insn))
3948 /* This cost 3 cycles, not 2 as the md says for the
3957 case TYPE_FPSQRTSGL:
3958 case TYPE_FPSQRTDBL:
3959 /* In these important cases, we save one cycle compared to
3960 when flop instruction feed each other. */
3969 /* For other data dependencies, the default cost specified in the
3973 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3975 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3978 if (attr_type == TYPE_FPLOAD)
3980 rtx pat = PATTERN (insn);
3981 rtx dep_pat = PATTERN (dep_insn);
3982 if (GET_CODE (pat) == PARALLEL)
3984 /* This happens for the fldXs,mb patterns. */
3985 pat = XVECEXP (pat, 0, 0);
3987 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3988 /* If this happens, we have to extend this to schedule
3989 optimally. Return 0 for now. */
3992 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3994 if (! recog_memoized (dep_insn))
3996 switch (get_attr_type (dep_insn))
4003 case TYPE_FPSQRTSGL:
4004 case TYPE_FPSQRTDBL:
4005 /* A fpload can't be issued until one cycle before a
4006 preceding arithmetic operation has finished if
4007 the target of the fpload is any of the sources
4008 (or destination) of the arithmetic operation. */
4016 else if (attr_type == TYPE_FPALU)
4018 rtx pat = PATTERN (insn);
4019 rtx dep_pat = PATTERN (dep_insn);
4020 if (GET_CODE (pat) == PARALLEL)
4022 /* This happens for the fldXs,mb patterns. */
4023 pat = XVECEXP (pat, 0, 0);
4025 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4026 /* If this happens, we have to extend this to schedule
4027 optimally. Return 0 for now. */
4030 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4032 if (! recog_memoized (dep_insn))
4034 switch (get_attr_type (dep_insn))
4038 case TYPE_FPSQRTSGL:
4039 case TYPE_FPSQRTDBL:
4040 /* An ALU flop can't be issued until two cycles before a
4041 preceding divide or sqrt operation has finished if
4042 the target of the ALU flop is any of the sources
4043 (or destination) of the divide or sqrt operation. */
4052 /* For other anti dependencies, the cost is 0. */
4055 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4057 /* Output dependency; DEP_INSN writes a register that INSN writes some
4059 if (attr_type == TYPE_FPLOAD)
4061 rtx pat = PATTERN (insn);
4062 rtx dep_pat = PATTERN (dep_insn);
4063 if (GET_CODE (pat) == PARALLEL)
4065 /* This happens for the fldXs,mb patterns. */
4066 pat = XVECEXP (pat, 0, 0);
4068 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4069 /* If this happens, we have to extend this to schedule
4070 optimally. Return 0 for now. */
4073 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4075 if (! recog_memoized (dep_insn))
4077 switch (get_attr_type (dep_insn))
4084 case TYPE_FPSQRTSGL:
4085 case TYPE_FPSQRTDBL:
4086 /* A fpload can't be issued until one cycle before a
4087 preceding arithmetic operation has finished if
4088 the target of the fpload is the destination of the
4089 arithmetic operation. */
4097 else if (attr_type == TYPE_FPALU)
4099 rtx pat = PATTERN (insn);
4100 rtx dep_pat = PATTERN (dep_insn);
4101 if (GET_CODE (pat) == PARALLEL)
4103 /* This happens for the fldXs,mb patterns. */
4104 pat = XVECEXP (pat, 0, 0);
4106 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4107 /* If this happens, we have to extend this to schedule
4108 optimally. Return 0 for now. */
4111 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4113 if (! recog_memoized (dep_insn))
4115 switch (get_attr_type (dep_insn))
4119 case TYPE_FPSQRTSGL:
4120 case TYPE_FPSQRTDBL:
4121 /* An ALU flop can't be issued until two cycles before a
4122 preceding divide or sqrt operation has finished if
4123 the target of the ALU flop is also the target of
4124 the divide or sqrt operation. */
4133 /* For other output dependencies, the cost is 0. */
4140 /* Adjust scheduling priorities. We use this to try and keep addil
4141 and the next use of %r1 close together. */
/* Returns the (possibly adjusted) PRIORITY for INSN.  The three
   branches below recognize, in order: an addil-style LO_SUM of a
   writable symbol, a load through such a LO_SUM address, and a store
   through one.  The priority adjustments themselves are elided in
   this listing.  */
4143 pa_adjust_priority (insn, priority)
4147 rtx set = single_set (insn);
4151 src = SET_SRC (set);
4152 dest = SET_DEST (set);
4153 if (GET_CODE (src) == LO_SUM
4154 && symbolic_operand (XEXP (src, 1), VOIDmode)
4155 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4158 else if (GET_CODE (src) == MEM
4159 && GET_CODE (XEXP (src, 0)) == LO_SUM
4160 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4161 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4164 else if (GET_CODE (dest) == MEM
4165 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4166 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4167 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4173 /* The 700 can only issue a single insn at a time.
4174 The 7XXX processors can issue two insns at a time.
4175 The 8000 can issue 4 insns at a time. */
/* pa_issue_rate: per-CPU multiple-issue width for the scheduler
   (the function header and switch statement are elided in this
   listing; only the case arms are visible).  */
4181 case PROCESSOR_700: return 1;
4182 case PROCESSOR_7100: return 2;
4183 case PROCESSOR_7100LC: return 2;
4184 case PROCESSOR_7200: return 2;
4185 case PROCESSOR_8000: return 4;
4194 /* Return any length adjustment needed by INSN which already has its length
4195 computed as LENGTH. Return zero if no adjustment is necessary.
4197 For the PA: function calls, millicode calls, and backwards short
4198 conditional branches with unfilled delay slots need an adjustment by +1
4199 (to account for the NOP which will be inserted into the instruction stream).
4201 Also compute the length of an inline block move here as it is too
4202 complicated to express as a length attribute in pa.md. */
4204 pa_adjust_insn_length (insn, length)
4208 rtx pat = PATTERN (insn);
4210 /* Call insns which are *not* indirect and have unfilled delay slots. */
4211 if (GET_CODE (insn) == CALL_INSN)
4214 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
4215 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
4217 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
4218 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
4224 /* Jumps inside switch tables which have unfilled delay slots
4225 also need adjustment. */
4226 else if (GET_CODE (insn) == JUMP_INSN
4227 && simplejump_p (insn)
4228 && GET_MODE (insn) == SImode)
4230 /* Millicode insn with an unfilled delay slot. */
4231 else if (GET_CODE (insn) == INSN
4232 && GET_CODE (pat) != SEQUENCE
4233 && GET_CODE (pat) != USE
4234 && GET_CODE (pat) != CLOBBER
4235 && get_attr_type (insn) == TYPE_MILLI)
4237 /* Block move pattern. */
4238 else if (GET_CODE (insn) == INSN
4239 && GET_CODE (pat) == PARALLEL
4240 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4241 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4242 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4243 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4244 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4245 return compute_movstrsi_length (insn) - 4;
4246 /* Conditional branch with an unfilled delay slot. */
4247 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4249 /* Adjust a short backwards conditional with an unfilled delay slot. */
4250 if (GET_CODE (pat) == SET
4252 && ! forward_branch_p (insn))
4254 else if (GET_CODE (pat) == PARALLEL
4255 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4258 /* Adjust dbra insn with short backwards conditional branch with
4259 unfilled delay slot -- only for case where counter is in a
4260 general register. */
4261 else if (GET_CODE (pat) == PARALLEL
4262 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4263 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4264 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4266 && ! forward_branch_p (insn))
4274 /* Print operand X (an rtx) in assembler syntax to file FILE.
4275 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4276 For `%' followed by punctuation, CODE is the punctuation and X is null. */
/* NOTE(review): most of the `case' labels of the switch on CODE are
   elided in this listing; only the case bodies are visible below.  */
4279 print_operand (file, x, code)
4287 /* Output a 'nop' if there's nothing for the delay slot. */
4288 if (dbr_sequence_length () == 0)
4289 fputs ("\n\tnop", file);
4292 /* Output a nullification completer if there's nothing for the */
4293 /* delay slot or nullification is requested. */
4294 if (dbr_sequence_length () == 0 ||
4296 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4300 /* Print out the second register name of a register pair.
4301 I.e., R (6) => 7. */
4302 fputs (reg_names[REGNO (x) + 1], file);
4305 /* A register or zero. */
4307 || (x == CONST0_RTX (DFmode))
4308 || (x == CONST0_RTX (SFmode)))
4310 fputs ("%r0", file);
4316 /* A register or zero (floating point). */
4318 || (x == CONST0_RTX (DFmode))
4319 || (x == CONST0_RTX (SFmode)))
4321 fputs ("%fr0", file);
4330 xoperands[0] = XEXP (XEXP (x, 0), 0);
4331 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4332 output_global_address (file, xoperands[1], 0);
4333 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4337 case 'C': /* Plain (C)ondition */
4339 switch (GET_CODE (x))
4342 fputs ("=", file); break;
4344 fputs ("<>", file); break;
4346 fputs (">", file); break;
4348 fputs (">=", file); break;
4350 fputs (">>=", file); break;
4352 fputs (">>", file); break;
4354 fputs ("<", file); break;
4356 fputs ("<=", file); break;
4358 fputs ("<<=", file); break;
4360 fputs ("<<", file); break;
4365 case 'N': /* Condition, (N)egated */
4366 switch (GET_CODE (x))
4369 fputs ("<>", file); break;
4371 fputs ("=", file); break;
4373 fputs ("<=", file); break;
4375 fputs ("<", file); break;
4377 fputs ("<<", file); break;
4379 fputs ("<<=", file); break;
4381 fputs (">=", file); break;
4383 fputs (">", file); break;
4385 fputs (">>", file); break;
4387 fputs (">>=", file); break;
4392 /* For floating point comparisons. Note that the output
4393 predicates are the complement of the desired mode. */
4395 switch (GET_CODE (x))
4398 fputs ("!=", file); break;
4400 fputs ("=", file); break;
4402 fputs ("!>", file); break;
4404 fputs ("!>=", file); break;
4406 fputs ("!<", file); break;
4408 fputs ("!<=", file); break;
4410 fputs ("!<>", file); break;
4412 fputs (">", file); break;
4414 fputs (">=", file); break;
4416 fputs ("<", file); break;
4418 fputs ("<=", file); break;
4420 fputs ("<>", file); break;
4422 fputs ("<=>", file); break;
4424 fputs ("!<=>", file); break;
4429 case 'S': /* Condition, operands are (S)wapped. */
4430 switch (GET_CODE (x))
4433 fputs ("=", file); break;
4435 fputs ("<>", file); break;
4437 fputs ("<", file); break;
4439 fputs ("<=", file); break;
4441 fputs ("<<=", file); break;
4443 fputs ("<<", file); break;
4445 fputs (">", file); break;
4447 fputs (">=", file); break;
4449 fputs (">>=", file); break;
4451 fputs (">>", file); break;
4456 case 'B': /* Condition, (B)oth swapped and negate. */
4457 switch (GET_CODE (x))
4460 fputs ("<>", file); break;
4462 fputs ("=", file); break;
4464 fputs (">=", file); break;
4466 fputs (">", file); break;
4468 fputs (">>", file); break;
4470 fputs (">>=", file); break;
4472 fputs ("<=", file); break;
4474 fputs ("<", file); break;
4476 fputs ("<<", file); break;
4478 fputs ("<<=", file); break;
/* The CONST_INT cases below print bit-field / shift operands derived
   from the integer value (complement, 64-x, 32-x, log2, 63-x, 31-x).  */
4484 if (GET_CODE (x) == CONST_INT)
4486 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
4491 if (GET_CODE (x) == CONST_INT)
4493 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
4498 if (GET_CODE (x) == CONST_INT)
4500 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
4505 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4507 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4512 if (GET_CODE (x) == CONST_INT)
4514 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
4519 if (GET_CODE (x) == CONST_INT)
4521 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
4526 if (GET_CODE (x) == CONST_INT)
/* Memory addressing-mode completers (,mb ,ma x,s) keyed on the
   address code of a MEM operand.  */
4531 switch (GET_CODE (XEXP (x, 0)))
4535 if (ASSEMBLER_DIALECT == 0)
4536 fputs ("s,mb", file);
4538 fputs (",mb", file);
4542 if (ASSEMBLER_DIALECT == 0)
4543 fputs ("s,ma", file);
4545 fputs (",ma", file);
4548 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4549 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4551 if (ASSEMBLER_DIALECT == 0)
4552 fputs ("x,s", file);
4556 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4560 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4566 output_global_address (file, x, 0);
4569 output_global_address (file, x, 1);
4571 case 0: /* Don't do anything special */
4576 compute_zdepwi_operands (INTVAL (x), op);
4577 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4583 compute_zdepdi_operands (INTVAL (x), op);
4584 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4588 /* We can get here from a .vtable_inherit due to our
4589 CONSTANT_ADDRESS_P rejecting perfectly good constant
/* Fallthrough operand printing: registers, memory, then constants.  */
4595 if (GET_CODE (x) == REG)
4597 fputs (reg_names [REGNO (x)], file);
4598 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4604 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4605 && (REGNO (x) & 1) == 0)
4608 else if (GET_CODE (x) == MEM)
4610 int size = GET_MODE_SIZE (GET_MODE (x));
4611 rtx base = NULL_RTX;
4612 switch (GET_CODE (XEXP (x, 0)))
4616 base = XEXP (XEXP (x, 0), 0);
4617 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4621 base = XEXP (XEXP (x, 0), 0);
4622 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4625 if (GET_CODE (XEXP (x, 0)) == PLUS
4626 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4627 fprintf (file, "%s(%s)",
4628 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4629 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4630 else if (GET_CODE (XEXP (x, 0)) == PLUS
4631 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4632 fprintf (file, "%s(%s)",
4633 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4634 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4636 output_address (XEXP (x, 0));
4641 output_addr_const (file, x);
4644 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
/* FILE is the output stream; X is the address rtx; ROUND_CONSTANT
   nonzero requests rounding of the constant offset (see the LR-field
   comment below).  Non-PIC, non-readonly symbols are emitted relative
   to $global$.  */
4647 output_global_address (file, x, round_constant)
4653 /* Imagine (high (const (plus ...))). */
4654 if (GET_CODE (x) == HIGH)
4657 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4658 assemble_name (file, XSTR (x, 0));
4659 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4661 assemble_name (file, XSTR (x, 0));
4662 fputs ("-$global$", file);
4664 else if (GET_CODE (x) == CONST)
4666 const char *sep = "";
4667 int offset = 0; /* assembler wants -$global$ at end */
4668 rtx base = NULL_RTX;
/* A CONST wraps (plus/minus symbol const_int); pick out the symbol
   and the integer offset from either operand position.  */
4670 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4672 base = XEXP (XEXP (x, 0), 0);
4673 output_addr_const (file, base);
4675 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4676 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4679 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4681 base = XEXP (XEXP (x, 0), 1);
4682 output_addr_const (file, base);
4684 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4685 offset = INTVAL (XEXP (XEXP (x, 0), 1));
4688 /* How bogus. The compiler is apparently responsible for
4689 rounding the constant if it uses an LR field selector.
4691 The linker and/or assembler seem a better place since
4692 they have to do this kind of thing already.
4694 If we fail to do this, HP's optimizing linker may eliminate
4695 an addil, but not update the ldw/stw/ldo instruction that
4696 uses the result of the addil. */
4698 offset = ((offset + 0x1000) & ~0x1fff);
4700 if (GET_CODE (XEXP (x, 0)) == PLUS)
4710 else if (GET_CODE (XEXP (x, 0)) == MINUS
4711 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4715 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4716 fputs ("-$global$", file);
4718 fprintf (file, "%s%d", sep, offset)
4721 output_addr_const (file, x);
/* Emit any deferred procedure-label (plabel) entries to FILE:
   a 4-byte-aligned data word per plabel holding the target symbol,
   labeled with its recorded internal label.  */
4725 output_deferred_plabels (file)
4729 /* If we have deferred plabels, then we need to switch into the data
4730 section and align it to a 4 byte boundary before we output the
4731 deferred plabels. */
4732 if (n_deferred_plabels)
4735 ASM_OUTPUT_ALIGN (file, 2);
4738 /* Now output the deferred plabels. */
4739 for (i = 0; i < n_deferred_plabels; i++)
4741 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4742 assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
4747 /* HP's millicode routines mean something special to the assembler.
4748 Keep track of which ones we have used. */
4750 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
4751 static void import_milli PARAMS ((enum millicodes));
4752 static char imported[(int) end1000];
4753 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
4754 static const char import_string[] = ".IMPORT $$....,MILLICODE";
4755 #define MILLI_START 10
/* import_milli: emit the .IMPORT directive for millicode routine CODE,
   at most once per translation unit.  The 4-character routine name is
   spliced over the "...." placeholder at offset MILLI_START of the
   template (strncpy here is intentional: exactly 4 bytes, no NUL).  */
4759 enum millicodes code;
4761 char str[sizeof (import_string)];
4763 if (!imported[(int) code])
4765 imported[(int) code] = 1;
4766 strcpy (str, import_string);
4767 strncpy (str + MILLI_START, milli_names[(int) code], 4);
4768 output_asm_insn (str, 0);
4772 /* The register constraints have put the operands and return value in
4773 the proper registers. */
/* Output the assembler for a multiply via the $$mulI millicode call.  */
4776 output_mul_insn (unsignedp, insn)
4777 int unsignedp ATTRIBUTE_UNUSED;
4780 import_milli (mulI);
4781 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4784 /* Emit the rtl for doing a division by a constant. */
4786 /* Do magic division millicodes exist for this value? */
4787 static const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4790 /* We'll use an array to keep track of the magic millicodes and
4791 whether or not we've used them already. [n][0] is signed, [n][1] is
4794 static int div_milli[16][2];
/* Predicate: OP is a valid SImode divisor operand — either the %r25
   argument register or a small positive constant with a magic
   millicode (per magic_milli above).  */
4797 div_operand (op, mode)
4799 enum machine_mode mode;
4801 return (mode == SImode
4802 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4803 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4804 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
/* If OPERANDS[2] is a small constant with a magic millicode, emit the
   RTL for a division by that constant: move the dividend into %r26,
   build the PARALLEL describing the millicode call (result in %r29,
   clobbering %r25/%r26, the operands[3]/[4] scratches, and the return
   pointer %r2 or %r31), and copy the result into OPERANDS[0].
   UNSIGNEDP selects UDIV vs DIV.  Return value elided in listing.  */
4808 emit_hpdiv_const (operands, unsignedp)
4812 if (GET_CODE (operands[2]) == CONST_INT
4813 && INTVAL (operands[2]) > 0
4814 && INTVAL (operands[2]) < 16
4815 && magic_milli[INTVAL (operands[2])])
4817 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
4819 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4822 (PARALLEL, VOIDmode,
4823 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4824 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4826 gen_rtx_REG (SImode, 26),
4828 gen_rtx_CLOBBER (VOIDmode, operands[4]),
4829 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4830 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4831 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4832 gen_rtx_CLOBBER (VOIDmode, ret))));
4833 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
/* Output the assembler for a division.  For a constant divisor with a
   magic millicode, call $$divU_<n>/$$divI_<n> (emitting its .IMPORT
   once, tracked in div_milli); otherwise call the generic $$divU or
   $$divI millicode routine.  UNSIGNEDP selects the unsigned variants.  */
4840 output_div_insn (operands, unsignedp, insn)
4847 /* If the divisor is a constant, try to use one of the special
4849 if (GET_CODE (operands[0]) == CONST_INT)
4851 static char buf[100];
4852 divisor = INTVAL (operands[0]);
4853 if (!div_milli[divisor][unsignedp])
4855 div_milli[divisor][unsignedp] = 1;
4857 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4859 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4863 sprintf (buf, "$$divU_");
4864 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4865 return output_millicode_call (insn,
4866 gen_rtx_SYMBOL_REF (SImode, buf));
4870 sprintf (buf, "$$divI_");
4871 sprintf (buf + 7, HOST_WIDE_INT_PRINT_DEC, INTVAL (operands[0]));
4872 return output_millicode_call (insn,
4873 gen_rtx_SYMBOL_REF (SImode, buf));
4876 /* Divisor isn't a special constant. */
4881 import_milli (divU);
4882 return output_millicode_call (insn,
4883 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4887 import_milli (divI);
4888 return output_millicode_call (insn,
4889 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
4894 /* Output a $$rem millicode to do mod. */
/* UNSIGNEDP selects $$remU over $$remI; INSN is the call site passed
   through to output_millicode_call.  */
4897 output_mod_insn (unsignedp, insn)
4903 import_milli (remU);
4904 return output_millicode_call (insn,
4905 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4909 import_milli (remI);
4910 return output_millicode_call (insn,
4911 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
/* Emit the HP-UX .CALL argument-relocation descriptor for CALL_INSN,
   describing which argument words are in general (GR) vs floating
   (FR/FU) registers, derived from the USEs recorded in
   CALL_INSN_FUNCTION_USAGE.  */
4916 output_arg_descriptor (call_insn)
4919 const char *arg_regs[4];
4920 enum machine_mode arg_mode;
4922 int i, output_flag = 0;
4925 /* We neither need nor want argument location descriptors for the
4926 64bit runtime environment or the ELF32 environment. */
4927 if (TARGET_64BIT || TARGET_ELF32)
4930 for (i = 0; i < 4; i++)
4933 /* Specify explicitly that no argument relocations should take place
4934 if using the portable runtime calling conventions. */
4935 if (TARGET_PORTABLE_RUNTIME)
4937 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4942 if (GET_CODE (call_insn) != CALL_INSN)
4944 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4946 rtx use = XEXP (link, 0);
4948 if (! (GET_CODE (use) == USE
4949 && GET_CODE (XEXP (use, 0)) == REG
4950 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4953 arg_mode = GET_MODE (XEXP (use, 0));
4954 regno = REGNO (XEXP (use, 0));
/* General argument registers %r23-%r26 map to ARGW3..ARGW0; a DImode
   argument occupies the following word as well.  */
4955 if (regno >= 23 && regno <= 26)
4957 arg_regs[26 - regno] = "GR";
4958 if (arg_mode == DImode)
4959 arg_regs[25 - regno] = "GR";
4961 else if (regno >= 32 && regno <= 39)
4963 if (arg_mode == SFmode)
4964 arg_regs[(regno - 32) / 2] = "FR";
/* Double-precision FP arguments use an FR/FU pair; the word order is
   configuration-dependent.  */
4967 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4968 arg_regs[(regno - 34) / 2] = "FR";
4969 arg_regs[(regno - 34) / 2 + 1] = "FU";
4971 arg_regs[(regno - 34) / 2] = "FU";
4972 arg_regs[(regno - 34) / 2 + 1] = "FR";
4977 fputs ("\t.CALL ", asm_out_file);
4978 for (i = 0; i < 4; i++)
4983 fputc (',', asm_out_file);
4984 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4987 fputc ('\n', asm_out_file);
4990 /* Return the class of any secondary reload register that is needed to
4991 move IN into a register in class CLASS using mode MODE.
4993 Profiling has showed this routine and its descendants account for
4994 a significant amount of compile time (~7%). So it has been
4995 optimized to reduce redundant computations and eliminate useless
4998 It might be worthwhile to try and make this a leaf function too. */
5001 secondary_reload_class (class, mode, in)
5002 enum reg_class class;
5003 enum machine_mode mode;
5006 int regno, is_symbolic;
5008 /* Trying to load a constant into a FP register during PIC code
5009 generation will require %r1 as a scratch register. */
5011 && GET_MODE_CLASS (mode) == MODE_INT
5012 && FP_REG_CLASS_P (class)
5013 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
5016 /* Profiling showed the PA port spends about 1.3% of its compilation
5017 time in true_regnum from calls inside secondary_reload_class. */
5019 if (GET_CODE (in) == REG)
5022 if (regno >= FIRST_PSEUDO_REGISTER)
5023 regno = true_regnum (in);
5025 else if (GET_CODE (in) == SUBREG)
5026 regno = true_regnum (in);
5030 /* If we have something like (mem (mem (...)), we can safely assume the
5031 inner MEM will end up in a general register after reloading, so there's
5032 no need for a secondary reload. */
5033 if (GET_CODE (in) == MEM
5034 && GET_CODE (XEXP (in, 0)) == MEM)
5037 /* Handle out of range displacement for integer mode loads/stores of
5039 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5040 && GET_MODE_CLASS (mode) == MODE_INT
5041 && FP_REG_CLASS_P (class))
5042 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
5043 return GENERAL_REGS;
5045 /* A SAR<->FP register copy requires a secondary register (GPR) as
5046 well as secondary memory. */
5047 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5048 && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
5049 || (class == SHIFT_REGS && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
5050 return GENERAL_REGS;
5052 if (GET_CODE (in) == HIGH)
5055 /* Profiling has showed GCC spends about 2.6% of its compilation
5056 time in symbolic_operand from calls inside secondary_reload_class.
5058 We use an inline copy and only compute its return value once to avoid
5060 switch (GET_CODE (in))
/* Inlined symbolic_operand check: (const (plus (symbol/label) int)).  */
5070 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
5071 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
5072 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
5082 && read_only_operand (in, VOIDmode))
/* Symbolic operands (other than read-only ones above) need %r1.  */
5085 if (class != R1_REGS && is_symbolic)
/* Return the direction (upward/downward/none) in which an argument of
   MODE/TYPE is padded when passed on the stack.  BLKmode aggregates
   with constant size, and scalar modes, are decided by their size
   relative to PARM_BOUNDARY; result enum values are elided in this
   listing.  */
5092 function_arg_padding (mode, type)
5093 enum machine_mode mode;
5098 if (mode == BLKmode)
5100 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
5101 size = int_size_in_bytes (type) * BITS_PER_UNIT;
5103 return upward; /* Don't know if this is right, but */
5104 /* same as old definition. */
5107 size = GET_MODE_BITSIZE (mode);
5108 if (size < PARM_BOUNDARY)
5110 else if (size % PARM_BOUNDARY)
5117 /* Do what is necessary for `va_start'. We look at the current function
5118 to determine if stdargs or varargs is used and fill in an initial
5119 va_list. A pointer to this constructor is returned. */
5122 hppa_builtin_saveregs ()
5125 tree fntype = TREE_TYPE (current_function_decl);
/* ARGADJ is one word when the function's last fixed argument is
   followed by "..." only implicitly (old-style varargs), zero for
   ISO stdarg functions.  */
5126 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5127 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5128 != void_type_node)))
5129 ? UNITS_PER_WORD : 0);
5132 offset = plus_constant (current_function_arg_offset_rtx, argadj);
5134 offset = current_function_arg_offset_rtx;
/* TARGET_64BIT path (the guard is elided in this listing): spill
   %r26..%r19 below the incoming arg pointer, then return a pointer to
   the first anonymous argument.  */
5140 /* Adjust for varargs/stdarg differences. */
5142 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
5144 offset = current_function_arg_offset_rtx;
5146 /* We need to save %r26 .. %r19 inclusive starting at offset -64
5147 from the incoming arg pointer and growing to larger addresses. */
5148 for (i = 26, off = -64; i >= 19; i--, off += 8)
5149 emit_move_insn (gen_rtx_MEM (word_mode,
5150 plus_constant (arg_pointer_rtx, off)),
5151 gen_rtx_REG (word_mode, i));
5153 /* The incoming args pointer points just beyond the flushback area;
5154 normally this is not a serious concern. However, when we are doing
5155 varargs/stdargs we want to make the arg pointer point to the start
5156 of the incoming argument area. */
5157 emit_move_insn (virtual_incoming_args_rtx,
5158 plus_constant (arg_pointer_rtx, -64));
5160 /* Now return a pointer to the first anonymous argument. */
5161 return copy_to_reg (expand_binop (Pmode, add_optab,
5162 virtual_incoming_args_rtx,
5163 offset, 0, 0, OPTAB_LIB_WIDEN));
5166 /* Store general registers on the stack. */
5167 dest = gen_rtx_MEM (BLKmode,
5168 plus_constant (current_function_internal_arg_pointer,
5170 set_mem_alias_set (dest, get_varargs_alias_set ());
5171 set_mem_align (dest, BITS_PER_WORD);
5172 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
5174 /* move_block_from_reg will emit code to store the argument registers
5175 individually as scalar stores.
5177 However, other insns may later load from the same addresses for
5178 a structure load (passing a struct to a varargs routine).
5180 The alias code assumes that such aliasing can never happen, so we
5181 have to keep memory referencing insns from moving up beyond the
5182 last argument register store. So we emit a blockage insn here. */
5183 emit_insn (gen_blockage ());
5185 return copy_to_reg (expand_binop (Pmode, add_optab,
5186 current_function_internal_arg_pointer,
5187 offset, 0, 0, OPTAB_LIB_WIDEN));
/* hppa_va_start: target hook for `va_start'.  The incoming STDARG_P
   flag is ignored; the register flush done by expand_builtin_saveregs
   determines NEXTARG, and the standard expander is always invoked in
   stdarg mode (the literal 1 below).  */
5191 hppa_va_start (stdarg_p, valist, nextarg)
5192 int stdarg_p ATTRIBUTE_UNUSED;
5196 nextarg = expand_builtin_saveregs ();
5197 std_expand_builtin_va_start (1, valist, nextarg);
/* hppa_va_arg: expand `va_arg (VALIST, TYPE)' into a tree/RTL sequence
   and return an rtx addressing the fetched argument.
   NOTE(review): this listing elides lines (declarations, the TARGET_64BIT
   guard around the first section, and some else-branches); comments
   describe only what is visible.  */
5201 hppa_va_arg (valist, type)
5204 HOST_WIDE_INT align, size, ofs;
5209 /* Every argument in PA64 is passed by value (including large structs).
5210 Arguments with size greater than 8 must be aligned 0 MOD 16. */
/* PA64 path: round VALIST up to a 16-byte boundary for oversized
   arguments, then defer to the standard expander.  */
5212 size = int_size_in_bytes (type);
5213 if (size > UNITS_PER_WORD)
5215 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5216 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
5217 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
5218 build_int_2 (-2 * UNITS_PER_WORD, -1));
5219 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5220 TREE_SIDE_EFFECTS (t) = 1;
5221 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5223 return std_expand_builtin_va_arg (valist, type);
/* 32-bit path follows.  */
5226 /* Compute the rounded size of the type. */
5227 align = PARM_BOUNDARY / BITS_PER_UNIT;
5228 size = int_size_in_bytes (type);
5230 ptr = build_pointer_type (type);
5232 /* "Large" types are passed by reference. */
/* Pre-decrement VALIST by a pointer's width and fetch the argument
   through the stored pointer.  */
5235 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
5236 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
5237 TREE_SIDE_EFFECTS (t) = 1;
5239 pptr = build_pointer_type (ptr);
5240 t = build1 (NOP_EXPR, pptr, t);
5241 TREE_SIDE_EFFECTS (t) = 1;
5243 t = build1 (INDIRECT_REF, ptr, t);
5244 TREE_SIDE_EFFECTS (t) = 1;
/* Small types: arguments live at negative, descending offsets from
   VALIST; step back by SIZE and mask to the slot boundary.  */
5248 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
5249 build_int_2 (-size, -1));
5251 /* Copied from va-pa.h, but we probably don't need to align
5252 to word size, since we generate and preserve that invariant. */
5253 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
5254 build_int_2 ((size > 4 ? -8 : -4), -1));
5256 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
5257 TREE_SIDE_EFFECTS (t) = 1;
/* OFS nudges sub-word arguments within their slot — presumably to
   account for the PA's big-endian placement; confirm against ABI.  */
5259 ofs = (8 - size) % 4;
5262 t = build (PLUS_EXPR, TREE_TYPE (valist), t, build_int_2 (ofs, 0));
5263 TREE_SIDE_EFFECTS (t) = 1;
5266 t = build1 (NOP_EXPR, ptr, t);
5267 TREE_SIDE_EFFECTS (t) = 1;
5271 return expand_expr (t, NULL_RTX, Pmode, EXPAND_NORMAL);
5276 /* This routine handles all the normal conditional branch sequences we
5277 might need to generate. It handles compare immediate vs compare
5278 register, nullification of delay slots, varying length branches,
5279 negated branches, and all combinations of the above. It returns the
5280 output appropriate to emit the branch corresponding to all given
/* NOTE(review): the listing elides lines here (the return type, the
   `switch (length)' skeleton, some else-branches and closing braces);
   comments below annotate only what is visible.  Templates use the
   {pa1.x|pa2.0} dialect-choice syntax and %-operand escapes.  */
5284 output_cbranch (operands, nullify, length, negated, insn)
5286 int nullify, length, negated;
5289 static char buf[100];
5292 /* A conditional branch to the following instruction (eg the delay slot) is
5293 asking for a disaster. This can happen when not optimizing.
5295 In such cases it is safe to emit nothing. */
5297 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5300 /* If this is a long branch with its delay slot unfilled, set `nullify'
5301 as it can nullify the delay slot and save a nop. */
5302 if (length == 8 && dbr_sequence_length () == 0)
5305 /* If this is a short forward conditional branch which did not get
5306 its delay slot filled, the delay slot can still be nullified. */
5307 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5308 nullify = forward_branch_p (insn);
5310 /* A forward branch over a single nullified insn can be done with a
5311 comclr instruction. This avoids a single cycle penalty due to
5312 mis-predicted branch if we fall through (branch not taken). */
5314 && next_real_insn (insn) != 0
5315 && get_attr_length (next_real_insn (insn)) == 4
5316 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5322 /* All short conditional branches except backwards with an unfilled
/* useskip case emits comclr/cmpclr; otherwise a comb/cmpb branch.  */
5326 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5328 strcpy (buf, "{com%I2b,|cmp%I2b,}");
/* %B3 = negated condition completer, %S3 = straight completer.  */
5329 if (GET_MODE (operands[1]) == DImode)
5332 strcat (buf, "%B3");
5334 strcat (buf, "%S3");
5336 strcat (buf, " %2,%r1,%%r0");
5338 strcat (buf, ",n %2,%r1,%0");
5340 strcat (buf, " %2,%r1,%0");
5343 /* All long conditionals. Note a short backward branch with an
5344 unfilled delay slot is treated just like a long backward branch
5345 with an unfilled delay slot. */
5347 /* Handle weird backwards branch with a filled delay slot
5348 which is nullified. */
5349 if (dbr_sequence_length () != 0
5350 && ! forward_branch_p (insn)
/* Branch around the 12-byte fallthrough (skip + b) on the inverse
   condition, then do an unconditional branch to the real target.  */
5353 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5354 if (GET_MODE (operands[1]) == DImode)
5357 strcat (buf, "%S3");
5359 strcat (buf, "%B3");
5360 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
5362 /* Handle short backwards branch with an unfilled delay slot.
5363 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
5364 taken and untaken branches. */
/* Only valid when the (already laid out) branch distance fits in the
   14-bit displacement of a comb.  */
5365 else if (dbr_sequence_length () == 0
5366 && ! forward_branch_p (insn)
5367 && INSN_ADDRESSES_SET_P ()
5368 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5369 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5371 strcpy (buf, "{com%I2b,|cmp%I2b,}");
5372 if (GET_MODE (operands[1]) == DImode)
5375 strcat (buf, "%B3 %2,%r1,%0%#");
5377 strcat (buf, "%S3 %2,%r1,%0%#");
/* Generic long form: clear-skip on the inverse condition, then an
   unconditional branch (nullified when the delay slot is empty).  */
5381 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
5382 if (GET_MODE (operands[1]) == DImode)
5385 strcat (buf, "%S3");
5387 strcat (buf, "%B3");
5389 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
5391 strcat (buf, " %2,%r1,%%r0\n\tb %0");
5396 /* Very long branch. Right now we only handle these when not
5397 optimizing. See "jump" pattern in pa.md for details. */
5401 /* Create a reversed conditional branch which branches around
5402 the following insns. */
5404 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
5406 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
5407 if (GET_MODE (operands[1]) == DImode)
5411 "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
5414 "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
5416 output_asm_insn (buf, operands);
/* %r1 is used as scratch by the ldil/be pair, so preserve it.  */
5418 /* Output an insn to save %r1. */
5419 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5421 /* Now output a very long branch to the original target. */
5422 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
5424 /* Now restore the value of %r1 in the delay slot. We're not
5425 optimizing so we know nothing else can be in the delay slot. */
5426 return "ldw -16(%%r30),%%r1";
5429 /* Very long branch when generating PIC code. Right now we only
5430 handle these when not optimizing. See "jump" pattern in pa.md
5435 /* Create a reversed conditional branch which branches around
5436 the following insns. */
5438 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
5440 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
5441 if (GET_MODE (operands[1]) == DImode)
5444 strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
5446 strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
5448 output_asm_insn (buf, operands);
5450 /* Output an insn to save %r1. */
5451 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5453 /* Now output a very long PIC branch to the original target. */
5457 xoperands[0] = operands[0];
5458 xoperands[1] = operands[1];
5459 xoperands[2] = operands[2];
5460 xoperands[3] = operands[3];
5461 xoperands[4] = gen_label_rtx ();
/* PC-relative sequence: get our own address into %r1 via bl .+8,
   then add the label-relative offset of the target and bv to it.  */
5463 output_asm_insn ("{bl|b,l} .+8,%%r1\n\taddil L'%l0-%l4,%%r1",
5465 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5466 CODE_LABEL_NUMBER (xoperands[4]));
5467 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv %%r0(%%r1)",
5471 /* Now restore the value of %r1 in the delay slot. We're not
5472 optimizing so we know nothing else can be in the delay slot. */
5473 return "ldw -16(%%r30),%%r1";
5481 /* This routine handles all the branch-on-bit conditional branch sequences we
5482 might need to generate. It handles nullification of delay slots,
5483 varying length branches, negated branches and all combinations of the
5484 above. it returns the appropriate output template to emit the branch. */
/* NOTE(review): the listing elides lines (return type, the `switch
   (length)' skeleton, the condition-completer strcats, closing braces);
   comments annotate only what is visible.  WHICH selects which label
   operand (%2 vs %3) is the branch target when combined with NEGATED.  */
5487 output_bb (operands, nullify, length, negated, insn, which)
5488 rtx *operands ATTRIBUTE_UNUSED;
5489 int nullify, length, negated;
5493 static char buf[100];
5496 /* A conditional branch to the following instruction (eg the delay slot) is
5497 asking for a disaster. I do not think this can happen as this pattern
5498 is only used when optimizing; jump optimization should eliminate the
5499 jump. But be prepared just in case. */
5501 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5504 /* If this is a long branch with its delay slot unfilled, set `nullify'
5505 as it can nullify the delay slot and save a nop. */
5506 if (length == 8 && dbr_sequence_length () == 0)
5509 /* If this is a short forward conditional branch which did not get
5510 its delay slot filled, the delay slot can still be nullified. */
5511 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5512 nullify = forward_branch_p (insn);
5514 /* A forward branch over a single nullified insn can be done with a
5515 extrs instruction. This avoids a single cycle penalty due to
5516 mis-predicted branch if we fall through (branch not taken). */
5519 && next_real_insn (insn) != 0
5520 && get_attr_length (next_real_insn (insn)) == 4
5521 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5528 /* All short conditional branches except backwards with an unfilled
/* Pick the base mnemonic: extract-and-skip vs branch-on-bit, with
   64-bit (DImode) variants overriding the 32-bit spellings.  */
5532 strcpy (buf, "{extrs,|extrw,s,}");
5534 strcpy (buf, "bb,");
5535 if (useskip && GET_MODE (operands[0]) == DImode)
5536 strcpy (buf, "extrd,s,*");
5537 else if (GET_MODE (operands[0]) == DImode)
5538 strcpy (buf, "bb,*");
5539 if ((which == 0 && negated)
5540 || (which == 1 && ! negated))
5545 strcat (buf, " %0,%1,1,%%r0");
5546 else if (nullify && negated)
5547 strcat (buf, ",n %0,%1,%3");
5548 else if (nullify && ! negated)
5549 strcat (buf, ",n %0,%1,%2");
/* NOTE(review): this template lacks the leading space its four
   siblings have — verify the intended assembly output.  */
5550 else if (! nullify && negated)
5551 strcat (buf, "%0,%1,%3");
5552 else if (! nullify && ! negated)
5553 strcat (buf, " %0,%1,%2");
5556 /* All long conditionals. Note a short backward branch with an
5557 unfilled delay slot is treated just like a long backward branch
5558 with an unfilled delay slot. */
5560 /* Handle weird backwards branch with a filled delay slot
5561 which is nullified. */
5562 if (dbr_sequence_length () != 0
5563 && ! forward_branch_p (insn)
5566 strcpy (buf, "bb,");
5567 if (GET_MODE (operands[0]) == DImode)
5569 if ((which == 0 && negated)
5570 || (which == 1 && ! negated))
/* Skip over the 12-byte fallthrough, then branch to the target.  */
5575 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5577 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5579 /* Handle short backwards branch with an unfilled delay slot.
5580 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5581 taken and untaken branches. */
5582 else if (dbr_sequence_length () == 0
5583 && ! forward_branch_p (insn)
5584 && INSN_ADDRESSES_SET_P ()
5585 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5586 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5588 strcpy (buf, "bb,");
5589 if (GET_MODE (operands[0]) == DImode)
5591 if ((which == 0 && negated)
5592 || (which == 1 && ! negated))
5597 strcat (buf, " %0,%1,%3%#");
5599 strcat (buf, " %0,%1,%2%#");
/* Generic long form: extract-and-skip, then unconditional branch.  */
5603 strcpy (buf, "{extrs,|extrw,s,}");
5604 if (GET_MODE (operands[0]) == DImode)
5605 strcpy (buf, "extrd,s,*");
5606 if ((which == 0 && negated)
5607 || (which == 1 && ! negated))
5611 if (nullify && negated)
5612 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5613 else if (nullify && ! negated)
5614 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5616 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5618 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5628 /* This routine handles all the branch-on-variable-bit conditional branch
5629 sequences we might need to generate. It handles nullification of delay
5630 slots, varying length branches, negated branches and all combinations
5631 of the above. it returns the appropriate output template to emit the
/* NOTE(review): the listing elides lines (return type, the `switch
   (length)' skeleton, the condition-completer strcats, closing braces);
   comments annotate only what is visible.  The bit position lives in
   %sar rather than an immediate, hence bvb/vextrs spellings.  */
5635 output_bvb (operands, nullify, length, negated, insn, which)
5636 rtx *operands ATTRIBUTE_UNUSED;
5637 int nullify, length, negated;
5641 static char buf[100];
5644 /* A conditional branch to the following instruction (eg the delay slot) is
5645 asking for a disaster. I do not think this can happen as this pattern
5646 is only used when optimizing; jump optimization should eliminate the
5647 jump. But be prepared just in case. */
5649 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5652 /* If this is a long branch with its delay slot unfilled, set `nullify'
5653 as it can nullify the delay slot and save a nop. */
5654 if (length == 8 && dbr_sequence_length () == 0)
5657 /* If this is a short forward conditional branch which did not get
5658 its delay slot filled, the delay slot can still be nullified. */
5659 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5660 nullify = forward_branch_p (insn);
5662 /* A forward branch over a single nullified insn can be done with a
5663 extrs instruction. This avoids a single cycle penalty due to
5664 mis-predicted branch if we fall through (branch not taken). */
5667 && next_real_insn (insn) != 0
5668 && get_attr_length (next_real_insn (insn)) == 4
5669 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5676 /* All short conditional branches except backwards with an unfilled
5680 strcpy (buf, "{vextrs,|extrw,s,}");
5682 strcpy (buf, "{bvb,|bb,}");
/* BUG FIX: the template read "extrd,s,*}" — the stray '}' is not part
   of any {pa1.x|pa2.0} choice (strcpy replaces the whole buffer) and
   would corrupt the emitted assembly.  The parallel DImode templates
   here (long-branch case below) and in output_bb use "extrd,s,*".  */
5684 strcpy (buf, "extrd,s,*");
5685 else if (GET_MODE (operands[0]) == DImode)
5686 strcpy (buf, "bb,*");
5687 if ((which == 0 && negated)
5688 || (which == 1 && ! negated))
5693 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5694 else if (nullify && negated)
5695 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5696 else if (nullify && ! negated)
5697 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
/* NOTE(review): this template lacks the leading space its siblings
   have — verify the intended assembly output.  */
5698 else if (! nullify && negated)
5699 strcat (buf, "{%0,%3|%0,%%sar,%3}");
5700 else if (! nullify && ! negated)
5701 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5704 /* All long conditionals. Note a short backward branch with an
5705 unfilled delay slot is treated just like a long backward branch
5706 with an unfilled delay slot. */
5708 /* Handle weird backwards branch with a filled delay slot
5709 which is nullified. */
5710 if (dbr_sequence_length () != 0
5711 && ! forward_branch_p (insn)
5714 strcpy (buf, "{bvb,|bb,}");
5715 if (GET_MODE (operands[0]) == DImode)
5717 if ((which == 0 && negated)
5718 || (which == 1 && ! negated))
/* Skip over the 12-byte fallthrough, then branch to the target.  */
5723 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5725 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5727 /* Handle short backwards branch with an unfilled delay slot.
5728 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5729 taken and untaken branches. */
5730 else if (dbr_sequence_length () == 0
5731 && ! forward_branch_p (insn)
5732 && INSN_ADDRESSES_SET_P ()
5733 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5734 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5736 strcpy (buf, "{bvb,|bb,}");
5737 if (GET_MODE (operands[0]) == DImode)
5739 if ((which == 0 && negated)
5740 || (which == 1 && ! negated))
5745 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5747 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
/* Generic long form: variable extract-and-skip, then a branch.  */
5751 strcpy (buf, "{vextrs,|extrw,s,}");
5752 if (GET_MODE (operands[0]) == DImode)
5753 strcpy (buf, "extrd,s,*");
5754 if ((which == 0 && negated)
5755 || (which == 1 && ! negated))
5759 if (nullify && negated)
5760 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5761 else if (nullify && ! negated)
5762 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5764 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5766 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5776 /* Return the output template for emitting a dbra type insn.
5778 Note it may perform some output operations on its own before
5779 returning the final output string. */
/* NOTE(review): the listing elides lines (return type, some braces and
   else-branches); comments annotate only what is visible.  Alternative
   0 = counter in a GR, 1 = counter in an FP reg, otherwise memory.  */
5781 output_dbra (operands, insn, which_alternative)
5784 int which_alternative;
5787 /* A conditional branch to the following instruction (eg the delay slot) is
5788 asking for a disaster. Be prepared! */
/* Degenerate branch-to-next-insn: just perform the decrement/update
   for the chosen alternative and emit no branch at all.  */
5790 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5792 if (which_alternative == 0)
5793 return "ldo %1(%0),%0";
5794 else if (which_alternative == 1)
5796 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
5797 output_asm_insn ("ldw -16(%%r30),%4", operands);
5798 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5799 return "{fldws|fldw} -16(%%r30),%0";
5803 output_asm_insn ("ldw %0,%4", operands);
5804 return "ldo %1(%4),%4\n\tstw %4,%0";
5808 if (which_alternative == 0)
5810 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5811 int length = get_attr_length (insn);
5813 /* If this is a long branch with its delay slot unfilled, set `nullify'
5814 as it can nullify the delay slot and save a nop. */
5815 if (length == 8 && dbr_sequence_length () == 0)
5818 /* If this is a short forward conditional branch which did not get
5819 its delay slot filled, the delay slot can still be nullified. */
5820 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5821 nullify = forward_branch_p (insn);
5823 /* Handle short versions first. */
5824 if (length == 4 && nullify)
5825 return "addib,%C2,n %1,%0,%3";
5826 else if (length == 4 && ! nullify)
5827 return "addib,%C2 %1,%0,%3";
5828 else if (length == 8)
5830 /* Handle weird backwards branch with a filled delay slot
5831 which is nullified. */
5832 if (dbr_sequence_length () != 0
5833 && ! forward_branch_p (insn)
5835 return "addib,%N2,n %1,%0,.+12\n\tb %3";
5836 /* Handle short backwards branch with an unfilled delay slot.
5837 Using a addb;nop rather than addi;bl saves 1 cycle for both
5838 taken and untaken branches. */
5839 else if (dbr_sequence_length () == 0
5840 && ! forward_branch_p (insn)
5841 && INSN_ADDRESSES_SET_P ()
5842 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5843 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5844 return "addib,%C2 %1,%0,%3%#";
5846 /* Handle normal cases. */
5848 return "addi,%N2 %1,%0,%0\n\tb,n %3";
5850 return "addi,%N2 %1,%0,%0\n\tb %3";
5855 /* Deal with gross reload from FP register case. */
5856 else if (which_alternative == 1)
5858 /* Move loop counter from FP register to MEM then into a GR,
5859 increment the GR, store the GR into MEM, and finally reload
5860 the FP register from MEM from within the branch's delay slot. */
5861 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
5863 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5864 if (get_attr_length (insn) == 24)
5865 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
5867 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5869 /* Deal with gross reload from memory case. */
5872 /* Reload loop counter from memory, the store back to memory
5873 happens in the branch's delay slot. */
5874 output_asm_insn ("ldw %0,%4", operands);
5875 if (get_attr_length (insn) == 12)
5876 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
5878 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
5882 /* Return the output template for emitting a movb type insn.
5884 Note it may perform some output operations on its own before
5885 returning the final output string. */
/* NOTE(review): the listing elides lines (return type, some braces and
   else-branches); comments annotate only what is visible.  Alternative
   0 = GR destination, 1 = FP reg, 2 = memory, otherwise %sar.  */
5887 output_movb (operands, insn, which_alternative, reverse_comparison)
5890 int which_alternative;
5891 int reverse_comparison;
5894 /* A conditional branch to the following instruction (eg the delay slot) is
5895 asking for a disaster. Be prepared! */
/* Degenerate branch-to-next-insn: just perform the move and no branch.  */
5897 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5899 if (which_alternative == 0)
5900 return "copy %1,%0";
5901 else if (which_alternative == 1)
5903 output_asm_insn ("stw %1,-16(%%r30)", operands);
5904 return "{fldws|fldw} -16(%%r30),%0";
5906 else if (which_alternative == 2)
5912 /* Support the second variant. */
/* Flip the comparison code in place when the caller asked for the
   inverted sense of the pattern.  */
5913 if (reverse_comparison)
5914 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
5916 if (which_alternative == 0)
5918 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5919 int length = get_attr_length (insn);
5921 /* If this is a long branch with its delay slot unfilled, set `nullify'
5922 as it can nullify the delay slot and save a nop. */
5923 if (length == 8 && dbr_sequence_length () == 0)
5926 /* If this is a short forward conditional branch which did not get
5927 its delay slot filled, the delay slot can still be nullified. */
5928 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5929 nullify = forward_branch_p (insn);
5931 /* Handle short versions first. */
5932 if (length == 4 && nullify)
5933 return "movb,%C2,n %1,%0,%3";
5934 else if (length == 4 && ! nullify)
5935 return "movb,%C2 %1,%0,%3";
5936 else if (length == 8)
5938 /* Handle weird backwards branch with a filled delay slot
5939 which is nullified. */
5940 if (dbr_sequence_length () != 0
5941 && ! forward_branch_p (insn)
5943 return "movb,%N2,n %1,%0,.+12\n\tb %3";
5945 /* Handle short backwards branch with an unfilled delay slot.
5946 Using a movb;nop rather than or;bl saves 1 cycle for both
5947 taken and untaken branches. */
5948 else if (dbr_sequence_length () == 0
5949 && ! forward_branch_p (insn)
5950 && INSN_ADDRESSES_SET_P ()
5951 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5952 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5953 return "movb,%C2 %1,%0,%3%#";
5954 /* Handle normal cases. */
5956 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
5958 return "or,%N2 %1,%%r0,%0\n\tb %3";
5963 /* Deal with gross reload from FP register case. */
5964 else if (which_alternative == 1)
5966 /* Move loop counter from FP register to MEM then into a GR,
5967 increment the GR, store the GR into MEM, and finally reload
5968 the FP register from MEM from within the branch's delay slot. */
5969 output_asm_insn ("stw %1,-16(%%r30)", operands);
5970 if (get_attr_length (insn) == 12)
5971 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
5973 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5975 /* Deal with gross reload from memory case. */
5976 else if (which_alternative == 2)
5978 /* Reload loop counter from memory, the store back to memory
5979 happens in the branch's delay slot. */
5980 if (get_attr_length (insn) == 8)
5981 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
5983 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
5985 /* Handle SAR as a destination. */
5988 if (get_attr_length (insn) == 8)
5989 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
/* NOTE(review): this template uses "bl %3" where every sibling long
   form in this file uses plain "b %3" — verify this is intentional.  */
5991 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
5996 /* INSN is a millicode call. It may have an unconditional jump in its delay
5999 CALL_DEST is the routine we are calling. */
/* NOTE(review): the listing elides lines (return type, local
   declarations, some braces and returns); comments annotate only what
   is visible.  Millicode calls use %r31 (or %r2 on pa64) as the return
   pointer, set up in xoperands[3] below.  */
6002 output_millicode_call (insn, call_dest)
6010 xoperands[3] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
6012 /* Handle common case -- empty delay slot or no jump in the delay slot,
6013 and we're sure that the branch will reach the beginning of the $CODE$
6015 if ((dbr_sequence_length () == 0
6016 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
6017 || (dbr_sequence_length () != 0
6018 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6019 && get_attr_length (insn) == 4))
6021 xoperands[0] = call_dest;
6022 output_asm_insn ("{bl|b,l} %0,%3%#", xoperands);
6026 /* This call may not reach the beginning of the $CODE$ subspace. */
6027 if (get_attr_length (insn) > 4)
6029 int delay_insn_deleted = 0;
6031 /* We need to emit an inline long-call branch. */
6032 if (dbr_sequence_length () != 0
6033 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6035 /* A non-jump insn in the delay slot. By definition we can
6036 emit this insn before the call. */
6037 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
/* Turn the delay insn into a deleted note so final won't emit it.  */
6039 /* Now delete the delay insn. */
6040 PUT_CODE (NEXT_INSN (insn), NOTE);
6041 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6042 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6043 delay_insn_deleted = 1;
6046 /* PIC long millicode call sequence. */
6049 xoperands[0] = call_dest;
6050 xoperands[1] = gen_label_rtx ();
6051 /* Get our address + 8 into %r1. */
6052 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6054 /* Add %r1 to the offset of our target from the next insn. */
6055 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
6056 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6057 CODE_LABEL_NUMBER (xoperands[1]));
6058 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
6060 /* Get the return address into %r31. */
6061 output_asm_insn ("blr 0,%3", xoperands);
6063 /* Branch to our target which is in %r1. */
6064 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
6066 /* Empty delay slot. Note this insn gets fetched twice and
6067 executed once. To be safe we use a nop. */
6068 output_asm_insn ("nop", xoperands);
6070 /* Pure portable runtime doesn't allow be/ble; we also don't have
6071 PIC support in the assembler/linker, so this sequence is needed. */
6072 else if (TARGET_PORTABLE_RUNTIME)
6074 xoperands[0] = call_dest;
6075 /* Get the address of our target into %r29. */
6076 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
6077 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
6079 /* Get our return address into %r31. */
6080 output_asm_insn ("blr %%r0,%3", xoperands);
6082 /* Jump to our target address in %r29. */
6083 output_asm_insn ("bv,n %%r0(%%r29)", xoperands);
6085 /* Empty delay slot. Note this insn gets fetched twice and
6086 executed once. To be safe we use a nop. */
6087 output_asm_insn ("nop", xoperands);
6089 /* If we're allowed to use be/ble instructions, then this is the
6090 best sequence to use for a long millicode call. */
6093 xoperands[0] = call_dest;
6094 output_asm_insn ("ldil L%%%0,%3", xoperands);
6095 output_asm_insn ("{ble|be,l} R%%%0(%%sr4,%3)", xoperands);
6096 output_asm_insn ("nop", xoperands);
6099 /* If we had a jump in the call's delay slot, output it now. */
6100 if (dbr_sequence_length () != 0
6101 && !delay_insn_deleted)
6103 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6104 output_asm_insn ("b,n %0", xoperands);
6106 /* Now delete the delay insn. */
6107 PUT_CODE (NEXT_INSN (insn), NOTE);
6108 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6109 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6114 /* This call has an unconditional jump in its delay slot and the
6115 call is known to reach its target or the beginning of the current
6118 /* Use the containing sequence insn's address. */
6119 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6121 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6122 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
6124 /* If the branch was too far away, emit a normal call followed
6125 by a nop, followed by the unconditional branch.
6127 If the branch is close, then adjust %r2 from within the
6128 call's delay slot. */
6130 xoperands[0] = call_dest;
6131 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6132 if (! VAL_14_BITS_P (distance))
6133 output_asm_insn ("{bl|b,l} %0,%3\n\tnop\n\tb,n %1", xoperands);
/* Close case: fold the jump into the call by fixing up the return
   pointer in the delay slot with an ldo relative to a local label.  */
6136 xoperands[2] = gen_label_rtx ();
6137 output_asm_insn ("\n\t{bl|b,l} %0,%3\n\tldo %1-%2(%3),%3",
6139 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6140 CODE_LABEL_NUMBER (xoperands[2]));
6143 /* Delete the jump. */
6144 PUT_CODE (NEXT_INSN (insn), NOTE);
6145 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6146 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
/* Defined elsewhere in the compiler; used below to give deferred
   plabel names storage that lives for the whole compilation.  */
6150 extern struct obstack permanent_obstack;
6152 /* INSN is either a function call. It may have an unconditional jump
6155 CALL_DEST is the routine we are calling. */
6158 output_call (insn, call_dest, sibcall)
6167 /* Handle common case -- empty delay slot or no jump in the delay slot,
6168 and we're sure that the branch will reach the beginning of the $CODE$
6170 if ((dbr_sequence_length () == 0
6171 && get_attr_length (insn) == 12)
6172 || (dbr_sequence_length () != 0
6173 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
6174 && get_attr_length (insn) == 8))
6176 xoperands[0] = call_dest;
6177 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
6178 output_asm_insn ("{bl|b,l} %0,%1%#", xoperands);
6182 /* This call may not reach the beginning of the $CODE$ subspace. */
6183 if (get_attr_length (insn) > 12)
6185 int delay_insn_deleted = 0;
6189 /* We need to emit an inline long-call branch. Furthermore,
6190 because we're changing a named function call into an indirect
6191 function call well after the parameters have been set up, we
6192 need to make sure any FP args appear in both the integer
6193 and FP registers. Also, we need move any delay slot insn
6194 out of the delay slot. And finally, we can't rely on the linker
6195 being able to fix the call to $$dyncall! -- Yuk!. */
6196 if (dbr_sequence_length () != 0
6197 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
6199 /* A non-jump insn in the delay slot. By definition we can
6200 emit this insn before the call (and in fact before argument
6202 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
6204 /* Now delete the delay insn. */
6205 PUT_CODE (NEXT_INSN (insn), NOTE);
6206 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6207 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6208 delay_insn_deleted = 1;
6211 /* Now copy any FP arguments into integer registers. */
6212 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6214 int arg_mode, regno;
6215 rtx use = XEXP (link, 0);
6216 if (! (GET_CODE (use) == USE
6217 && GET_CODE (XEXP (use, 0)) == REG
6218 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6221 arg_mode = GET_MODE (XEXP (use, 0));
6222 regno = REGNO (XEXP (use, 0));
6223 /* Is it a floating point register? */
6224 if (regno >= 32 && regno <= 39)
6226 /* Copy from the FP register into an integer register
6228 if (arg_mode == SFmode)
6230 xoperands[0] = XEXP (use, 0);
6231 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6232 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)",
6234 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6238 xoperands[0] = XEXP (use, 0);
6239 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6240 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)",
6242 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6243 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6248 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
6249 we don't have any direct calls in that case. */
6252 const char *name = XSTR (call_dest, 0);
6254 /* See if we have already put this function on the list
6255 of deferred plabels. This list is generally small,
6256 so a liner search is not too ugly. If it proves too
6257 slow replace it with something faster. */
6258 for (i = 0; i < n_deferred_plabels; i++)
6259 if (strcmp (name, deferred_plabels[i].name) == 0)
6262 /* If the deferred plabel list is empty, or this entry was
6263 not found on the list, create a new entry on the list. */
6264 if (deferred_plabels == NULL || i == n_deferred_plabels)
6266 const char *real_name;
6268 if (deferred_plabels == 0)
6269 deferred_plabels = (struct deferred_plabel *)
6270 xmalloc (1 * sizeof (struct deferred_plabel));
6272 deferred_plabels = (struct deferred_plabel *)
6273 xrealloc (deferred_plabels,
6274 ((n_deferred_plabels + 1)
6275 * sizeof (struct deferred_plabel)));
6277 i = n_deferred_plabels++;
6278 deferred_plabels[i].internal_label = gen_label_rtx ();
6279 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
6281 strcpy (deferred_plabels[i].name, name);
6283 /* Gross. We have just implicitly taken the address of this
6284 function, mark it as such. */
6285 STRIP_NAME_ENCODING (real_name, name);
6286 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
6289 /* We have to load the address of the function using a procedure
6290 label (plabel). Inline plabels can lose for PIC and other
6291 cases, so avoid them by creating a 32bit plabel in the data
6295 xoperands[0] = deferred_plabels[i].internal_label;
6296 xoperands[1] = gen_label_rtx ();
6298 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
6299 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
6300 output_asm_insn ("ldw 0(%%r22),%%r22", xoperands);
6302 /* Get our address + 8 into %r1. */
6303 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6305 /* Add %r1 to the offset of dyncall from the next insn. */
6306 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
6307 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6308 CODE_LABEL_NUMBER (xoperands[1]));
6309 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
6311 /* Get the return address into %r31. */
6312 output_asm_insn ("blr %%r0,%%r31", xoperands);
6314 /* Branch to our target which is in %r1. */
6315 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6319 /* This call never returns, so we do not need to fix the
6321 output_asm_insn ("nop", xoperands);
6325 /* Copy the return address into %r2 also. */
6326 output_asm_insn ("copy %%r31,%%r2", xoperands);
6331 xoperands[0] = deferred_plabels[i].internal_label;
6333 /* Get the address of our target into %r22. */
6334 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
6335 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
6337 /* Get the high part of the address of $dyncall into %r2, then
6338 add in the low part in the branch instruction. */
6339 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
6340 output_asm_insn ("{ble|be,l} R%%$$dyncall(%%sr4,%%r2)",
6345 /* This call never returns, so we do not need to fix the
6347 output_asm_insn ("nop", xoperands);
6351 /* Copy the return address into %r2 also. */
6352 output_asm_insn ("copy %%r31,%%r2", xoperands);
6357 /* If we had a jump in the call's delay slot, output it now. */
6358 if (dbr_sequence_length () != 0
6359 && !delay_insn_deleted)
6361 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6362 output_asm_insn ("b,n %0", xoperands);
6364 /* Now delete the delay insn. */
6365 PUT_CODE (NEXT_INSN (insn), NOTE);
6366 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6367 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6372 /* This call has an unconditional jump in its delay slot and the
6373 call is known to reach its target or the beginning of the current
6376 /* Use the containing sequence insn's address. */
6377 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
6379 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
6380 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
6382 /* If the branch was too far away, emit a normal call followed
6383 by a nop, followed by the unconditional branch. We also don't
6384 adjust %r2 when generating dwarf2 frame or unwind info since
6385 the adjustment confuses the dwarf2 output.
6387 If the branch is close, then adjust %r2 from within the
6388 call's delay slot. */
6390 xoperands[0] = call_dest;
6391 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6392 if (DO_FRAME_NOTES || ! VAL_14_BITS_P (distance))
6393 output_asm_insn ("{bl|b,l} %0,%%r2\n\tnop\n\tb,n %1", xoperands);
6396 xoperands[3] = gen_label_rtx ();
6397 output_asm_insn ("\n\t{bl|b,l} %0,%%r2\n\tldo %1-%3(%%r2),%%r2",
6399 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
6400 CODE_LABEL_NUMBER (xoperands[3]));
6403 /* Delete the jump. */
6404 PUT_CODE (NEXT_INSN (insn), NOTE);
6405 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6406 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6410 /* In HPUX 8.0's shared library scheme, special relocations are needed
6411 for function labels if they might be passed to a function
6412 in a shared library (because shared libraries don't live in code
6413 space), and special magic is needed to construct their address. */
/* Rewrite the name of SYM (a SYMBOL_REF) so that it carries the
   function-label encoding; FUNCTION_NAME_P elsewhere in this file tests
   for that encoding.  NOTE(review): this excerpt is missing several
   lines of the body (the declarations of newstr/p and the code that
   actually builds the encoded string) -- confirm against the full
   source before relying on the details below.  */
6416 hppa_encode_label (sym)
6419 const char *str = XSTR (sym, 0);
/* The +1 accounts for the terminating NUL.  */
6420 int len = strlen (str) + 1;
/* Stack-allocated scratch buffer; the extra byte leaves room for the
   encoding prefix character.  */
6423 p = newstr = alloca (len + 1);
/* Install the rewritten name; ggc_alloc_string copies NEWSTR into
   garbage-collected storage, so the alloca buffer may die.  */
6432 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
6436 function_label_operand (op, mode)
6438 enum machine_mode mode ATTRIBUTE_UNUSED;
6440 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
6443 /* Returns 1 if OP is a function label involved in a simple addition
6444 with a constant. Used to keep certain patterns from matching
6445 during instruction combination. */
6447 is_function_label_plus_const (op)
6450 /* Strip off any CONST. */
6451 if (GET_CODE (op) == CONST)
6454 return (GET_CODE (op) == PLUS
6455 && function_label_operand (XEXP (op, 0), Pmode)
6456 && GET_CODE (XEXP (op, 1)) == CONST_INT);
6459 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6460 use in fmpyadd instructions. */
6462 fmpyaddoperands (operands)
6465 enum machine_mode mode = GET_MODE (operands[0]);
6467 /* Must be a floating point mode. */
6468 if (mode != SFmode && mode != DFmode)
6471 /* All modes must be the same. */
6472 if (! (mode == GET_MODE (operands[1])
6473 && mode == GET_MODE (operands[2])
6474 && mode == GET_MODE (operands[3])
6475 && mode == GET_MODE (operands[4])
6476 && mode == GET_MODE (operands[5])))
6479 /* All operands must be registers. */
6480 if (! (GET_CODE (operands[1]) == REG
6481 && GET_CODE (operands[2]) == REG
6482 && GET_CODE (operands[3]) == REG
6483 && GET_CODE (operands[4]) == REG
6484 && GET_CODE (operands[5]) == REG))
6487 /* Only 2 real operands to the addition. One of the input operands must
6488 be the same as the output operand. */
6489 if (! rtx_equal_p (operands[3], operands[4])
6490 && ! rtx_equal_p (operands[3], operands[5]))
6493 /* Inout operand of add can not conflict with any operands from multiply. */
6494 if (rtx_equal_p (operands[3], operands[0])
6495 || rtx_equal_p (operands[3], operands[1])
6496 || rtx_equal_p (operands[3], operands[2]))
6499 /* multiply can not feed into addition operands. */
6500 if (rtx_equal_p (operands[4], operands[0])
6501 || rtx_equal_p (operands[5], operands[0]))
6504 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6506 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6507 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6508 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6509 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6510 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6511 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6514 /* Passed. Operands are suitable for fmpyadd. */
6518 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6519 use in fmpysub instructions. */
6521 fmpysuboperands (operands)
6524 enum machine_mode mode = GET_MODE (operands[0]);
6526 /* Must be a floating point mode. */
6527 if (mode != SFmode && mode != DFmode)
6530 /* All modes must be the same. */
6531 if (! (mode == GET_MODE (operands[1])
6532 && mode == GET_MODE (operands[2])
6533 && mode == GET_MODE (operands[3])
6534 && mode == GET_MODE (operands[4])
6535 && mode == GET_MODE (operands[5])))
6538 /* All operands must be registers. */
6539 if (! (GET_CODE (operands[1]) == REG
6540 && GET_CODE (operands[2]) == REG
6541 && GET_CODE (operands[3]) == REG
6542 && GET_CODE (operands[4]) == REG
6543 && GET_CODE (operands[5]) == REG))
6546 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
6547 operation, so operands[4] must be the same as operand[3]. */
6548 if (! rtx_equal_p (operands[3], operands[4]))
6551 /* multiply can not feed into subtraction. */
6552 if (rtx_equal_p (operands[5], operands[0]))
6555 /* Inout operand of sub can not conflict with any operands from multiply. */
6556 if (rtx_equal_p (operands[3], operands[0])
6557 || rtx_equal_p (operands[3], operands[1])
6558 || rtx_equal_p (operands[3], operands[2]))
6561 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6563 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6564 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6565 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6566 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6567 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6568 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6571 /* Passed. Operands are suitable for fmpysub. */
6576 plus_xor_ior_operator (op, mode)
6578 enum machine_mode mode ATTRIBUTE_UNUSED;
6580 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
6581 || GET_CODE (op) == IOR);
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
static int
shadd_constant_p (val)
     int val;
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;
    default:
      return 0;
    }
}
6596 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
6597 the valid constant for shadd instructions. */
6599 shadd_operand (op, mode)
6601 enum machine_mode mode ATTRIBUTE_UNUSED;
6603 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
6606 /* Return 1 if OP is valid as a base register in a reg + reg address. */
6609 basereg_operand (op, mode)
6611 enum machine_mode mode;
6613 /* cse will create some unscaled indexed addresses, however; it
6614 generally isn't a win on the PA, so avoid creating unscaled
6615 indexed addresses until after cse is finished. */
6616 if (!cse_not_expected)
6619 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
6620 we don't have to worry about the braindamaged implicit space
6621 register selection from the basereg. */
6622 if (TARGET_NO_SPACE_REGS)
6623 return (GET_CODE (op) == REG);
6625 /* While it's always safe to index off the frame pointer, it's not
6626 always profitable, particularly when the frame pointer is being
6628 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
6631 return (GET_CODE (op) == REG
6633 && register_operand (op, mode));
6636 /* Return 1 if this operand is anything other than a hard register. */
6639 non_hard_reg_operand (op, mode)
6641 enum machine_mode mode ATTRIBUTE_UNUSED;
6643 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
6646 /* Return 1 if INSN branches forward. Should be using insn_addresses
6647 to avoid walking through all the insns... */
6649 forward_branch_p (insn)
6652 rtx label = JUMP_LABEL (insn);
6659 insn = NEXT_INSN (insn);
6662 return (insn == label);
6665 /* Return 1 if OP is an equality comparison, else return 0. */
6667 eq_neq_comparison_operator (op, mode)
6669 enum machine_mode mode ATTRIBUTE_UNUSED;
6671 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
6674 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
6676 movb_comparison_operator (op, mode)
6678 enum machine_mode mode ATTRIBUTE_UNUSED;
6680 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
6681 || GET_CODE (op) == LT || GET_CODE (op) == GE);
6684 /* Return 1 if INSN is in the delay slot of a call instruction. */
6686 jump_in_call_delay (insn)
6690 if (GET_CODE (insn) != JUMP_INSN)
6693 if (PREV_INSN (insn)
6694 && PREV_INSN (PREV_INSN (insn))
6695 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
6697 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
6699 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
6700 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
6707 /* Output an unconditional move and branch insn. */
6710 output_parallel_movb (operands, length)
6714 /* These are the cases in which we win. */
6716 return "mov%I1b,tr %1,%0,%2";
6718 /* None of these cases wins, but they don't lose either. */
6719 if (dbr_sequence_length () == 0)
6721 /* Nothing in the delay slot, fake it by putting the combined
6722 insn (the copy or add) in the delay slot of a bl. */
6723 if (GET_CODE (operands[1]) == CONST_INT)
6724 return "b %2\n\tldi %1,%0";
6726 return "b %2\n\tcopy %1,%0";
6730 /* Something in the delay slot, but we've got a long branch. */
6731 if (GET_CODE (operands[1]) == CONST_INT)
6732 return "ldi %1,%0\n\tb %2";
6734 return "copy %1,%0\n\tb %2";
6738 /* Output an unconditional add and branch insn. */
6741 output_parallel_addb (operands, length)
6745 /* To make life easy we want operand0 to be the shared input/output
6746 operand and operand1 to be the readonly operand. */
6747 if (operands[0] == operands[1])
6748 operands[1] = operands[2];
6750 /* These are the cases in which we win. */
6752 return "add%I1b,tr %1,%0,%3";
6754 /* None of these cases win, but they don't lose either. */
6755 if (dbr_sequence_length () == 0)
6757 /* Nothing in the delay slot, fake it by putting the combined
6758 insn (the copy or add) in the delay slot of a bl. */
6759 return "b %3\n\tadd%I1 %1,%0,%0";
6763 /* Something in the delay slot, but we've got a long branch. */
6764 return "add%I1 %1,%0,%0\n\tb %3";
6768 /* Return nonzero if INSN (a jump insn) immediately follows a call to
6769 a named function. This is used to discourage creating parallel movb/addb
6770 insns since a jump which immediately follows a call can execute in the
6771 delay slot of the call.
6773 It is also used to avoid filling the delay slot of a jump which
6774 immediately follows a call since the jump can usually be eliminated
6775 completely by modifying RP in the delay slot of the call. */
6778 following_call (insn)
6781 /* We do not parallel movb,addb or place jumps into call delay slots when
6782 optimizing for the PA8000. */
/* NOTE(review): the statement controlled by this test is missing from
   the excerpt; presumably it is an early `return 0;' for one of the
   CPU cases -- confirm the sense of the comparison against the full
   source.  */
6783 if (pa_cpu != PROCESSOR_8000)
6786 /* Find the previous real insn, skipping NOTEs. */
6787 insn = PREV_INSN (insn);
6788 while (insn && GET_CODE (insn) == NOTE)
6789 insn = PREV_INSN (insn);
6791 /* Check for CALL_INSNs and millicode calls. */
/* A millicode call is an ordinary INSN whose type attribute is MILLI;
   SEQUENCE/USE/CLOBBER patterns are excluded first because
   get_attr_type cannot handle them.  */
6793 && ((GET_CODE (insn) == CALL_INSN
6794 && get_attr_type (insn) != TYPE_DYNCALL)
6795 || (GET_CODE (insn) == INSN
6796 && GET_CODE (PATTERN (insn)) != SEQUENCE
6797 && GET_CODE (PATTERN (insn)) != USE
6798 && GET_CODE (PATTERN (insn)) != CLOBBER
6799 && get_attr_type (insn) == TYPE_MILLI)))
6805 /* We use this hook to perform a PA specific optimization which is difficult
6806 to do in earlier passes.
6808 We want the delay slots of branches within jump tables to be filled.
6809 None of the compiler passes at the moment even has the notion that a
6810 PA jump table doesn't contain addresses, but instead contains actual
6813 Because we actually jump into the table, the addresses of each entry
6814 must stay constant in relation to the beginning of the table (which
6815 itself must stay constant relative to the instruction to jump into
6816 it). I don't believe we can guarantee earlier passes of the compiler
6817 will adhere to those rules.
6819 So, late in the compilation process we find all the jump tables, and
6820 expand them into real code -- eg each entry in the jump table vector
6821 will get an appropriate label followed by a jump to the final target.
6823 Reorg and the final jump pass can then optimize these branches and
6824 fill their delay slots. We end up with smaller, more efficient code.
6826 The jump instructions within the table are special; we must be able
6827 to identify them during assembly output (if the jumps don't get filled
6828 we need to emit a nop rather than nullifying the delay slot). We
6829 identify jumps in switch tables by marking the SET with DImode.
6831 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
6832 insns. This serves two purposes, first it prevents jump.c from
6833 noticing that the last N entries in the table jump to the instruction
6834 immediately after the table and deleting the jumps. Second, those
6835 insns mark where we should emit .begin_brtab and .end_brtab directives
6836 when using GAS (allows for better link time optimizations). */
/* Body of the machine-dependent reorg pass (see the block comment
   above).  NOTE(review): the function's opening lines were lost in
   this excerpt -- confirm its exact signature against the full
   source.  */
/* First clean up useless add,tr sequences, then (on pre-PA8000 CPUs
   only) try to combine instruction pairs.  */
6844 remove_useless_addtr_insns (insns, 1);
6846 if (pa_cpu < PROCESSOR_8000)
6847 pa_combine_instructions (get_insns ());
6850 /* This is fairly cheap, so always run it if optimizing. */
6851 if (optimize > 0 && !TARGET_BIG_SWITCH)
6853 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
6854 insns = get_insns ();
6855 for (insn = insns; insn; insn = NEXT_INSN (insn))
6857 rtx pattern, tmp, location;
6858 unsigned int length, i;
6860 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
6861 if (GET_CODE (insn) != JUMP_INSN
6862 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6863 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6866 /* Emit marker for the beginning of the branch table. */
6867 emit_insn_before (gen_begin_brtab (), insn);
6869 pattern = PATTERN (insn);
6870 location = PREV_INSN (insn);
/* For ADDR_DIFF_VEC the labels live in operand 1; for ADDR_VEC they
   live in operand 0 -- the boolean selects the right one.  */
6871 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
6873 for (i = 0; i < length; i++)
6875 /* Emit a label before each jump to keep jump.c from
6876 removing this code. */
6877 tmp = gen_label_rtx ();
6878 LABEL_NUSES (tmp) = 1;
6879 emit_label_after (tmp, location);
6880 location = NEXT_INSN (location);
6882 if (GET_CODE (pattern) == ADDR_VEC)
6884 /* Emit the jump itself. */
6885 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
6886 tmp = emit_jump_insn_after (tmp, location);
6887 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
6888 /* It is easy to rely on the branch table markers
6889 during assembly output to trigger the correct code
6890 for a switch table jump with an unfilled delay slot,
6892 However, that requires state and assumes that we look
6895 We can't make such assumptions when computing the length
6896 of instructions. Ugh. We could walk the insn chain to
6897 determine if this instruction is in a branch table, but
6898 that can get rather expensive, particularly during the
6899 branch shortening phase of the compiler.
6901 So instead we mark this jump as being special. This is
6902 far from ideal and knows that no code after this will
6903 muck around with the mode of the JUMP_INSN itself. */
6904 PUT_MODE (tmp, SImode);
6905 LABEL_NUSES (JUMP_LABEL (tmp))++;
6906 location = NEXT_INSN (location);
/* ADDR_DIFF_VEC case: identical to the above except the label
   vector is operand 1 rather than operand 0.  */
6910 /* Emit the jump itself. */
6911 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
6912 tmp = emit_jump_insn_after (tmp, location);
6913 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
6914 /* It is easy to rely on the branch table markers
6915 during assembly output to trigger the correct code
6916 for a switch table jump with an unfilled delay slot,
6918 However, that requires state and assumes that we look
6921 We can't make such assumptions when computing the length
6922 of instructions. Ugh. We could walk the insn chain to
6923 determine if this instruction is in a branch table, but
6924 that can get rather expensive, particularly during the
6925 branch shortening phase of the compiler.
6927 So instead we mark this jump as being special. This is
6928 far from ideal and knows that no code after this will
6929 muck around with the mode of the JUMP_INSN itself. */
6930 PUT_MODE (tmp, SImode);
6931 LABEL_NUSES (JUMP_LABEL (tmp))++;
6932 location = NEXT_INSN (location);
6935 /* Emit a BARRIER after the jump. */
6936 emit_barrier_after (location);
6937 location = NEXT_INSN (location);
6940 /* Emit marker for the end of the branch table. */
6941 emit_insn_before (gen_end_brtab (), location);
6942 location = NEXT_INSN (location);
6943 emit_barrier_after (location);
6945 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
/* Not optimizing the tables away: still wrap each branch table with
   begin/end markers so the assembler directives get emitted.  */
6951 /* Still need an end_brtab insn. */
6952 insns = get_insns ();
6953 for (insn = insns; insn; insn = NEXT_INSN (insn))
6955 /* Find an ADDR_VEC insn. */
6956 if (GET_CODE (insn) != JUMP_INSN
6957 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6958 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6961 /* Now generate markers for the beginning and end of the
6963 emit_insn_before (gen_begin_brtab (), insn);
6964 emit_insn_after (gen_end_brtab (), insn);
6969 /* The PA has a number of odd instructions which can perform multiple
6970 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
6971 it may be profitable to combine two instructions into one instruction
6972 with two outputs. It's not profitable on PA2.0 machines because the
6973 two outputs would take two slots in the reorder buffers.
6975 This routine finds instructions which can be combined and combines
6976 them. We only support some of the potential combinations, and we
6977 only try common ways to find suitable instructions.
6979 * addb can add two registers or a register and a small integer
6980 and jump to a nearby (+-8k) location. Normally the jump to the
6981 nearby location is conditional on the result of the add, but by
6982 using the "true" condition we can make the jump unconditional.
6983 Thus addb can perform two independent operations in one insn.
6985 * movb is similar to addb in that it can perform a reg->reg
6986 or small immediate->reg copy and jump to a nearby (+-8k location).
6988 * fmpyadd and fmpysub can perform a FP multiply and either an
6989 FP add or FP sub if the operands of the multiply and add/sub are
6990 independent (there are other minor restrictions). Note both
6991 the fmpy and fadd/fsub can in theory move to better spots according
6992 to data dependencies, but for now we require the fmpy stay at a
6995 * Many of the memory operations can perform pre & post updates
6996 of index registers. GCC's pre/post increment/decrement addressing
6997 is far too simple to take advantage of all the possibilities. This
6998 pass may not be suitable since those insns may not be independent.
7000 * comclr can compare two ints or an int and a register, nullify
7001 the following instruction and zero some other register. This
7002 is more difficult to use as it's harder to find an insn which
7003 will generate a comclr than finding something like an unconditional
7004 branch. (conditional moves & long branches create comclr insns).
7006 * Most arithmetic operations can conditionally skip the next
7007 instruction. They can be viewed as "perform this operation
7008 and conditionally jump to this nearby location" (where nearby
7009 is an insns away). These are difficult to use due to the
7010 branch length restrictions. */
/* Scan the insn chain and combine suitable pairs of insns into single
   two-output insns (fmpyadd/fmpysub, movb/addb) -- see the block
   comment above for the strategy.  Uses a scratch PARALLEL insn (NEW)
   to trial-recognize each candidate pair via pa_can_combine_p.  */
7013 pa_combine_instructions (insns)
7014 rtx insns ATTRIBUTE_UNUSED;
7018 /* This can get expensive since the basic algorithm is on the
7019 order of O(n^2) (or worse). Only do it for -O2 or higher
7020 levels of optimization. */
7024 /* Walk down the list of insns looking for "anchor" insns which
7025 may be combined with "floating" insns. As the name implies,
7026 "anchor" instructions don't move, while "floating" insns may
7028 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
7029 new = make_insn_raw (new);
7031 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
7033 enum attr_pa_combine_type anchor_attr;
7034 enum attr_pa_combine_type floater_attr;
7036 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
7037 Also ignore any special USE insns. */
7038 if ((GET_CODE (anchor) != INSN
7039 && GET_CODE (anchor) != JUMP_INSN
7040 && GET_CODE (anchor) != CALL_INSN)
7041 || GET_CODE (PATTERN (anchor)) == USE
7042 || GET_CODE (PATTERN (anchor)) == CLOBBER
7043 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
7044 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
7047 anchor_attr = get_attr_pa_combine_type (anchor);
7048 /* See if anchor is an insn suitable for combination. */
7049 if (anchor_attr == PA_COMBINE_TYPE_FMPY
7050 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
7051 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7052 && ! forward_branch_p (anchor)))
/* Backwards scan: look earlier in the stream for a floater
   that can pair with this anchor.  */
7056 for (floater = PREV_INSN (anchor);
7058 floater = PREV_INSN (floater))
/* NOTEs and USE/CLOBBER insns are transparent to the scan.  */
7060 if (GET_CODE (floater) == NOTE
7061 || (GET_CODE (floater) == INSN
7062 && (GET_CODE (PATTERN (floater)) == USE
7063 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7066 /* Anything except a regular INSN will stop our search. */
7067 if (GET_CODE (floater) != INSN
7068 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7069 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7075 /* See if FLOATER is suitable for combination with the
7077 floater_attr = get_attr_pa_combine_type (floater);
7078 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7079 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7080 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7081 && floater_attr == PA_COMBINE_TYPE_FMPY))
7083 /* If ANCHOR and FLOATER can be combined, then we're
7084 done with this pass. */
7085 if (pa_can_combine_p (new, anchor, floater, 0,
7086 SET_DEST (PATTERN (floater)),
7087 XEXP (SET_SRC (PATTERN (floater)), 0),
7088 XEXP (SET_SRC (PATTERN (floater)), 1)))
7092 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
7093 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
/* An ADDMOVE floater is either a reg+x add (PLUS source) or
   a plain copy; the sources passed to pa_can_combine_p
   differ accordingly.  */
7095 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
7097 if (pa_can_combine_p (new, anchor, floater, 0,
7098 SET_DEST (PATTERN (floater)),
7099 XEXP (SET_SRC (PATTERN (floater)), 0),
7100 XEXP (SET_SRC (PATTERN (floater)), 1)))
7105 if (pa_can_combine_p (new, anchor, floater, 0,
7106 SET_DEST (PATTERN (floater)),
7107 SET_SRC (PATTERN (floater)),
7108 SET_SRC (PATTERN (floater))))
7114 /* If we didn't find anything on the backwards scan try forwards. */
7116 && (anchor_attr == PA_COMBINE_TYPE_FMPY
7117 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
7119 for (floater = anchor; floater; floater = NEXT_INSN (floater))
7121 if (GET_CODE (floater) == NOTE
7122 || (GET_CODE (floater) == INSN
7123 && (GET_CODE (PATTERN (floater)) == USE
7124 || GET_CODE (PATTERN (floater)) == CLOBBER)))
7128 /* Anything except a regular INSN will stop our search. */
7129 if (GET_CODE (floater) != INSN
7130 || GET_CODE (PATTERN (floater)) == ADDR_VEC
7131 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
7137 /* See if FLOATER is suitable for combination with the
7139 floater_attr = get_attr_pa_combine_type (floater);
7140 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
7141 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
7142 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7143 && floater_attr == PA_COMBINE_TYPE_FMPY))
7145 /* If ANCHOR and FLOATER can be combined, then we're
7146 done with this pass. */
/* Note the reversed=1 argument: the floater now follows the
   anchor in the insn stream.  */
7147 if (pa_can_combine_p (new, anchor, floater, 1,
7148 SET_DEST (PATTERN (floater)),
7149 XEXP (SET_SRC (PATTERN (floater)),
7151 XEXP (SET_SRC (PATTERN (floater)),
7158 /* FLOATER will be nonzero if we found a suitable floating
7159 insn for combination with ANCHOR. */
7161 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
7162 || anchor_attr == PA_COMBINE_TYPE_FMPY))
7164 /* Emit the new instruction and delete the old anchor. */
7165 emit_insn_before (gen_rtx_PARALLEL
7167 gen_rtvec (2, PATTERN (anchor),
7168 PATTERN (floater))),
/* Turn the old anchor into a deleted-insn note rather than
   unlinking it from the chain.  */
7171 PUT_CODE (anchor, NOTE);
7172 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7173 NOTE_SOURCE_FILE (anchor) = 0;
7175 /* Emit a special USE insn for FLOATER, then delete
7176 the floating insn. */
7177 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7178 delete_insn (floater);
7183 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
7186 /* Emit the new_jump instruction and delete the old anchor. */
7188 = emit_jump_insn_before (gen_rtx_PARALLEL
7190 gen_rtvec (2, PATTERN (anchor),
7191 PATTERN (floater))),
7194 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
7195 PUT_CODE (anchor, NOTE);
7196 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
7197 NOTE_SOURCE_FILE (anchor) = 0;
7199 /* Emit a special USE insn for FLOATER, then delete
7200 the floating insn. */
7201 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
7202 delete_insn (floater);
/* Return nonzero if ANCHOR and FLOATER may be combined into the
   scratch PARALLEL insn NEW.  REVERSED is nonzero when FLOATER follows
   ANCHOR in the insn stream.  DEST is the floater's output; SRC1/SRC2
   are its inputs.  NOTE(review): the excerpt is missing the lines that
   set `start'/`end' from REVERSED -- confirm against the full
   source.  */
7210 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
7211 rtx new, anchor, floater;
7213 rtx dest, src1, src2;
7215 int insn_code_number;
7218 /* Create a PARALLEL with the patterns of ANCHOR and
7219 FLOATER, try to recognize it, then test constraints
7220 for the resulting pattern.
7222 If the pattern doesn't match or the constraints
7223 aren't met keep searching for a suitable floater
7225 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
7226 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
/* Force re-recognition; NEW's cached code is stale after the
   pattern swap above.  */
7227 INSN_CODE (new) = -1;
7228 insn_code_number = recog_memoized (new);
7229 if (insn_code_number < 0
7230 || !constrain_operands (1))
7244 /* There's up to three operands to consider. One
7245 output and two inputs.
7247 The output must not be used between FLOATER & ANCHOR
7248 exclusive. The inputs must not be set between
7249 FLOATER and ANCHOR exclusive. */
7251 if (reg_used_between_p (dest, start, end))
7254 if (reg_set_between_p (src1, start, end))
7257 if (reg_set_between_p (src2, start, end))
7260 /* If we get here, then everything is good. */
7264 /* Return nonzero if references for INSN are delayed.
7266 Millicode insns are actually function calls with some special
7267 constraints on arguments and register usage.
7269 Millicode calls always expect their arguments in the integer argument
7270 registers, and always return their result in %r29 (ret1). They
7271 are expected to clobber their arguments, %r1, %r29, and the return
7272 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
7274 This function tells reorg that the references to arguments and
7275 millicode calls do not appear to happen until after the millicode call.
7276 This allows reorg to put insns which set the argument registers into the
7277 delay slot of the millicode call -- thus they act more like traditional
7280 Note we can not consider side effects of the insn to be delayed because
7281 the branch and link insn will clobber the return pointer. If we happened
7282 to use the return pointer in the delay slot of the call, then we lose.
7284 get_attr_type will try to recognize the given insn, so make sure to
7285 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
7288 insn_refs_are_delayed (insn)
7291 return ((GET_CODE (insn) == INSN
7292 && GET_CODE (PATTERN (insn)) != SEQUENCE
7293 && GET_CODE (PATTERN (insn)) != USE
7294 && GET_CODE (PATTERN (insn)) != CLOBBER
7295 && get_attr_type (insn) == TYPE_MILLI));
7298 /* Return the location of a parameter that is passed in a register or NULL
7299 if the parameter has any component that is passed in memory.
7301 This is new code and will be pushed into the net sources after
7304 ??? We might want to restructure this so that it looks more like other
/* Implement the FUNCTION_ARG target macro for PA: return the rtx for the
   register (or PARALLEL of registers) in which an argument is passed, or
   zero when the argument has any component passed in memory.
   CUM tracks the argument words already consumed, MODE/TYPE describe the
   argument, and INCOMING distinguishes the callee-side view from the
   caller's.
   NOTE(review): several lines (the return type, the TYPE and INCOMING
   parameter declarations, some statements and closing braces) are missing
   from this extract; the comments below describe only what is visible.  */
7307 function_arg (cum, mode, type, named, incoming)
7308 CUMULATIVE_ARGS *cum;
7309 enum machine_mode mode;
7311 int named ATTRIBUTE_UNUSED;
/* The 64-bit ABI passes up to 8 words in registers, the 32-bit up to 4.  */
7314 int max_arg_words = (TARGET_64BIT ? 8 : 4);
7321 /* If this arg would be passed partially or totally on the stack, then
7322 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
7323 handle arguments which are split between regs and stack slots if
7324 the ABI mandates split arguments. */
7325 if (cum->words + FUNCTION_ARG_SIZE (mode, type) > max_arg_words
7326 || mode == VOIDmode)
/* Multi-word arguments must start on an even word boundary.  */
7332 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7334 if (cum->words + offset >= max_arg_words
7335 || mode == VOIDmode)
7339 /* The 32bit ABIs and the 64bit ABIs are rather different,
7340 particularly in their handling of FP registers. We might
7341 be able to cleverly share code between them, but I'm not
7342 going to bother in the hope that splitting them up results
7343 in code that is more easily understood.
7345 The 64bit code probably is very wrong for structure passing. */
7348 /* Advance the base registers to their current locations.
7350 Remember, gprs grow towards smaller register numbers while
7351 fprs grow to higher register numbers. Also remember FP regs
7352 are always 4 bytes wide, while the size of an integer register
7353 varies based on the size of the target word. */
7354 gpr_reg_base = 26 - cum->words;
7355 fpr_reg_base = 32 + cum->words;
7357 /* If the argument is more than a word long, then we need to align
7358 the base registers. Same caveats as above. */
7359 if (FUNCTION_ARG_SIZE (mode, type) > 1)
7361 if (mode != BLKmode)
7363 /* First deal with alignment of the doubleword. */
7364 gpr_reg_base -= (cum->words & 1);
7366 /* This seems backwards, but it is what HP specifies. We need
7367 gpr_reg_base to point to the smaller numbered register of
7368 the integer register pair. So if we have an even register
7369 number, then decrement the gpr base. */
7370 gpr_reg_base -= ((gpr_reg_base % 2) == 0);
7372 /* FP values behave sanely, except that each FP reg is only
7374 fpr_reg_base += ((fpr_reg_base % 2) == 0);
/* BLKmode (aggregate) case: build one DImode piece per register word.  */
7379 int i, offset = 0, ub;
7380 ub = FUNCTION_ARG_SIZE (mode, type);
7382 MAX (0, max_arg_words - cum->words - (cum->words & 1)));
7383 gpr_reg_base -= (cum->words & 1);
7384 for (i = 0; i < ub; i++)
7386 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
7387 gen_rtx_REG (DImode,
/* A single piece is returned directly; several pieces are wrapped in
   a PARALLEL.  */
7396 return XEXP (loc[0], 0);
7398 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
7404 /* If the argument is larger than a word, then we know precisely
7405 which registers we must use. */
7406 if (FUNCTION_ARG_SIZE (mode, type) > 1)
7421 /* We have a single word (32 bits). A simple computation
7422 will get us the register #s we need. */
7423 gpr_reg_base = 26 - cum->words;
7424 fpr_reg_base = 32 + 2 * cum->words;
/* TFmode on the 64-bit target is handed back as a pair of DImode
   general registers.  */
7428 if (TARGET_64BIT && mode == TFmode)
7434 gen_rtx_EXPR_LIST (VOIDmode,
7435 gen_rtx_REG (DImode, gpr_reg_base + 1),
7437 gen_rtx_EXPR_LIST (VOIDmode,
7438 gen_rtx_REG (DImode, gpr_reg_base),
7441 /* Determine if the argument needs to be passed in both general and
7442 floating point registers. */
7443 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
7444 /* If we are doing soft-float with portable runtime, then there
7445 is no need to worry about FP regs. */
7446 && ! TARGET_SOFT_FLOAT
7447 /* The parameter must be some kind of float, else we can just
7448 pass it in integer registers. */
7449 && FLOAT_MODE_P (mode)
7450 /* The target function must not have a prototype. */
7451 && cum->nargs_prototype <= 0
7452 /* libcalls do not need to pass items in both FP and general
7454 && type != NULL_TREE
7455 /* All this hair applies to outgoing args only. */
7457 /* Also pass outgoing floating arguments in both registers in indirect
7458 calls with the 32 bit ABI and the HP assembler since there is no
7459 way to specify argument locations in static functions. */
7464 && FLOAT_MODE_P (mode)))
7470 gen_rtx_EXPR_LIST (VOIDmode,
7471 gen_rtx_REG (mode, fpr_reg_base),
7473 gen_rtx_EXPR_LIST (VOIDmode,
7474 gen_rtx_REG (mode, gpr_reg_base),
7479 /* See if we should pass this parameter in a general register. */
7480 if (TARGET_SOFT_FLOAT
7481 /* Indirect calls in the normal 32bit ABI require all arguments
7482 to be passed in general registers. */
7483 || (!TARGET_PORTABLE_RUNTIME
7487 /* If the parameter is not a floating point parameter, then
7488 it belongs in GPRs. */
7489 || !FLOAT_MODE_P (mode))
7490 retval = gen_rtx_REG (mode, gpr_reg_base);
7492 retval = gen_rtx_REG (mode, fpr_reg_base);
7498 /* If this arg would be passed totally in registers or totally on the stack,
7499 then this routine should return zero. It is currently called only for
7500 the 64-bit target. */
/* Implement FUNCTION_ARG_PARTIAL_NREGS: return the number of argument
   words passed in registers when an argument is split between registers
   and the stack, and zero when it is passed totally in registers or
   totally on the stack.  Per the comment above, this is currently called
   only for the 64-bit target (hence max_arg_words is fixed at 8).
   NOTE(review): the return type, the TYPE parameter declaration, the
   braces and the "return 0" statements of the first two branches are not
   visible in this extract.  */
7502 function_arg_partial_nregs (cum, mode, type, named)
7503 CUMULATIVE_ARGS *cum;
7504 enum machine_mode mode;
7506 int named ATTRIBUTE_UNUSED;
7508 unsigned int max_arg_words = 8;
7509 unsigned int offset = 0;
/* Multi-word arguments must start on an even word boundary.  */
7511 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7514 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
7515 /* Arg fits fully into registers. */
7517 else if (cum->words + offset >= max_arg_words)
7518 /* Arg fully on the stack. */
/* Split case: the words that still fit in registers.  */
7522 return max_arg_words - cum->words - offset;
7526 /* Return 1 if this is a comparison operator. This allows the use of
7527 MATCH_OPERATOR to recognize all the branch insns. */
/* Predicate: return 1 if OP is a comparison operator usable via
   MATCH_OPERATOR in the cmpib branch insns, matching MODE (or any mode
   when MODE is VOIDmode).  Accepts EQ, NE, GT, GTU, GE, LT, LE and LEU.
   NOTE(review): GEU and LTU are absent from the list -- presumably cmpib
   cannot implement those conditions; confirm against the PA instruction
   set.  The return type, the OP parameter declaration and the braces are
   not visible in this extract.  */
7530 cmpib_comparison_operator (op, mode)
7532 enum machine_mode mode;
7534 return ((mode == VOIDmode || GET_MODE (op) == mode)
7535 && (GET_CODE (op) == EQ
7536 || GET_CODE (op) == NE
7537 || GET_CODE (op) == GT
7538 || GET_CODE (op) == GTU
7539 || GET_CODE (op) == GE
7540 || GET_CODE (op) == LT
7541 || GET_CODE (op) == LE
7542 || GET_CODE (op) == LEU));
7545 /* Mark ARG (which is really a struct deferred_plabel **) for GC. */
/* GC mark routine registered for the deferred plabel table.  ARG is
   really a struct deferred_plabel ** (see the comment above); mark the
   internal_label rtx of each of the n_deferred_plabels entries so the
   garbage collector keeps them alive.
   NOTE(review): the return type, the ARG parameter declaration, the
   declaration of I and the braces are not visible in this extract.  */
7548 mark_deferred_plabels (arg)
7551 struct deferred_plabel *dp = *(struct deferred_plabel **) arg;
7554 for (i = 0; i < n_deferred_plabels; ++i)
7555 ggc_mark_rtx (dp[i].internal_label);
7558 /* Called to register all of our global variables with the garbage
collector.  */
/* Registers the two compare-operand rtxs as individual rtx roots, and
   the deferred plabel table pointer as a generic root walked by
   mark_deferred_plabels.  Note sizeof (&deferred_plabels) is the size of
   the pointer variable being registered as the root.
   NOTE(review): the function header line and closing brace are not
   visible in this extract.  */
7564 ggc_add_rtx_root (&hppa_compare_op0, 1);
7565 ggc_add_rtx_root (&hppa_compare_op1, 1);
7566 ggc_add_root (&deferred_plabels, 1, sizeof (&deferred_plabels),
7567 &mark_deferred_plabels);